core/stdarch/crates/core_arch/src/x86/avx512f.rs

use crate::{
    arch::asm,
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
    intrinsics::{fmaf32, fmaf64},
    mem, ptr,
};

use core::hint::unreachable_unchecked;
#[cfg(test)]
use stdarch_test::assert_instr;

/// Computes the absolute values of packed 32-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi32&expand=39)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm512_abs_epi32(a: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i32x16();
        let r = simd_select::<i32x16, _>(simd_lt(a, i32x16::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

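// Editorial note (not part of the upstream source): `simd_neg` is a wrapping
// negation, so the one lane value with no positive counterpart keeps its bit
// pattern; the doc text calls the results "unsigned" for this reason. A
// minimal sketch, assuming an AVX-512F capable CPU:
//
//     let a = _mm512_setr_epi32(-3, 7, i32::MIN, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
//     let r = _mm512_abs_epi32(a);
//     // lanes: 3, 7, 0x8000_0000 (2^31 read as unsigned), 0, ...
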
/// Computes the absolute value of packed 32-bit integers in `a`, and stores the
/// unsigned results in `dst` using writemask `k` (elements are copied from
/// `src` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi32&expand=40)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm512_mask_abs_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, abs, src.as_i32x16()))
    }
}

/// Computes the absolute value of packed 32-bit integers in `a`, and stores the
/// unsigned results in `dst` using zeromask `k` (elements are zeroed out when
/// the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi32&expand=41)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, abs, i32x16::ZERO))
    }
}

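// Illustrative sketch (not from the upstream source): the `mask` variant merges
// with `src`, the `maskz` variant zeroes the unselected lanes. With a mask that
// keeps only the low four lanes:
//
//     let src = _mm512_set1_epi32(-1);
//     let a = _mm512_set1_epi32(-9);
//     let merged = _mm512_mask_abs_epi32(src, 0b0000_0000_0000_1111, a); // 9,9,9,9,-1,...,-1
//     let zeroed = _mm512_maskz_abs_epi32(0b0000_0000_0000_1111, a);     // 9,9,9,9, 0,..., 0
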
/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi32&expand=37)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm256_mask_abs_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, abs, src.as_i32x8()))
    }
}

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi32&expand=38)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm256_maskz_abs_epi32(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, abs, i32x8::ZERO))
    }
}

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi32&expand=34)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm_mask_abs_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, abs, src.as_i32x4()))
    }
}

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi32&expand=35)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm_maskz_abs_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, abs, i32x4::ZERO))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi64&expand=48)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm512_abs_epi64(a: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i64x8();
        let r = simd_select::<i64x8, _>(simd_lt(a, i64x8::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi64&expand=49)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm512_mask_abs_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, abs, src.as_i64x8()))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi64&expand=50)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm512_maskz_abs_epi64(k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, abs, i64x8::ZERO))
    }
}

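// Context note (editorial): unlike the 32-bit case, there is no pre-AVX-512
// packed 64-bit absolute-value instruction, so the 128/256-bit forms below also
// require `avx512vl`. A usage sketch of the 8-lane zeromask form, for
// illustration only:
//
//     let a = _mm512_set1_epi64(-42);
//     let r = _mm512_maskz_abs_epi64(0b0000_0011, a); // lanes 0 and 1 become 42, the rest 0
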
/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi64&expand=45)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm256_abs_epi64(a: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i64x4();
        let r = simd_select::<i64x4, _>(simd_lt(a, i64x4::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi64&expand=46)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm256_mask_abs_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, abs, src.as_i64x4()))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm256_maskz_abs_epi64(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, abs, i64x4::ZERO))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm_abs_epi64(a: __m128i) -> __m128i {
    unsafe {
        let a = a.as_i64x2();
        let r = simd_select::<i64x2, _>(simd_lt(a, i64x2::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm_mask_abs_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, abs, src.as_i64x2()))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm_maskz_abs_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, abs, i64x2::ZERO))
    }
}

/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_ps&expand=65)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpandd))]
pub fn _mm512_abs_ps(v2: __m512) -> __m512 {
    unsafe { simd_fabs(v2) }
}

/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_ps&expand=66)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpandd))]
pub fn _mm512_mask_abs_ps(src: __m512, k: __mmask16, v2: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, simd_fabs(v2), src) }
}

/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_pd&expand=60)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpandq))]
pub fn _mm512_abs_pd(v2: __m512d) -> __m512d {
    unsafe { simd_fabs(v2) }
}

/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_pd&expand=61)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpandq))]
pub fn _mm512_mask_abs_pd(src: __m512d, k: __mmask8, v2: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, simd_fabs(v2), src) }
}

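// Editorial note: for IEEE floats, absolute value only clears the sign bit, so
// `simd_fabs` here is expected to lower to a bitwise AND with a constant mask
// (hence the `vpandd`/`vpandq` in `assert_instr` above). A scalar sketch of the
// same idea, for illustration only:
//
//     fn fabs_bits(x: f32) -> f32 {
//         f32::from_bits(x.to_bits() & 0x7fff_ffff) // drop bit 31, keep exponent and mantissa
//     }
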
/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi32&expand=3801)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm512_mask_mov_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        let mov = a.as_i32x16();
        transmute(simd_select_bitmask(k, mov, src.as_i32x16()))
    }
}

/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi32&expand=3802)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm512_maskz_mov_epi32(k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        let mov = a.as_i32x16();
        transmute(simd_select_bitmask(k, mov, i32x16::ZERO))
    }
}

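// Usage sketch (illustrative, not from the upstream source): the masked moves
// are lane-wise blends, so a mask produced by a comparison selects between two
// vectors without branching:
//
//     let k = _mm512_cmplt_epi32_mask(a, b);    // bit set where a < b
//     let min = _mm512_mask_mov_epi32(b, k, a); // per-lane minimum of a and b
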
/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi32&expand=3799)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm256_mask_mov_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let mov = a.as_i32x8();
        transmute(simd_select_bitmask(k, mov, src.as_i32x8()))
    }
}

/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi32&expand=3800)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm256_maskz_mov_epi32(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let mov = a.as_i32x8();
        transmute(simd_select_bitmask(k, mov, i32x8::ZERO))
    }
}

/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi32&expand=3797)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm_mask_mov_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov = a.as_i32x4();
        transmute(simd_select_bitmask(k, mov, src.as_i32x4()))
    }
}

/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi32&expand=3798)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm_maskz_mov_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov = a.as_i32x4();
        transmute(simd_select_bitmask(k, mov, i32x4::ZERO))
    }
}

/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi64&expand=3807)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm512_mask_mov_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        let mov = a.as_i64x8();
        transmute(simd_select_bitmask(k, mov, src.as_i64x8()))
    }
}

/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi64&expand=3808)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm512_maskz_mov_epi64(k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        let mov = a.as_i64x8();
        transmute(simd_select_bitmask(k, mov, i64x8::ZERO))
    }
}

/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi64&expand=3805)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm256_mask_mov_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let mov = a.as_i64x4();
        transmute(simd_select_bitmask(k, mov, src.as_i64x4()))
    }
}

/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi64&expand=3806)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm256_maskz_mov_epi64(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let mov = a.as_i64x4();
        transmute(simd_select_bitmask(k, mov, i64x4::ZERO))
    }
}

/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi64&expand=3803)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm_mask_mov_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov = a.as_i64x2();
        transmute(simd_select_bitmask(k, mov, src.as_i64x2()))
    }
}

/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi64&expand=3804)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm_maskz_mov_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov = a.as_i64x2();
        transmute(simd_select_bitmask(k, mov, i64x2::ZERO))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_ps&expand=3825)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm512_mask_mov_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        let mov = a.as_f32x16();
        transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_ps&expand=3826)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm512_maskz_mov_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        let mov = a.as_f32x16();
        transmute(simd_select_bitmask(k, mov, f32x16::ZERO))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_ps&expand=3823)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm256_mask_mov_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        let mov = a.as_f32x8();
        transmute(simd_select_bitmask(k, mov, src.as_f32x8()))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_ps&expand=3824)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm256_maskz_mov_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        let mov = a.as_f32x8();
        transmute(simd_select_bitmask(k, mov, f32x8::ZERO))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_ps&expand=3821)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm_mask_mov_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let mov = a.as_f32x4();
        transmute(simd_select_bitmask(k, mov, src.as_f32x4()))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_ps&expand=3822)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm_maskz_mov_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let mov = a.as_f32x4();
        transmute(simd_select_bitmask(k, mov, f32x4::ZERO))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_pd&expand=3819)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm512_mask_mov_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        let mov = a.as_f64x8();
        transmute(simd_select_bitmask(k, mov, src.as_f64x8()))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_pd&expand=3820)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm512_maskz_mov_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        let mov = a.as_f64x8();
        transmute(simd_select_bitmask(k, mov, f64x8::ZERO))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_pd&expand=3817)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm256_mask_mov_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe {
        let mov = a.as_f64x4();
        transmute(simd_select_bitmask(k, mov, src.as_f64x4()))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_pd&expand=3818)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm256_maskz_mov_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe {
        let mov = a.as_f64x4();
        transmute(simd_select_bitmask(k, mov, f64x4::ZERO))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_pd&expand=3815)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm_mask_mov_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe {
        let mov = a.as_f64x2();
        transmute(simd_select_bitmask(k, mov, src.as_f64x2()))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_pd&expand=3816)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm_maskz_mov_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe {
        let mov = a.as_f64x2();
        transmute(simd_select_bitmask(k, mov, f64x2::ZERO))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi32&expand=100)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm512_add_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_add(a.as_i32x16(), b.as_i32x16())) }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi32&expand=101)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm512_mask_add_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, add, src.as_i32x16()))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi32&expand=102)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm512_maskz_add_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, add, i32x16::ZERO))
    }
}

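// Editorial note: `simd_add` on integer lanes is a wrapping addition, so
// overflow silently wraps, matching the underlying `vpaddd`. A masked-add
// sketch that only updates the lanes selected by `k`, for illustration only:
//
//     let acc = _mm512_mask_add_epi32(acc, k, acc, delta); // acc[i] += delta[i] where bit i of k is set
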
/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi32&expand=98)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm256_mask_add_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, add, src.as_i32x8()))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi32&expand=99)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm256_maskz_add_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, add, i32x8::ZERO))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi32&expand=95)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm_mask_add_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, add, src.as_i32x4()))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi32&expand=96)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm_maskz_add_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, add, i32x4::ZERO))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi64&expand=109)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm512_add_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_add(a.as_i64x8(), b.as_i64x8())) }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi64&expand=110)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm512_mask_add_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, add, src.as_i64x8()))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi64&expand=111)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm512_maskz_add_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, add, i64x8::ZERO))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi64&expand=107)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm256_mask_add_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, add, src.as_i64x4()))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi64&expand=108)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm256_maskz_add_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, add, i64x4::ZERO))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi64&expand=104)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm_mask_add_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, add, src.as_i64x2()))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi64&expand=105)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm_maskz_add_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, add, i64x2::ZERO))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_ps&expand=139)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm512_add_ps(a: __m512, b: __m512) -> __m512 {
    unsafe { transmute(simd_add(a.as_f32x16(), b.as_f32x16())) }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_ps&expand=140)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm512_mask_add_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let add = _mm512_add_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, add, src.as_f32x16()))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_ps&expand=141)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm512_maskz_add_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let add = _mm512_add_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, add, f32x16::ZERO))
    }
}

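// Usage sketch (illustrative only): masked floating-point addition, e.g.
// accumulating only the lanes flagged by `k` while leaving the others
// untouched. These are the default-rounding forms; the `_round` variants
// (e.g. `_mm512_add_round_ps`) take an explicit rounding-mode constant.
//
//     let sums = _mm512_mask_add_ps(sums, k, sums, values);
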
/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_ps&expand=137)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm256_mask_add_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let add = _mm256_add_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, add, src.as_f32x8()))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_ps&expand=138)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm256_maskz_add_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let add = _mm256_add_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, add, f32x8::ZERO))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_ps&expand=134)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm_mask_add_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let add = _mm_add_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, add, src.as_f32x4()))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_ps&expand=135)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm_maskz_add_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let add = _mm_add_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, add, f32x4::ZERO))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_pd&expand=127)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm512_add_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_add(a.as_f64x8(), b.as_f64x8())) }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_pd&expand=128)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm512_mask_add_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let add = _mm512_add_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, add, src.as_f64x8()))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_pd&expand=129)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm512_maskz_add_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let add = _mm512_add_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, add, f64x8::ZERO))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_pd&expand=125)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm256_mask_add_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let add = _mm256_add_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, add, src.as_f64x4()))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_pd&expand=126)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm256_maskz_add_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let add = _mm256_add_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, add, f64x4::ZERO))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_pd&expand=122)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm_mask_add_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let add = _mm_add_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, add, src.as_f64x2()))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_pd&expand=123)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm_maskz_add_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let add = _mm_add_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, add, f64x2::ZERO))
    }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi32&expand=5694)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub fn _mm512_sub_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_sub(a.as_i32x16(), b.as_i32x16())) }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi32&expand=5692)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub fn _mm512_mask_sub_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_sub_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, sub, src.as_i32x16()))
    }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi32&expand=5693)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub fn _mm512_maskz_sub_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_sub_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, sub, i32x16::ZERO))
    }
}

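// Usage sketch (illustrative only): a mask-driven conditional decrement, i.e.
// "subtract 1 where the lane is still positive" expressed as one masked vpsubd:
//
//     let k = _mm512_cmpgt_epi32_mask(counts, _mm512_setzero_si512()); // bit set where counts > 0
//     let counts = _mm512_mask_sub_epi32(counts, k, counts, _mm512_set1_epi32(1));
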
/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi32&expand=5689)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub fn _mm256_mask_sub_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_sub_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, sub, src.as_i32x8()))
    }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi32&expand=5690)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub fn _mm256_maskz_sub_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_sub_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, sub, i32x8::ZERO))
    }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi32&expand=5686)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub fn _mm_mask_sub_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_sub_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, sub, src.as_i32x4()))
    }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi32&expand=5687)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub fn _mm_maskz_sub_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_sub_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, sub, i32x4::ZERO))
    }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi64&expand=5703)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub fn _mm512_sub_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_sub(a.as_i64x8(), b.as_i64x8())) }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi64&expand=5701)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub fn _mm512_mask_sub_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_sub_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, sub, src.as_i64x8()))
    }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi64&expand=5702)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub fn _mm512_maskz_sub_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_sub_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, sub, i64x8::ZERO))
    }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi64&expand=5698)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub fn _mm256_mask_sub_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_sub_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, sub, src.as_i64x4()))
    }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi64&expand=5699)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub fn _mm256_maskz_sub_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_sub_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, sub, i64x4::ZERO))
    }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi64&expand=5695)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub fn _mm_mask_sub_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_sub_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, sub, src.as_i64x2()))
    }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi64&expand=5696)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1187#[cfg_attr(test, assert_instr(vpsubq))]
1188pub fn _mm_maskz_sub_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1189    unsafe {
1190        let sub = _mm_sub_epi64(a, b).as_i64x2();
1191        transmute(simd_select_bitmask(k, sub, i64x2::ZERO))
1192    }
1193}
1194
1195/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.
1196///
1197/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_ps&expand=5733)
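///
/// A minimal usage sketch (illustrative, not part of Intel's documentation); it assumes the
/// nightly `stdarch_x86_avx512` feature and AVX-512F support, and the `demo` wrapper is
/// hypothetical.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_ps(1.5);
///     let b = _mm512_set1_ps(0.25);
///     // Every lane becomes 1.5 - 0.25 = 1.25, so the 16-lane sum is exactly 20.0.
///     let r = _mm512_sub_ps(a, b);
///     assert_eq!(_mm512_reduce_add_ps(r), 20.0);
/// }
/// ```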
1198#[inline]
1199#[target_feature(enable = "avx512f")]
1200#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1201#[cfg_attr(test, assert_instr(vsubps))]
1202pub fn _mm512_sub_ps(a: __m512, b: __m512) -> __m512 {
1203    unsafe { transmute(simd_sub(a.as_f32x16(), b.as_f32x16())) }
1204}
1205
1206/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1207///
1208/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_ps&expand=5731)
1209#[inline]
1210#[target_feature(enable = "avx512f")]
1211#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1212#[cfg_attr(test, assert_instr(vsubps))]
1213pub fn _mm512_mask_sub_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
1214    unsafe {
1215        let sub = _mm512_sub_ps(a, b).as_f32x16();
1216        transmute(simd_select_bitmask(k, sub, src.as_f32x16()))
1217    }
1218}
1219
1220/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1221///
1222/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_ps&expand=5732)
1223#[inline]
1224#[target_feature(enable = "avx512f")]
1225#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1226#[cfg_attr(test, assert_instr(vsubps))]
1227pub fn _mm512_maskz_sub_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
1228    unsafe {
1229        let sub = _mm512_sub_ps(a, b).as_f32x16();
1230        transmute(simd_select_bitmask(k, sub, f32x16::ZERO))
1231    }
1232}
1233
1234/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1235///
1236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_ps&expand=5728)
1237#[inline]
1238#[target_feature(enable = "avx512f,avx512vl")]
1239#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1240#[cfg_attr(test, assert_instr(vsubps))]
1241pub fn _mm256_mask_sub_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
1242    unsafe {
1243        let sub = _mm256_sub_ps(a, b).as_f32x8();
1244        transmute(simd_select_bitmask(k, sub, src.as_f32x8()))
1245    }
1246}
1247
1248/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1249///
1250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_ps&expand=5729)
1251#[inline]
1252#[target_feature(enable = "avx512f,avx512vl")]
1253#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1254#[cfg_attr(test, assert_instr(vsubps))]
1255pub fn _mm256_maskz_sub_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
1256    unsafe {
1257        let sub = _mm256_sub_ps(a, b).as_f32x8();
1258        transmute(simd_select_bitmask(k, sub, f32x8::ZERO))
1259    }
1260}
1261
1262/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1263///
1264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_ps&expand=5725)
1265#[inline]
1266#[target_feature(enable = "avx512f,avx512vl")]
1267#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1268#[cfg_attr(test, assert_instr(vsubps))]
1269pub fn _mm_mask_sub_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
1270    unsafe {
1271        let sub = _mm_sub_ps(a, b).as_f32x4();
1272        transmute(simd_select_bitmask(k, sub, src.as_f32x4()))
1273    }
1274}
1275
1276/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1277///
1278/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_ps&expand=5726)
1279#[inline]
1280#[target_feature(enable = "avx512f,avx512vl")]
1281#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1282#[cfg_attr(test, assert_instr(vsubps))]
1283pub fn _mm_maskz_sub_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
1284    unsafe {
1285        let sub = _mm_sub_ps(a, b).as_f32x4();
1286        transmute(simd_select_bitmask(k, sub, f32x4::ZERO))
1287    }
1288}
1289
1290/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
1291///
1292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_pd&expand=5721)
1293#[inline]
1294#[target_feature(enable = "avx512f")]
1295#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1296#[cfg_attr(test, assert_instr(vsubpd))]
1297pub fn _mm512_sub_pd(a: __m512d, b: __m512d) -> __m512d {
1298    unsafe { transmute(simd_sub(a.as_f64x8(), b.as_f64x8())) }
1299}
1300
1301/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1302///
1303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_pd&expand=5719)
1304#[inline]
1305#[target_feature(enable = "avx512f")]
1306#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1307#[cfg_attr(test, assert_instr(vsubpd))]
1308pub fn _mm512_mask_sub_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1309    unsafe {
1310        let sub = _mm512_sub_pd(a, b).as_f64x8();
1311        transmute(simd_select_bitmask(k, sub, src.as_f64x8()))
1312    }
1313}
1314
1315/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1316///
1317/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_pd&expand=5720)
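///
/// A minimal usage sketch (illustrative, not part of Intel's documentation) of the zeromask
/// form; it assumes nightly `stdarch_x86_avx512` and AVX-512F, and the `demo` wrapper is
/// hypothetical.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_pd(3.0);
///     let b = _mm512_set1_pd(1.0);
///     // The low four lanes become 3.0 - 1.0 = 2.0; the upper four are zeroed by the mask.
///     let r = _mm512_maskz_sub_pd(0b0000_1111, a, b);
///     assert_eq!(_mm512_reduce_add_pd(r), 8.0);
/// }
/// ```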
1318#[inline]
1319#[target_feature(enable = "avx512f")]
1320#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1321#[cfg_attr(test, assert_instr(vsubpd))]
1322pub fn _mm512_maskz_sub_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1323    unsafe {
1324        let sub = _mm512_sub_pd(a, b).as_f64x8();
1325        transmute(simd_select_bitmask(k, sub, f64x8::ZERO))
1326    }
1327}
1328
1329/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1330///
1331/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_pd&expand=5716)
1332#[inline]
1333#[target_feature(enable = "avx512f,avx512vl")]
1334#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1335#[cfg_attr(test, assert_instr(vsubpd))]
1336pub fn _mm256_mask_sub_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1337    unsafe {
1338        let sub = _mm256_sub_pd(a, b).as_f64x4();
1339        transmute(simd_select_bitmask(k, sub, src.as_f64x4()))
1340    }
1341}
1342
1343/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1344///
1345/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_pd&expand=5717)
1346#[inline]
1347#[target_feature(enable = "avx512f,avx512vl")]
1348#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1349#[cfg_attr(test, assert_instr(vsubpd))]
1350pub fn _mm256_maskz_sub_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1351    unsafe {
1352        let sub = _mm256_sub_pd(a, b).as_f64x4();
1353        transmute(simd_select_bitmask(k, sub, f64x4::ZERO))
1354    }
1355}
1356
1357/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1358///
1359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_pd&expand=5713)
1360#[inline]
1361#[target_feature(enable = "avx512f,avx512vl")]
1362#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1363#[cfg_attr(test, assert_instr(vsubpd))]
1364pub fn _mm_mask_sub_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1365    unsafe {
1366        let sub = _mm_sub_pd(a, b).as_f64x2();
1367        transmute(simd_select_bitmask(k, sub, src.as_f64x2()))
1368    }
1369}
1370
1371/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1372///
1373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_pd&expand=5714)
1374#[inline]
1375#[target_feature(enable = "avx512f,avx512vl")]
1376#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1377#[cfg_attr(test, assert_instr(vsubpd))]
1378pub fn _mm_maskz_sub_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1379    unsafe {
1380        let sub = _mm_sub_pd(a, b).as_f64x2();
1381        transmute(simd_select_bitmask(k, sub, f64x2::ZERO))
1382    }
1383}
1384
1385/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst.
1386///
1387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_epi32&expand=3907)
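///
/// A minimal usage sketch (illustrative, not part of Intel's documentation) showing that only
/// the low, sign-extended 32 bits of each 64-bit lane participate; it assumes nightly
/// `stdarch_x86_avx512` and AVX-512F, and the `demo` wrapper is hypothetical.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     // The low 32 bits of each 64-bit lane are -4 and 5 respectively.
///     let a = _mm512_set1_epi64(-4);
///     let b = _mm512_set1_epi64(5);
///     let r = _mm512_mul_epi32(a, b);
///     assert_eq!(_mm512_cmpeq_epi64_mask(r, _mm512_set1_epi64(-20)), 0xff);
/// }
/// ```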
1388#[inline]
1389#[target_feature(enable = "avx512f")]
1390#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1391#[cfg_attr(test, assert_instr(vpmuldq))]
1392pub fn _mm512_mul_epi32(a: __m512i, b: __m512i) -> __m512i {
1393    unsafe {
1394        let a = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(a.as_i64x8()));
1395        let b = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(b.as_i64x8()));
1396        transmute(simd_mul(a, b))
1397    }
1398}
1399
1400/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1401///
1402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_epi32&expand=3905)
1403#[inline]
1404#[target_feature(enable = "avx512f")]
1405#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1406#[cfg_attr(test, assert_instr(vpmuldq))]
1407pub fn _mm512_mask_mul_epi32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1408    unsafe {
1409        let mul = _mm512_mul_epi32(a, b).as_i64x8();
1410        transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
1411    }
1412}
1413
1414/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1415///
1416/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_epi32&expand=3906)
1417#[inline]
1418#[target_feature(enable = "avx512f")]
1419#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1420#[cfg_attr(test, assert_instr(vpmuldq))]
1421pub fn _mm512_maskz_mul_epi32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1422    unsafe {
1423        let mul = _mm512_mul_epi32(a, b).as_i64x8();
1424        transmute(simd_select_bitmask(k, mul, i64x8::ZERO))
1425    }
1426}
1427
1428/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1429///
1430/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_epi32&expand=3902)
1431#[inline]
1432#[target_feature(enable = "avx512f,avx512vl")]
1433#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1434#[cfg_attr(test, assert_instr(vpmuldq))]
1435pub fn _mm256_mask_mul_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1436    unsafe {
1437        let mul = _mm256_mul_epi32(a, b).as_i64x4();
1438        transmute(simd_select_bitmask(k, mul, src.as_i64x4()))
1439    }
1440}
1441
1442/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1443///
1444/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_epi32&expand=3903)
1445#[inline]
1446#[target_feature(enable = "avx512f,avx512vl")]
1447#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1448#[cfg_attr(test, assert_instr(vpmuldq))]
1449pub fn _mm256_maskz_mul_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1450    unsafe {
1451        let mul = _mm256_mul_epi32(a, b).as_i64x4();
1452        transmute(simd_select_bitmask(k, mul, i64x4::ZERO))
1453    }
1454}
1455
1456/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1457///
1458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_epi32&expand=3899)
1459#[inline]
1460#[target_feature(enable = "avx512f,avx512vl")]
1461#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1462#[cfg_attr(test, assert_instr(vpmuldq))]
1463pub fn _mm_mask_mul_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1464    unsafe {
1465        let mul = _mm_mul_epi32(a, b).as_i64x2();
1466        transmute(simd_select_bitmask(k, mul, src.as_i64x2()))
1467    }
1468}
1469
1470/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1471///
1472/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_epi32&expand=3900)
1473#[inline]
1474#[target_feature(enable = "avx512f,avx512vl")]
1475#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1476#[cfg_attr(test, assert_instr(vpmuldq))]
1477pub fn _mm_maskz_mul_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1478    unsafe {
1479        let mul = _mm_mul_epi32(a, b).as_i64x2();
1480        transmute(simd_select_bitmask(k, mul, i64x2::ZERO))
1481    }
1482}
1483
1484/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst.
1485///
1486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullo_epi32&expand=4005)
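///
/// A minimal usage sketch (illustrative, not part of Intel's documentation) showing the
/// truncation to the low 32 bits of each product; it assumes nightly `stdarch_x86_avx512`
/// and AVX-512F, and the `demo` wrapper is hypothetical.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_epi32(65_537); // 2^16 + 1
///     let b = _mm512_set1_epi32(65_536); // 2^16
///     // The full product is 2^32 + 2^16; only the low 32 bits (2^16) survive per lane.
///     let r = _mm512_mullo_epi32(a, b);
///     assert_eq!(_mm512_cmpeq_epi32_mask(r, _mm512_set1_epi32(65_536)), 0xffff);
/// }
/// ```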
1487#[inline]
1488#[target_feature(enable = "avx512f")]
1489#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1490#[cfg_attr(test, assert_instr(vpmulld))]
1491pub fn _mm512_mullo_epi32(a: __m512i, b: __m512i) -> __m512i {
1492    unsafe { transmute(simd_mul(a.as_i32x16(), b.as_i32x16())) }
1493}
1494
1495/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1496///
1497/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullo_epi32&expand=4003)
1498#[inline]
1499#[target_feature(enable = "avx512f")]
1500#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1501#[cfg_attr(test, assert_instr(vpmulld))]
1502pub fn _mm512_mask_mullo_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1503    unsafe {
1504        let mul = _mm512_mullo_epi32(a, b).as_i32x16();
1505        transmute(simd_select_bitmask(k, mul, src.as_i32x16()))
1506    }
1507}
1508
1509/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1510///
1511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mullo_epi32&expand=4004)
1512#[inline]
1513#[target_feature(enable = "avx512f")]
1514#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1515#[cfg_attr(test, assert_instr(vpmulld))]
1516pub fn _mm512_maskz_mullo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1517    unsafe {
1518        let mul = _mm512_mullo_epi32(a, b).as_i32x16();
1519        transmute(simd_select_bitmask(k, mul, i32x16::ZERO))
1520    }
1521}
1522
1523/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1524///
1525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mullo_epi32&expand=4000)
1526#[inline]
1527#[target_feature(enable = "avx512f,avx512vl")]
1528#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1529#[cfg_attr(test, assert_instr(vpmulld))]
1530pub fn _mm256_mask_mullo_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1531    unsafe {
1532        let mul = _mm256_mullo_epi32(a, b).as_i32x8();
1533        transmute(simd_select_bitmask(k, mul, src.as_i32x8()))
1534    }
1535}
1536
1537/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1538///
1539/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mullo_epi32&expand=4001)
1540#[inline]
1541#[target_feature(enable = "avx512f,avx512vl")]
1542#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1543#[cfg_attr(test, assert_instr(vpmulld))]
1544pub fn _mm256_maskz_mullo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1545    unsafe {
1546        let mul = _mm256_mullo_epi32(a, b).as_i32x8();
1547        transmute(simd_select_bitmask(k, mul, i32x8::ZERO))
1548    }
1549}
1550
1551/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1552///
1553/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mullo_epi32&expand=3997)
1554#[inline]
1555#[target_feature(enable = "avx512f,avx512vl")]
1556#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1557#[cfg_attr(test, assert_instr(vpmulld))]
1558pub fn _mm_mask_mullo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1559    unsafe {
1560        let mul = _mm_mullo_epi32(a, b).as_i32x4();
1561        transmute(simd_select_bitmask(k, mul, src.as_i32x4()))
1562    }
1563}
1564
1565/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1566///
1567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mullo_epi32&expand=3998)
1568#[inline]
1569#[target_feature(enable = "avx512f,avx512vl")]
1570#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1571#[cfg_attr(test, assert_instr(vpmulld))]
1572pub fn _mm_maskz_mullo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1573    unsafe {
1574        let mul = _mm_mullo_epi32(a, b).as_i32x4();
1575        transmute(simd_select_bitmask(k, mul, i32x4::ZERO))
1576    }
1577}
1578
1579/// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst.
1580///
1581/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullox_epi64&expand=4017)
1582///
1583/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic.
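///
/// A minimal usage sketch (illustrative, not part of Intel's documentation); it assumes
/// nightly `stdarch_x86_avx512` and AVX-512F, and the `demo` wrapper is hypothetical.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_epi64(1_i64 << 40);
///     let b = _mm512_set1_epi64(3);
///     // Each lane holds the low 64 bits of the full product: 3 * 2^40.
///     let r = _mm512_mullox_epi64(a, b);
///     assert_eq!(_mm512_cmpeq_epi64_mask(r, _mm512_set1_epi64(3_i64 << 40)), 0xff);
/// }
/// ```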
1584#[inline]
1585#[target_feature(enable = "avx512f")]
1586#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1587pub fn _mm512_mullox_epi64(a: __m512i, b: __m512i) -> __m512i {
1588    unsafe { transmute(simd_mul(a.as_i64x8(), b.as_i64x8())) }
1589}
1590
1591/// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1592///
1593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullox_epi64&expand=4016)
1594///
1595/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic.
1596#[inline]
1597#[target_feature(enable = "avx512f")]
1598#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1599pub fn _mm512_mask_mullox_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1600    unsafe {
1601        let mul = _mm512_mullox_epi64(a, b).as_i64x8();
1602        transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
1603    }
1604}
1605
1606/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst.
1607///
1608/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_epu32&expand=3916)
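///
/// A minimal usage sketch (illustrative, not part of Intel's documentation) contrasting the
/// unsigned interpretation with `_mm512_mul_epi32`; it assumes nightly `stdarch_x86_avx512`
/// and AVX-512F, and the `demo` wrapper is hypothetical.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     // The low 32 bits of each lane of `a` are 0xFFFF_FFFF, read as 4_294_967_295 (unsigned).
///     let a = _mm512_set1_epi64(u32::MAX as i64);
///     let b = _mm512_set1_epi64(2);
///     let r = _mm512_mul_epu32(a, b);
///     let expected = _mm512_set1_epi64(2 * (u32::MAX as i64));
///     assert_eq!(_mm512_cmpeq_epi64_mask(r, expected), 0xff);
/// }
/// ```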
1609#[inline]
1610#[target_feature(enable = "avx512f")]
1611#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1612#[cfg_attr(test, assert_instr(vpmuludq))]
1613pub fn _mm512_mul_epu32(a: __m512i, b: __m512i) -> __m512i {
1614    unsafe {
1615        let a = a.as_u64x8();
1616        let b = b.as_u64x8();
1617        let mask = u64x8::splat(u32::MAX.into());
1618        transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
1619    }
1620}
1621
1622/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1623///
1624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_epu32&expand=3914)
1625#[inline]
1626#[target_feature(enable = "avx512f")]
1627#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1628#[cfg_attr(test, assert_instr(vpmuludq))]
1629pub fn _mm512_mask_mul_epu32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1630    unsafe {
1631        let mul = _mm512_mul_epu32(a, b).as_u64x8();
1632        transmute(simd_select_bitmask(k, mul, src.as_u64x8()))
1633    }
1634}
1635
1636/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1637///
1638/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_epu32&expand=3915)
1639#[inline]
1640#[target_feature(enable = "avx512f")]
1641#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1642#[cfg_attr(test, assert_instr(vpmuludq))]
1643pub fn _mm512_maskz_mul_epu32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1644    unsafe {
1645        let mul = _mm512_mul_epu32(a, b).as_u64x8();
1646        transmute(simd_select_bitmask(k, mul, u64x8::ZERO))
1647    }
1648}
1649
1650/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1651///
1652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_epu32&expand=3911)
1653#[inline]
1654#[target_feature(enable = "avx512f,avx512vl")]
1655#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1656#[cfg_attr(test, assert_instr(vpmuludq))]
1657pub fn _mm256_mask_mul_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1658    unsafe {
1659        let mul = _mm256_mul_epu32(a, b).as_u64x4();
1660        transmute(simd_select_bitmask(k, mul, src.as_u64x4()))
1661    }
1662}
1663
1664/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1665///
1666/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_epu32&expand=3912)
1667#[inline]
1668#[target_feature(enable = "avx512f,avx512vl")]
1669#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1670#[cfg_attr(test, assert_instr(vpmuludq))]
1671pub fn _mm256_maskz_mul_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1672    unsafe {
1673        let mul = _mm256_mul_epu32(a, b).as_u64x4();
1674        transmute(simd_select_bitmask(k, mul, u64x4::ZERO))
1675    }
1676}
1677
1678/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1679///
1680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_epu32&expand=3908)
1681#[inline]
1682#[target_feature(enable = "avx512f,avx512vl")]
1683#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1684#[cfg_attr(test, assert_instr(vpmuludq))]
1685pub fn _mm_mask_mul_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1686    unsafe {
1687        let mul = _mm_mul_epu32(a, b).as_u64x2();
1688        transmute(simd_select_bitmask(k, mul, src.as_u64x2()))
1689    }
1690}
1691
1692/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1693///
1694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_epu32&expand=3909)
1695#[inline]
1696#[target_feature(enable = "avx512f,avx512vl")]
1697#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1698#[cfg_attr(test, assert_instr(vpmuludq))]
1699pub fn _mm_maskz_mul_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1700    unsafe {
1701        let mul = _mm_mul_epu32(a, b).as_u64x2();
1702        transmute(simd_select_bitmask(k, mul, u64x2::ZERO))
1703    }
1704}
1705
1706/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
1707///
1708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_ps&expand=3934)
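///
/// A minimal usage sketch (illustrative, not part of Intel's documentation); it assumes
/// nightly `stdarch_x86_avx512` and AVX-512F, and the `demo` wrapper is hypothetical.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_ps(2.5);
///     let b = _mm512_set1_ps(4.0);
///     // Every lane becomes 2.5 * 4.0 = 10.0, so the 16-lane sum is exactly 160.0.
///     let r = _mm512_mul_ps(a, b);
///     assert_eq!(_mm512_reduce_add_ps(r), 160.0);
/// }
/// ```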
1709#[inline]
1710#[target_feature(enable = "avx512f")]
1711#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1712#[cfg_attr(test, assert_instr(vmulps))]
1713pub fn _mm512_mul_ps(a: __m512, b: __m512) -> __m512 {
1714    unsafe { transmute(simd_mul(a.as_f32x16(), b.as_f32x16())) }
1715}
1716
1717/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1718///
1719/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_ps&expand=3932)
1720#[inline]
1721#[target_feature(enable = "avx512f")]
1722#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1723#[cfg_attr(test, assert_instr(vmulps))]
1724pub fn _mm512_mask_mul_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
1725    unsafe {
1726        let mul = _mm512_mul_ps(a, b).as_f32x16();
1727        transmute(simd_select_bitmask(k, mul, src.as_f32x16()))
1728    }
1729}
1730
1731/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1732///
1733/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_ps&expand=3933)
1734#[inline]
1735#[target_feature(enable = "avx512f")]
1736#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1737#[cfg_attr(test, assert_instr(vmulps))]
1738pub fn _mm512_maskz_mul_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
1739    unsafe {
1740        let mul = _mm512_mul_ps(a, b).as_f32x16();
1741        transmute(simd_select_bitmask(k, mul, f32x16::ZERO))
1742    }
1743}
1744
1745/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1746///
1747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_ps&expand=3929)
1748#[inline]
1749#[target_feature(enable = "avx512f,avx512vl")]
1750#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1751#[cfg_attr(test, assert_instr(vmulps))]
1752pub fn _mm256_mask_mul_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
1753    unsafe {
1754        let mul = _mm256_mul_ps(a, b).as_f32x8();
1755        transmute(simd_select_bitmask(k, mul, src.as_f32x8()))
1756    }
1757}
1758
1759/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1760///
1761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_ps&expand=3930)
1762#[inline]
1763#[target_feature(enable = "avx512f,avx512vl")]
1764#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1765#[cfg_attr(test, assert_instr(vmulps))]
1766pub fn _mm256_maskz_mul_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
1767    unsafe {
1768        let mul = _mm256_mul_ps(a, b).as_f32x8();
1769        transmute(simd_select_bitmask(k, mul, f32x8::ZERO))
1770    }
1771}
1772
1773/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1774///
1775/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_ps&expand=3926)
1776#[inline]
1777#[target_feature(enable = "avx512f,avx512vl")]
1778#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1779#[cfg_attr(test, assert_instr(vmulps))]
1780pub fn _mm_mask_mul_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
1781    unsafe {
1782        let mul = _mm_mul_ps(a, b).as_f32x4();
1783        transmute(simd_select_bitmask(k, mul, src.as_f32x4()))
1784    }
1785}
1786
1787/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1788///
1789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_ps&expand=3927)
1790#[inline]
1791#[target_feature(enable = "avx512f,avx512vl")]
1792#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1793#[cfg_attr(test, assert_instr(vmulps))]
1794pub fn _mm_maskz_mul_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
1795    unsafe {
1796        let mul = _mm_mul_ps(a, b).as_f32x4();
1797        transmute(simd_select_bitmask(k, mul, f32x4::ZERO))
1798    }
1799}
1800
1801/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
1802///
1803/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_pd&expand=3925)
1804#[inline]
1805#[target_feature(enable = "avx512f")]
1806#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1807#[cfg_attr(test, assert_instr(vmulpd))]
1808pub fn _mm512_mul_pd(a: __m512d, b: __m512d) -> __m512d {
1809    unsafe { transmute(simd_mul(a.as_f64x8(), b.as_f64x8())) }
1810}
1811
1812/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1813///
1814/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_pd&expand=3923)
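///
/// A minimal usage sketch (illustrative, not part of Intel's documentation) of the writemask
/// form; it assumes nightly `stdarch_x86_avx512` and AVX-512F, and the `demo` wrapper is
/// hypothetical.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let src = _mm512_set1_pd(-1.0);
///     let a = _mm512_set1_pd(2.0);
///     let b = _mm512_set1_pd(8.0);
///     // Lanes 0 and 1 get 2.0 * 8.0 = 16.0; the other six lanes keep `src` (-1.0).
///     let r = _mm512_mask_mul_pd(src, 0b0000_0011, a, b);
///     assert_eq!(_mm512_reduce_add_pd(r), 26.0); // 2 * 16.0 + 6 * -1.0
/// }
/// ```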
1815#[inline]
1816#[target_feature(enable = "avx512f")]
1817#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1818#[cfg_attr(test, assert_instr(vmulpd))]
1819pub fn _mm512_mask_mul_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1820    unsafe {
1821        let mul = _mm512_mul_pd(a, b).as_f64x8();
1822        transmute(simd_select_bitmask(k, mul, src.as_f64x8()))
1823    }
1824}
1825
1826/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1827///
1828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_pd&expand=3924)
1829#[inline]
1830#[target_feature(enable = "avx512f")]
1831#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1832#[cfg_attr(test, assert_instr(vmulpd))]
1833pub fn _mm512_maskz_mul_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1834    unsafe {
1835        let mul = _mm512_mul_pd(a, b).as_f64x8();
1836        transmute(simd_select_bitmask(k, mul, f64x8::ZERO))
1837    }
1838}
1839
1840/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1841///
1842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_pd&expand=3920)
1843#[inline]
1844#[target_feature(enable = "avx512f,avx512vl")]
1845#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1846#[cfg_attr(test, assert_instr(vmulpd))]
1847pub fn _mm256_mask_mul_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1848    unsafe {
1849        let mul = _mm256_mul_pd(a, b).as_f64x4();
1850        transmute(simd_select_bitmask(k, mul, src.as_f64x4()))
1851    }
1852}
1853
1854/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1855///
1856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_pd&expand=3921)
1857#[inline]
1858#[target_feature(enable = "avx512f,avx512vl")]
1859#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1860#[cfg_attr(test, assert_instr(vmulpd))]
1861pub fn _mm256_maskz_mul_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1862    unsafe {
1863        let mul = _mm256_mul_pd(a, b).as_f64x4();
1864        transmute(simd_select_bitmask(k, mul, f64x4::ZERO))
1865    }
1866}
1867
1868/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1869///
1870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_pd&expand=3917)
1871#[inline]
1872#[target_feature(enable = "avx512f,avx512vl")]
1873#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1874#[cfg_attr(test, assert_instr(vmulpd))]
1875pub fn _mm_mask_mul_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1876    unsafe {
1877        let mul = _mm_mul_pd(a, b).as_f64x2();
1878        transmute(simd_select_bitmask(k, mul, src.as_f64x2()))
1879    }
1880}
1881
1882/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1883///
1884/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_pd&expand=3918)
1885#[inline]
1886#[target_feature(enable = "avx512f,avx512vl")]
1887#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1888#[cfg_attr(test, assert_instr(vmulpd))]
1889pub fn _mm_maskz_mul_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1890    unsafe {
1891        let mul = _mm_mul_pd(a, b).as_f64x2();
1892        transmute(simd_select_bitmask(k, mul, f64x2::ZERO))
1893    }
1894}
1895
1896/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.
1897///
1898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_ps&expand=2162)
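///
/// A minimal usage sketch (illustrative, not part of Intel's documentation); it assumes
/// nightly `stdarch_x86_avx512` and AVX-512F, and the `demo` wrapper is hypothetical.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_ps(9.0);
///     let b = _mm512_set1_ps(4.0);
///     // Every lane becomes 9.0 / 4.0 = 2.25 (plain IEEE division, default rounding).
///     let r = _mm512_div_ps(a, b);
///     assert_eq!(_mm512_reduce_add_ps(r), 36.0);
/// }
/// ```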
1899#[inline]
1900#[target_feature(enable = "avx512f")]
1901#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1902#[cfg_attr(test, assert_instr(vdivps))]
1903pub fn _mm512_div_ps(a: __m512, b: __m512) -> __m512 {
1904    unsafe { transmute(simd_div(a.as_f32x16(), b.as_f32x16())) }
1905}
1906
1907/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1908///
1909/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_ps&expand=2163)
1910#[inline]
1911#[target_feature(enable = "avx512f")]
1912#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1913#[cfg_attr(test, assert_instr(vdivps))]
1914pub fn _mm512_mask_div_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
1915    unsafe {
1916        let div = _mm512_div_ps(a, b).as_f32x16();
1917        transmute(simd_select_bitmask(k, div, src.as_f32x16()))
1918    }
1919}
1920
1921/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1922///
1923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_ps&expand=2164)
1924#[inline]
1925#[target_feature(enable = "avx512f")]
1926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1927#[cfg_attr(test, assert_instr(vdivps))]
1928pub fn _mm512_maskz_div_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
1929    unsafe {
1930        let div = _mm512_div_ps(a, b).as_f32x16();
1931        transmute(simd_select_bitmask(k, div, f32x16::ZERO))
1932    }
1933}
1934
1935/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1936///
1937/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_div_ps&expand=2160)
1938#[inline]
1939#[target_feature(enable = "avx512f,avx512vl")]
1940#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1941#[cfg_attr(test, assert_instr(vdivps))]
1942pub fn _mm256_mask_div_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
1943    unsafe {
1944        let div = _mm256_div_ps(a, b).as_f32x8();
1945        transmute(simd_select_bitmask(k, div, src.as_f32x8()))
1946    }
1947}
1948
1949/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1950///
1951/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_div_ps&expand=2161)
1952#[inline]
1953#[target_feature(enable = "avx512f,avx512vl")]
1954#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1955#[cfg_attr(test, assert_instr(vdivps))]
1956pub fn _mm256_maskz_div_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
1957    unsafe {
1958        let div = _mm256_div_ps(a, b).as_f32x8();
1959        transmute(simd_select_bitmask(k, div, f32x8::ZERO))
1960    }
1961}
1962
1963/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1964///
1965/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_div_ps&expand=2157)
1966#[inline]
1967#[target_feature(enable = "avx512f,avx512vl")]
1968#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1969#[cfg_attr(test, assert_instr(vdivps))]
1970pub fn _mm_mask_div_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
1971    unsafe {
1972        let div = _mm_div_ps(a, b).as_f32x4();
1973        transmute(simd_select_bitmask(k, div, src.as_f32x4()))
1974    }
1975}
1976
1977/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1978///
1979/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_div_ps&expand=2158)
1980#[inline]
1981#[target_feature(enable = "avx512f,avx512vl")]
1982#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1983#[cfg_attr(test, assert_instr(vdivps))]
1984pub fn _mm_maskz_div_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
1985    unsafe {
1986        let div = _mm_div_ps(a, b).as_f32x4();
1987        transmute(simd_select_bitmask(k, div, f32x4::ZERO))
1988    }
1989}
1990
1991/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst.
1992///
1993/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_pd&expand=2153)
1994#[inline]
1995#[target_feature(enable = "avx512f")]
1996#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1997#[cfg_attr(test, assert_instr(vdivpd))]
1998pub fn _mm512_div_pd(a: __m512d, b: __m512d) -> __m512d {
1999    unsafe { transmute(simd_div(a.as_f64x8(), b.as_f64x8())) }
2000}
2001
2002/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2003///
2004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_pd&expand=2154)
2005#[inline]
2006#[target_feature(enable = "avx512f")]
2007#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2008#[cfg_attr(test, assert_instr(vdivpd))]
2009pub fn _mm512_mask_div_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2010    unsafe {
2011        let div = _mm512_div_pd(a, b).as_f64x8();
2012        transmute(simd_select_bitmask(k, div, src.as_f64x8()))
2013    }
2014}
2015
2016/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2017///
2018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_pd&expand=2155)
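///
/// A minimal usage sketch (illustrative, not part of Intel's documentation) of the zeromask
/// form; it assumes nightly `stdarch_x86_avx512` and AVX-512F, and the `demo` wrapper is
/// hypothetical.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_pd(1.0);
///     let b = _mm512_set1_pd(8.0);
///     // Even-indexed lanes are computed (1.0 / 8.0 = 0.125); odd-indexed lanes are zeroed.
///     let r = _mm512_maskz_div_pd(0b0101_0101, a, b);
///     assert_eq!(_mm512_reduce_add_pd(r), 0.5);
/// }
/// ```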
2019#[inline]
2020#[target_feature(enable = "avx512f")]
2021#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2022#[cfg_attr(test, assert_instr(vdivpd))]
2023pub fn _mm512_maskz_div_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2024    unsafe {
2025        let div = _mm512_div_pd(a, b).as_f64x8();
2026        transmute(simd_select_bitmask(k, div, f64x8::ZERO))
2027    }
2028}
2029
2030/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2031///
2032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_div_pd&expand=2151)
2033#[inline]
2034#[target_feature(enable = "avx512f,avx512vl")]
2035#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2036#[cfg_attr(test, assert_instr(vdivpd))]
2037pub fn _mm256_mask_div_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2038    unsafe {
2039        let div = _mm256_div_pd(a, b).as_f64x4();
2040        transmute(simd_select_bitmask(k, div, src.as_f64x4()))
2041    }
2042}
2043
2044/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2045///
2046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_div_pd&expand=2152)
2047#[inline]
2048#[target_feature(enable = "avx512f,avx512vl")]
2049#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2050#[cfg_attr(test, assert_instr(vdivpd))]
2051pub fn _mm256_maskz_div_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2052    unsafe {
2053        let div = _mm256_div_pd(a, b).as_f64x4();
2054        transmute(simd_select_bitmask(k, div, f64x4::ZERO))
2055    }
2056}
2057
2058/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2059///
2060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_div_pd&expand=2148)
2061#[inline]
2062#[target_feature(enable = "avx512f,avx512vl")]
2063#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2064#[cfg_attr(test, assert_instr(vdivpd))]
2065pub fn _mm_mask_div_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2066    unsafe {
2067        let div = _mm_div_pd(a, b).as_f64x2();
2068        transmute(simd_select_bitmask(k, div, src.as_f64x2()))
2069    }
2070}
2071
2072/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2073///
2074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_div_pd&expand=2149)
2075#[inline]
2076#[target_feature(enable = "avx512f,avx512vl")]
2077#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2078#[cfg_attr(test, assert_instr(vdivpd))]
2079pub fn _mm_maskz_div_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2080    unsafe {
2081        let div = _mm_div_pd(a, b).as_f64x2();
2082        transmute(simd_select_bitmask(k, div, f64x2::ZERO))
2083    }
2084}
2085
2086/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst.
2087///
2088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi32&expand=3582)
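///
/// A minimal usage sketch (illustrative, not part of Intel's documentation); it assumes
/// nightly `stdarch_x86_avx512` and AVX-512F, and the `demo` wrapper is hypothetical.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///     let b = _mm512_set1_epi32(7);
///     // Each lane keeps the larger of the two signed values.
///     let r = _mm512_max_epi32(a, b);
///     let expected = _mm512_setr_epi32(7, 7, 7, 7, 7, 7, 7, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///     assert_eq!(_mm512_cmpeq_epi32_mask(r, expected), 0xffff);
/// }
/// ```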
2089#[inline]
2090#[target_feature(enable = "avx512f")]
2091#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2092#[cfg_attr(test, assert_instr(vpmaxsd))]
2093pub fn _mm512_max_epi32(a: __m512i, b: __m512i) -> __m512i {
2094    unsafe {
2095        let a = a.as_i32x16();
2096        let b = b.as_i32x16();
2097        transmute(simd_select::<i32x16, _>(simd_gt(a, b), a, b))
2098    }
2099}
2100
2101/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2102///
2103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi32&expand=3580)
2104#[inline]
2105#[target_feature(enable = "avx512f")]
2106#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2107#[cfg_attr(test, assert_instr(vpmaxsd))]
2108pub fn _mm512_mask_max_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2109    unsafe {
2110        let max = _mm512_max_epi32(a, b).as_i32x16();
2111        transmute(simd_select_bitmask(k, max, src.as_i32x16()))
2112    }
2113}
2114
2115/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2116///
2117/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi32&expand=3581)
2118#[inline]
2119#[target_feature(enable = "avx512f")]
2120#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2121#[cfg_attr(test, assert_instr(vpmaxsd))]
2122pub fn _mm512_maskz_max_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2123    unsafe {
2124        let max = _mm512_max_epi32(a, b).as_i32x16();
2125        transmute(simd_select_bitmask(k, max, i32x16::ZERO))
2126    }
2127}
2128
2129/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2130///
2131/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi32&expand=3577)
2132#[inline]
2133#[target_feature(enable = "avx512f,avx512vl")]
2134#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2135#[cfg_attr(test, assert_instr(vpmaxsd))]
2136pub fn _mm256_mask_max_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2137    unsafe {
2138        let max = _mm256_max_epi32(a, b).as_i32x8();
2139        transmute(simd_select_bitmask(k, max, src.as_i32x8()))
2140    }
2141}
2142
2143/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2144///
2145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi32&expand=3578)
2146#[inline]
2147#[target_feature(enable = "avx512f,avx512vl")]
2148#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2149#[cfg_attr(test, assert_instr(vpmaxsd))]
2150pub fn _mm256_maskz_max_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2151    unsafe {
2152        let max = _mm256_max_epi32(a, b).as_i32x8();
2153        transmute(simd_select_bitmask(k, max, i32x8::ZERO))
2154    }
2155}
2156
2157/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2158///
2159/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi32&expand=3574)
2160#[inline]
2161#[target_feature(enable = "avx512f,avx512vl")]
2162#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2163#[cfg_attr(test, assert_instr(vpmaxsd))]
2164pub fn _mm_mask_max_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2165    unsafe {
2166        let max = _mm_max_epi32(a, b).as_i32x4();
2167        transmute(simd_select_bitmask(k, max, src.as_i32x4()))
2168    }
2169}
2170
2171/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2172///
2173/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi32&expand=3575)
2174#[inline]
2175#[target_feature(enable = "avx512f,avx512vl")]
2176#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2177#[cfg_attr(test, assert_instr(vpmaxsd))]
2178pub fn _mm_maskz_max_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2179    unsafe {
2180        let max = _mm_max_epi32(a, b).as_i32x4();
2181        transmute(simd_select_bitmask(k, max, i32x4::ZERO))
2182    }
2183}
2184
2185/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
2186///
2187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi64&expand=3591)
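///
/// # Examples
///
/// A hypothetical usage sketch of the signed 64-bit comparison (illustrative only;
/// assumes nightly with the unstable `stdarch_x86_avx512` feature and runtime AVX-512F
/// support):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm512_set1_epi64(-1);
/// let b = _mm512_set1_epi64(i64::MIN);
/// let r = _mm512_max_epi64(a, b);
/// // The comparison is signed, so every lane of `r` is -1.
/// ```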
2188#[inline]
2189#[target_feature(enable = "avx512f")]
2190#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2191#[cfg_attr(test, assert_instr(vpmaxsq))]
2192pub fn _mm512_max_epi64(a: __m512i, b: __m512i) -> __m512i {
2193    unsafe {
2194        let a = a.as_i64x8();
2195        let b = b.as_i64x8();
2196        transmute(simd_select::<i64x8, _>(simd_gt(a, b), a, b))
2197    }
2198}
2199
2200/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2201///
2202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi64&expand=3589)
2203#[inline]
2204#[target_feature(enable = "avx512f")]
2205#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2206#[cfg_attr(test, assert_instr(vpmaxsq))]
2207pub fn _mm512_mask_max_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2208    unsafe {
2209        let max = _mm512_max_epi64(a, b).as_i64x8();
2210        transmute(simd_select_bitmask(k, max, src.as_i64x8()))
2211    }
2212}
2213
2214/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2215///
2216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi64&expand=3590)
2217#[inline]
2218#[target_feature(enable = "avx512f")]
2219#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2220#[cfg_attr(test, assert_instr(vpmaxsq))]
2221pub fn _mm512_maskz_max_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2222    unsafe {
2223        let max = _mm512_max_epi64(a, b).as_i64x8();
2224        transmute(simd_select_bitmask(k, max, i64x8::ZERO))
2225    }
2226}
2227
2228/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
2229///
2230/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epi64&expand=3588)
2231#[inline]
2232#[target_feature(enable = "avx512f,avx512vl")]
2233#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2234#[cfg_attr(test, assert_instr(vpmaxsq))]
2235pub fn _mm256_max_epi64(a: __m256i, b: __m256i) -> __m256i {
2236    unsafe {
2237        let a = a.as_i64x4();
2238        let b = b.as_i64x4();
2239        transmute(simd_select::<i64x4, _>(simd_gt(a, b), a, b))
2240    }
2241}
2242
2243/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2244///
2245/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi64&expand=3586)
2246#[inline]
2247#[target_feature(enable = "avx512f,avx512vl")]
2248#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2249#[cfg_attr(test, assert_instr(vpmaxsq))]
2250pub fn _mm256_mask_max_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2251    unsafe {
2252        let max = _mm256_max_epi64(a, b).as_i64x4();
2253        transmute(simd_select_bitmask(k, max, src.as_i64x4()))
2254    }
2255}
2256
2257/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2258///
2259/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi64&expand=3587)
2260#[inline]
2261#[target_feature(enable = "avx512f,avx512vl")]
2262#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2263#[cfg_attr(test, assert_instr(vpmaxsq))]
2264pub fn _mm256_maskz_max_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2265    unsafe {
2266        let max = _mm256_max_epi64(a, b).as_i64x4();
2267        transmute(simd_select_bitmask(k, max, i64x4::ZERO))
2268    }
2269}
2270
2271/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
2272///
2273/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi64&expand=3585)
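///
/// # Examples
///
/// Unlike the 32-bit `_mm_max_epi32`, there is no SSE/AVX2 form of a packed 64-bit
/// integer maximum; this 128-bit variant requires AVX-512VL. A hypothetical usage sketch
/// (illustrative only; assumes nightly with the unstable `stdarch_x86_avx512` feature and
/// a CPU supporting AVX-512F and AVX-512VL):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_epi64x(10, -20); // elements: [-20, 10] (low lane first)
/// let b = _mm_set_epi64x(-10, 20); // elements: [20, -10]
/// let r = _mm_max_epi64(a, b);
/// // r = [20, 10]: lane 0 = max(-20, 20), lane 1 = max(10, -10).
/// ```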
2274#[inline]
2275#[target_feature(enable = "avx512f,avx512vl")]
2276#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2277#[cfg_attr(test, assert_instr(vpmaxsq))]
2278pub fn _mm_max_epi64(a: __m128i, b: __m128i) -> __m128i {
2279    unsafe {
2280        let a = a.as_i64x2();
2281        let b = b.as_i64x2();
2282        transmute(simd_select::<i64x2, _>(simd_gt(a, b), a, b))
2283    }
2284}
2285
2286/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2287///
2288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi64&expand=3583)
2289#[inline]
2290#[target_feature(enable = "avx512f,avx512vl")]
2291#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2292#[cfg_attr(test, assert_instr(vpmaxsq))]
2293pub fn _mm_mask_max_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2294    unsafe {
2295        let max = _mm_max_epi64(a, b).as_i64x2();
2296        transmute(simd_select_bitmask(k, max, src.as_i64x2()))
2297    }
2298}
2299
2300/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2301///
2302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi64&expand=3584)
2303#[inline]
2304#[target_feature(enable = "avx512f,avx512vl")]
2305#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2306#[cfg_attr(test, assert_instr(vpmaxsq))]
2307pub fn _mm_maskz_max_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2308    unsafe {
2309        let max = _mm_max_epi64(a, b).as_i64x2();
2310        transmute(simd_select_bitmask(k, max, i64x2::ZERO))
2311    }
2312}
2313
2314/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.
2315///
2316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_ps&expand=3655)
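///
/// # Examples
///
/// A hypothetical sketch of the NaN handling inherited from the `vmaxps` instruction
/// (illustrative only; assumes nightly with the unstable `stdarch_x86_avx512` feature and
/// runtime AVX-512F support):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm512_set1_ps(f32::NAN);
/// let b = _mm512_set1_ps(1.0);
/// // When exactly one input is NaN, the second operand is returned, so `r` is
/// // all 1.0 here but would be all NaN with the arguments swapped.
/// let r = _mm512_max_ps(a, b);
/// ```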
2317#[inline]
2318#[target_feature(enable = "avx512f")]
2319#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2320#[cfg_attr(test, assert_instr(vmaxps))]
2321pub fn _mm512_max_ps(a: __m512, b: __m512) -> __m512 {
2322    unsafe {
2323        transmute(vmaxps(
2324            a.as_f32x16(),
2325            b.as_f32x16(),
2326            _MM_FROUND_CUR_DIRECTION,
2327        ))
2328    }
2329}
2330
2331/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2332///
2333/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_ps&expand=3653)
2334#[inline]
2335#[target_feature(enable = "avx512f")]
2336#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2337#[cfg_attr(test, assert_instr(vmaxps))]
2338pub fn _mm512_mask_max_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
2339    unsafe {
2340        let max = _mm512_max_ps(a, b).as_f32x16();
2341        transmute(simd_select_bitmask(k, max, src.as_f32x16()))
2342    }
2343}
2344
2345/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2346///
2347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_ps&expand=3654)
2348#[inline]
2349#[target_feature(enable = "avx512f")]
2350#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2351#[cfg_attr(test, assert_instr(vmaxps))]
2352pub fn _mm512_maskz_max_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
2353    unsafe {
2354        let max = _mm512_max_ps(a, b).as_f32x16();
2355        transmute(simd_select_bitmask(k, max, f32x16::ZERO))
2356    }
2357}
2358
2359/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2360///
2361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_ps&expand=3650)
2362#[inline]
2363#[target_feature(enable = "avx512f,avx512vl")]
2364#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2365#[cfg_attr(test, assert_instr(vmaxps))]
2366pub fn _mm256_mask_max_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
2367    unsafe {
2368        let max = _mm256_max_ps(a, b).as_f32x8();
2369        transmute(simd_select_bitmask(k, max, src.as_f32x8()))
2370    }
2371}
2372
2373/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2374///
2375/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_ps&expand=3651)
2376#[inline]
2377#[target_feature(enable = "avx512f,avx512vl")]
2378#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2379#[cfg_attr(test, assert_instr(vmaxps))]
2380pub fn _mm256_maskz_max_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
2381    unsafe {
2382        let max = _mm256_max_ps(a, b).as_f32x8();
2383        transmute(simd_select_bitmask(k, max, f32x8::ZERO))
2384    }
2385}
2386
2387/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2388///
2389/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_ps&expand=3647)
2390#[inline]
2391#[target_feature(enable = "avx512f,avx512vl")]
2392#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2393#[cfg_attr(test, assert_instr(vmaxps))]
2394pub fn _mm_mask_max_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
2395    unsafe {
2396        let max = _mm_max_ps(a, b).as_f32x4();
2397        transmute(simd_select_bitmask(k, max, src.as_f32x4()))
2398    }
2399}
2400
2401/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2402///
2403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_ps&expand=3648)
2404#[inline]
2405#[target_feature(enable = "avx512f,avx512vl")]
2406#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2407#[cfg_attr(test, assert_instr(vmaxps))]
2408pub fn _mm_maskz_max_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
2409    unsafe {
2410        let max = _mm_max_ps(a, b).as_f32x4();
2411        transmute(simd_select_bitmask(k, max, f32x4::ZERO))
2412    }
2413}
2414
2415/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.
2416///
2417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_pd&expand=3645)
2418#[inline]
2419#[target_feature(enable = "avx512f")]
2420#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2421#[cfg_attr(test, assert_instr(vmaxpd))]
2422pub fn _mm512_max_pd(a: __m512d, b: __m512d) -> __m512d {
2423    unsafe { transmute(vmaxpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION)) }
2424}
2425
2426/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2427///
2428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_pd&expand=3643)
2429#[inline]
2430#[target_feature(enable = "avx512f")]
2431#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2432#[cfg_attr(test, assert_instr(vmaxpd))]
2433pub fn _mm512_mask_max_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2434    unsafe {
2435        let max = _mm512_max_pd(a, b).as_f64x8();
2436        transmute(simd_select_bitmask(k, max, src.as_f64x8()))
2437    }
2438}
2439
2440/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2441///
2442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_pd&expand=3644)
2443#[inline]
2444#[target_feature(enable = "avx512f")]
2445#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2446#[cfg_attr(test, assert_instr(vmaxpd))]
2447pub fn _mm512_maskz_max_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2448    unsafe {
2449        let max = _mm512_max_pd(a, b).as_f64x8();
2450        transmute(simd_select_bitmask(k, max, f64x8::ZERO))
2451    }
2452}
2453
2454/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2455///
2456/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_pd&expand=3640)
2457#[inline]
2458#[target_feature(enable = "avx512f,avx512vl")]
2459#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2460#[cfg_attr(test, assert_instr(vmaxpd))]
2461pub fn _mm256_mask_max_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2462    unsafe {
2463        let max = _mm256_max_pd(a, b).as_f64x4();
2464        transmute(simd_select_bitmask(k, max, src.as_f64x4()))
2465    }
2466}
2467
2468/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2469///
2470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_pd&expand=3641)
2471#[inline]
2472#[target_feature(enable = "avx512f,avx512vl")]
2473#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2474#[cfg_attr(test, assert_instr(vmaxpd))]
2475pub fn _mm256_maskz_max_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2476    unsafe {
2477        let max = _mm256_max_pd(a, b).as_f64x4();
2478        transmute(simd_select_bitmask(k, max, f64x4::ZERO))
2479    }
2480}
2481
2482/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2483///
2484/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_pd&expand=3637)
2485#[inline]
2486#[target_feature(enable = "avx512f,avx512vl")]
2487#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2488#[cfg_attr(test, assert_instr(vmaxpd))]
2489pub fn _mm_mask_max_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2490    unsafe {
2491        let max = _mm_max_pd(a, b).as_f64x2();
2492        transmute(simd_select_bitmask(k, max, src.as_f64x2()))
2493    }
2494}
2495
2496/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2497///
2498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_pd&expand=3638)
2499#[inline]
2500#[target_feature(enable = "avx512f,avx512vl")]
2501#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2502#[cfg_attr(test, assert_instr(vmaxpd))]
2503pub fn _mm_maskz_max_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2504    unsafe {
2505        let max = _mm_max_pd(a, b).as_f64x2();
2506        transmute(simd_select_bitmask(k, max, f64x2::ZERO))
2507    }
2508}
2509
2510/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst.
2511///
2512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu32&expand=3618)
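///
/// # Examples
///
/// A hypothetical sketch contrasting the unsigned comparison with its signed counterpart
/// (illustrative only; assumes nightly with the unstable `stdarch_x86_avx512` feature and
/// runtime AVX-512F support):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm512_set1_epi32(-1); // all bits set: 0xFFFF_FFFF
/// let b = _mm512_set1_epi32(1);
/// // Interpreted as unsigned, 0xFFFF_FFFF is u32::MAX, so it wins in every lane,
/// // whereas `_mm512_max_epi32` would have picked 1.
/// let r = _mm512_max_epu32(a, b);
/// ```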
2513#[inline]
2514#[target_feature(enable = "avx512f")]
2515#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2516#[cfg_attr(test, assert_instr(vpmaxud))]
2517pub fn _mm512_max_epu32(a: __m512i, b: __m512i) -> __m512i {
2518    unsafe {
2519        let a = a.as_u32x16();
2520        let b = b.as_u32x16();
2521        transmute(simd_select::<i32x16, _>(simd_gt(a, b), a, b))
2522    }
2523}
2524
2525/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2526///
2527/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu32&expand=3616)
2528#[inline]
2529#[target_feature(enable = "avx512f")]
2530#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2531#[cfg_attr(test, assert_instr(vpmaxud))]
2532pub fn _mm512_mask_max_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2533    unsafe {
2534        let max = _mm512_max_epu32(a, b).as_u32x16();
2535        transmute(simd_select_bitmask(k, max, src.as_u32x16()))
2536    }
2537}
2538
2539/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2540///
2541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu32&expand=3617)
2542#[inline]
2543#[target_feature(enable = "avx512f")]
2544#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2545#[cfg_attr(test, assert_instr(vpmaxud))]
2546pub fn _mm512_maskz_max_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2547    unsafe {
2548        let max = _mm512_max_epu32(a, b).as_u32x16();
2549        transmute(simd_select_bitmask(k, max, u32x16::ZERO))
2550    }
2551}
2552
2553/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2554///
2555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu32&expand=3613)
2556#[inline]
2557#[target_feature(enable = "avx512f,avx512vl")]
2558#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2559#[cfg_attr(test, assert_instr(vpmaxud))]
2560pub fn _mm256_mask_max_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2561    unsafe {
2562        let max = _mm256_max_epu32(a, b).as_u32x8();
2563        transmute(simd_select_bitmask(k, max, src.as_u32x8()))
2564    }
2565}
2566
2567/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2568///
2569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu32&expand=3614)
2570#[inline]
2571#[target_feature(enable = "avx512f,avx512vl")]
2572#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2573#[cfg_attr(test, assert_instr(vpmaxud))]
2574pub fn _mm256_maskz_max_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2575    unsafe {
2576        let max = _mm256_max_epu32(a, b).as_u32x8();
2577        transmute(simd_select_bitmask(k, max, u32x8::ZERO))
2578    }
2579}
2580
2581/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2582///
2583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu32&expand=3610)
2584#[inline]
2585#[target_feature(enable = "avx512f,avx512vl")]
2586#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2587#[cfg_attr(test, assert_instr(vpmaxud))]
2588pub fn _mm_mask_max_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2589    unsafe {
2590        let max = _mm_max_epu32(a, b).as_u32x4();
2591        transmute(simd_select_bitmask(k, max, src.as_u32x4()))
2592    }
2593}
2594
2595/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2596///
2597/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu32&expand=3611)
2598#[inline]
2599#[target_feature(enable = "avx512f,avx512vl")]
2600#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2601#[cfg_attr(test, assert_instr(vpmaxud))]
2602pub fn _mm_maskz_max_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2603    unsafe {
2604        let max = _mm_max_epu32(a, b).as_u32x4();
2605        transmute(simd_select_bitmask(k, max, u32x4::ZERO))
2606    }
2607}
2608
2609/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
2610///
2611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu64&expand=3627)
2612#[inline]
2613#[target_feature(enable = "avx512f")]
2614#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2615#[cfg_attr(test, assert_instr(vpmaxuq))]
2616pub fn _mm512_max_epu64(a: __m512i, b: __m512i) -> __m512i {
2617    unsafe {
2618        let a = a.as_u64x8();
2619        let b = b.as_u64x8();
2620        transmute(simd_select::<i64x8, _>(simd_gt(a, b), a, b))
2621    }
2622}
2623
2624/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2625///
2626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu64&expand=3625)
2627#[inline]
2628#[target_feature(enable = "avx512f")]
2629#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2630#[cfg_attr(test, assert_instr(vpmaxuq))]
2631pub fn _mm512_mask_max_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2632    unsafe {
2633        let max = _mm512_max_epu64(a, b).as_u64x8();
2634        transmute(simd_select_bitmask(k, max, src.as_u64x8()))
2635    }
2636}
2637
2638/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2639///
2640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu64&expand=3626)
2641#[inline]
2642#[target_feature(enable = "avx512f")]
2643#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2644#[cfg_attr(test, assert_instr(vpmaxuq))]
2645pub fn _mm512_maskz_max_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2646    unsafe {
2647        let max = _mm512_max_epu64(a, b).as_u64x8();
2648        transmute(simd_select_bitmask(k, max, u64x8::ZERO))
2649    }
2650}
2651
2652/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
2653///
2654/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epu64&expand=3624)
2655#[inline]
2656#[target_feature(enable = "avx512f,avx512vl")]
2657#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2658#[cfg_attr(test, assert_instr(vpmaxuq))]
2659pub fn _mm256_max_epu64(a: __m256i, b: __m256i) -> __m256i {
2660    unsafe {
2661        let a = a.as_u64x4();
2662        let b = b.as_u64x4();
2663        transmute(simd_select::<i64x4, _>(simd_gt(a, b), a, b))
2664    }
2665}
2666
2667/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2668///
2669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu64&expand=3622)
2670#[inline]
2671#[target_feature(enable = "avx512f,avx512vl")]
2672#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2673#[cfg_attr(test, assert_instr(vpmaxuq))]
2674pub fn _mm256_mask_max_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2675    unsafe {
2676        let max = _mm256_max_epu64(a, b).as_u64x4();
2677        transmute(simd_select_bitmask(k, max, src.as_u64x4()))
2678    }
2679}
2680
2681/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2682///
2683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu64&expand=3623)
2684#[inline]
2685#[target_feature(enable = "avx512f,avx512vl")]
2686#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2687#[cfg_attr(test, assert_instr(vpmaxuq))]
2688pub fn _mm256_maskz_max_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2689    unsafe {
2690        let max = _mm256_max_epu64(a, b).as_u64x4();
2691        transmute(simd_select_bitmask(k, max, u64x4::ZERO))
2692    }
2693}
2694
2695/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
2696///
2697/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu64&expand=3621)
2698#[inline]
2699#[target_feature(enable = "avx512f,avx512vl")]
2700#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2701#[cfg_attr(test, assert_instr(vpmaxuq))]
2702pub fn _mm_max_epu64(a: __m128i, b: __m128i) -> __m128i {
2703    unsafe {
2704        let a = a.as_u64x2();
2705        let b = b.as_u64x2();
2706        transmute(simd_select::<i64x2, _>(simd_gt(a, b), a, b))
2707    }
2708}
2709
2710/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2711///
2712/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu64&expand=3619)
2713#[inline]
2714#[target_feature(enable = "avx512f,avx512vl")]
2715#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2716#[cfg_attr(test, assert_instr(vpmaxuq))]
2717pub fn _mm_mask_max_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2718    unsafe {
2719        let max = _mm_max_epu64(a, b).as_u64x2();
2720        transmute(simd_select_bitmask(k, max, src.as_u64x2()))
2721    }
2722}
2723
2724/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2725///
2726/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu64&expand=3620)
2727#[inline]
2728#[target_feature(enable = "avx512f,avx512vl")]
2729#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2730#[cfg_attr(test, assert_instr(vpmaxuq))]
2731pub fn _mm_maskz_max_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2732    unsafe {
2733        let max = _mm_max_epu64(a, b).as_u64x2();
2734        transmute(simd_select_bitmask(k, max, u64x2::ZERO))
2735    }
2736}
2737
2738/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst.
2739///
2740/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi32&expand=3696)
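///
/// # Examples
///
/// A hypothetical usage sketch (illustrative only; assumes nightly with the unstable
/// `stdarch_x86_avx512` feature and runtime AVX-512F support):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// let b = _mm512_set1_epi32(8);
/// let r = _mm512_min_epi32(a, b);
/// // r = [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8]
/// ```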
2741#[inline]
2742#[target_feature(enable = "avx512f")]
2743#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2744#[cfg_attr(test, assert_instr(vpminsd))]
2745pub fn _mm512_min_epi32(a: __m512i, b: __m512i) -> __m512i {
2746    unsafe {
2747        let a = a.as_i32x16();
2748        let b = b.as_i32x16();
2749        transmute(simd_select::<i32x16, _>(simd_lt(a, b), a, b))
2750    }
2751}
2752
2753/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2754///
2755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi32&expand=3694)
2756#[inline]
2757#[target_feature(enable = "avx512f")]
2758#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2759#[cfg_attr(test, assert_instr(vpminsd))]
2760pub fn _mm512_mask_min_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2761    unsafe {
2762        let min = _mm512_min_epi32(a, b).as_i32x16();
2763        transmute(simd_select_bitmask(k, min, src.as_i32x16()))
2764    }
2765}
2766
2767/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2768///
2769/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi32&expand=3695)
2770#[inline]
2771#[target_feature(enable = "avx512f")]
2772#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2773#[cfg_attr(test, assert_instr(vpminsd))]
2774pub fn _mm512_maskz_min_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2775    unsafe {
2776        let min = _mm512_min_epi32(a, b).as_i32x16();
2777        transmute(simd_select_bitmask(k, min, i32x16::ZERO))
2778    }
2779}
2780
2781/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2782///
2783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi32&expand=3691)
2784#[inline]
2785#[target_feature(enable = "avx512f,avx512vl")]
2786#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2787#[cfg_attr(test, assert_instr(vpminsd))]
2788pub fn _mm256_mask_min_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2789    unsafe {
2790        let min = _mm256_min_epi32(a, b).as_i32x8();
2791        transmute(simd_select_bitmask(k, min, src.as_i32x8()))
2792    }
2793}
2794
2795/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2796///
2797/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi32&expand=3692)
2798#[inline]
2799#[target_feature(enable = "avx512f,avx512vl")]
2800#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2801#[cfg_attr(test, assert_instr(vpminsd))]
2802pub fn _mm256_maskz_min_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2803    unsafe {
2804        let min = _mm256_min_epi32(a, b).as_i32x8();
2805        transmute(simd_select_bitmask(k, min, i32x8::ZERO))
2806    }
2807}
2808
2809/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2810///
2811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi32&expand=3688)
2812#[inline]
2813#[target_feature(enable = "avx512f,avx512vl")]
2814#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2815#[cfg_attr(test, assert_instr(vpminsd))]
2816pub fn _mm_mask_min_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2817    unsafe {
2818        let min = _mm_min_epi32(a, b).as_i32x4();
2819        transmute(simd_select_bitmask(k, min, src.as_i32x4()))
2820    }
2821}
2822
2823/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2824///
2825/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi32&expand=3689)
2826#[inline]
2827#[target_feature(enable = "avx512f,avx512vl")]
2828#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2829#[cfg_attr(test, assert_instr(vpminsd))]
2830pub fn _mm_maskz_min_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2831    unsafe {
2832        let min = _mm_min_epi32(a, b).as_i32x4();
2833        transmute(simd_select_bitmask(k, min, i32x4::ZERO))
2834    }
2835}
2836
2837/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
2838///
2839/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi64&expand=3705)
2840#[inline]
2841#[target_feature(enable = "avx512f")]
2842#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2843#[cfg_attr(test, assert_instr(vpminsq))]
2844pub fn _mm512_min_epi64(a: __m512i, b: __m512i) -> __m512i {
2845    unsafe {
2846        let a = a.as_i64x8();
2847        let b = b.as_i64x8();
2848        transmute(simd_select::<i64x8, _>(simd_lt(a, b), a, b))
2849    }
2850}
2851
2852/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2853///
2854/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi64&expand=3703)
2855#[inline]
2856#[target_feature(enable = "avx512f")]
2857#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2858#[cfg_attr(test, assert_instr(vpminsq))]
2859pub fn _mm512_mask_min_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2860    unsafe {
2861        let min = _mm512_min_epi64(a, b).as_i64x8();
2862        transmute(simd_select_bitmask(k, min, src.as_i64x8()))
2863    }
2864}
2865
2866/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2867///
2868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi64&expand=3704)
2869#[inline]
2870#[target_feature(enable = "avx512f")]
2871#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2872#[cfg_attr(test, assert_instr(vpminsq))]
2873pub fn _mm512_maskz_min_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2874    unsafe {
2875        let min = _mm512_min_epi64(a, b).as_i64x8();
2876        transmute(simd_select_bitmask(k, min, i64x8::ZERO))
2877    }
2878}
2879
2880/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
2881///
2882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epi64&expand=3702)
2883#[inline]
2884#[target_feature(enable = "avx512f,avx512vl")]
2885#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2886#[cfg_attr(test, assert_instr(vpminsq))]
2887pub fn _mm256_min_epi64(a: __m256i, b: __m256i) -> __m256i {
2888    unsafe {
2889        let a = a.as_i64x4();
2890        let b = b.as_i64x4();
2891        transmute(simd_select::<i64x4, _>(simd_lt(a, b), a, b))
2892    }
2893}
2894
2895/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2896///
2897/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi64&expand=3700)
2898#[inline]
2899#[target_feature(enable = "avx512f,avx512vl")]
2900#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2901#[cfg_attr(test, assert_instr(vpminsq))]
2902pub fn _mm256_mask_min_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2903    unsafe {
2904        let min = _mm256_min_epi64(a, b).as_i64x4();
2905        transmute(simd_select_bitmask(k, min, src.as_i64x4()))
2906    }
2907}
2908
2909/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2910///
2911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi64&expand=3701)
2912#[inline]
2913#[target_feature(enable = "avx512f,avx512vl")]
2914#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2915#[cfg_attr(test, assert_instr(vpminsq))]
2916pub fn _mm256_maskz_min_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2917    unsafe {
2918        let min = _mm256_min_epi64(a, b).as_i64x4();
2919        transmute(simd_select_bitmask(k, min, i64x4::ZERO))
2920    }
2921}
2922
2923/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
2924///
2925/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi64)
2926#[inline]
2927#[target_feature(enable = "avx512f,avx512vl")]
2928#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2929#[cfg_attr(test, assert_instr(vpminsq))]
2930pub fn _mm_min_epi64(a: __m128i, b: __m128i) -> __m128i {
2931    unsafe {
2932        let a = a.as_i64x2();
2933        let b = b.as_i64x2();
2934        transmute(simd_select::<i64x2, _>(simd_lt(a, b), a, b))
2935    }
2936}
2937
2938/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2939///
2940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi64)
2941#[inline]
2942#[target_feature(enable = "avx512f,avx512vl")]
2943#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2944#[cfg_attr(test, assert_instr(vpminsq))]
2945pub fn _mm_mask_min_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2946    unsafe {
2947        let min = _mm_min_epi64(a, b).as_i64x2();
2948        transmute(simd_select_bitmask(k, min, src.as_i64x2()))
2949    }
2950}
2951
2952/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2953///
2954/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi64)
2955#[inline]
2956#[target_feature(enable = "avx512f,avx512vl")]
2957#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2958#[cfg_attr(test, assert_instr(vpminsq))]
2959pub fn _mm_maskz_min_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2960    unsafe {
2961        let min = _mm_min_epi64(a, b).as_i64x2();
2962        transmute(simd_select_bitmask(k, min, i64x2::ZERO))
2963    }
2964}
2965
2966/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.
2967///
2968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_ps&expand=3769)
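///
/// # Examples
///
/// A hypothetical sketch of the signed-zero handling inherited from the `vminps`
/// instruction (illustrative only; assumes nightly with the unstable
/// `stdarch_x86_avx512` feature and runtime AVX-512F support):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm512_set1_ps(-0.0);
/// let b = _mm512_set1_ps(0.0);
/// // When the inputs compare equal (here -0.0 and +0.0), the second operand is
/// // returned, so every lane of `r` is +0.0.
/// let r = _mm512_min_ps(a, b);
/// ```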
2969#[inline]
2970#[target_feature(enable = "avx512f")]
2971#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2972#[cfg_attr(test, assert_instr(vminps))]
2973pub fn _mm512_min_ps(a: __m512, b: __m512) -> __m512 {
2974    unsafe {
2975        transmute(vminps(
2976            a.as_f32x16(),
2977            b.as_f32x16(),
2978            _MM_FROUND_CUR_DIRECTION,
2979        ))
2980    }
2981}
2982
2983/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2984///
2985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_ps&expand=3767)
2986#[inline]
2987#[target_feature(enable = "avx512f")]
2988#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2989#[cfg_attr(test, assert_instr(vminps))]
2990pub fn _mm512_mask_min_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
2991    unsafe {
2992        let min = _mm512_min_ps(a, b).as_f32x16();
2993        transmute(simd_select_bitmask(k, min, src.as_f32x16()))
2994    }
2995}
2996
2997/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2998///
2999/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_ps&expand=3768)
3000#[inline]
3001#[target_feature(enable = "avx512f")]
3002#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3003#[cfg_attr(test, assert_instr(vminps))]
3004pub fn _mm512_maskz_min_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
3005    unsafe {
3006        let min = _mm512_min_ps(a, b).as_f32x16();
3007        transmute(simd_select_bitmask(k, min, f32x16::ZERO))
3008    }
3009}
3010
3011/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3012///
3013/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_ps&expand=3764)
3014#[inline]
3015#[target_feature(enable = "avx512f,avx512vl")]
3016#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3017#[cfg_attr(test, assert_instr(vminps))]
3018pub fn _mm256_mask_min_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
3019    unsafe {
3020        let min = _mm256_min_ps(a, b).as_f32x8();
3021        transmute(simd_select_bitmask(k, min, src.as_f32x8()))
3022    }
3023}
3024
3025/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3026///
3027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_ps&expand=3765)
3028#[inline]
3029#[target_feature(enable = "avx512f,avx512vl")]
3030#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3031#[cfg_attr(test, assert_instr(vminps))]
3032pub fn _mm256_maskz_min_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
3033    unsafe {
3034        let min = _mm256_min_ps(a, b).as_f32x8();
3035        transmute(simd_select_bitmask(k, min, f32x8::ZERO))
3036    }
3037}
3038
3039/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3040///
3041/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_ps&expand=3761)
3042#[inline]
3043#[target_feature(enable = "avx512f,avx512vl")]
3044#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3045#[cfg_attr(test, assert_instr(vminps))]
3046pub fn _mm_mask_min_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
3047    unsafe {
3048        let min = _mm_min_ps(a, b).as_f32x4();
3049        transmute(simd_select_bitmask(k, min, src.as_f32x4()))
3050    }
3051}
3052
3053/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3054///
3055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_ps&expand=3762)
3056#[inline]
3057#[target_feature(enable = "avx512f,avx512vl")]
3058#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3059#[cfg_attr(test, assert_instr(vminps))]
3060pub fn _mm_maskz_min_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
3061    unsafe {
3062        let min = _mm_min_ps(a, b).as_f32x4();
3063        transmute(simd_select_bitmask(k, min, f32x4::ZERO))
3064    }
3065}
3066
3067/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.
3068///
3069/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_pd&expand=3759)
3070#[inline]
3071#[target_feature(enable = "avx512f")]
3072#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3073#[cfg_attr(test, assert_instr(vminpd))]
3074pub fn _mm512_min_pd(a: __m512d, b: __m512d) -> __m512d {
3075    unsafe { transmute(vminpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION)) }
3076}
3077
3078/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3079///
3080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_pd&expand=3757)
3081#[inline]
3082#[target_feature(enable = "avx512f")]
3083#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3084#[cfg_attr(test, assert_instr(vminpd))]
3085pub fn _mm512_mask_min_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
3086    unsafe {
3087        let min = _mm512_min_pd(a, b).as_f64x8();
3088        transmute(simd_select_bitmask(k, min, src.as_f64x8()))
3089    }
3090}
3091
3092/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3093///
3094/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_pd&expand=3758)
3095#[inline]
3096#[target_feature(enable = "avx512f")]
3097#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3098#[cfg_attr(test, assert_instr(vminpd))]
3099pub fn _mm512_maskz_min_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
3100    unsafe {
3101        let min = _mm512_min_pd(a, b).as_f64x8();
3102        transmute(simd_select_bitmask(k, min, f64x8::ZERO))
3103    }
3104}
3105
3106/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3107///
3108/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_pd&expand=3754)
3109#[inline]
3110#[target_feature(enable = "avx512f,avx512vl")]
3111#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3112#[cfg_attr(test, assert_instr(vminpd))]
3113pub fn _mm256_mask_min_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
3114    unsafe {
3115        let min = _mm256_min_pd(a, b).as_f64x4();
3116        transmute(simd_select_bitmask(k, min, src.as_f64x4()))
3117    }
3118}
3119
3120/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3121///
3122/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_pd&expand=3755)
3123#[inline]
3124#[target_feature(enable = "avx512f,avx512vl")]
3125#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3126#[cfg_attr(test, assert_instr(vminpd))]
3127pub fn _mm256_maskz_min_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
3128    unsafe {
3129        let min = _mm256_min_pd(a, b).as_f64x4();
3130        transmute(simd_select_bitmask(k, min, f64x4::ZERO))
3131    }
3132}
3133
3134/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3135///
3136/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_pd&expand=3751)
3137#[inline]
3138#[target_feature(enable = "avx512f,avx512vl")]
3139#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3140#[cfg_attr(test, assert_instr(vminpd))]
3141pub fn _mm_mask_min_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
3142    unsafe {
3143        let min = _mm_min_pd(a, b).as_f64x2();
3144        transmute(simd_select_bitmask(k, min, src.as_f64x2()))
3145    }
3146}
3147
3148/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3149///
3150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_pd&expand=3752)
3151#[inline]
3152#[target_feature(enable = "avx512f,avx512vl")]
3153#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3154#[cfg_attr(test, assert_instr(vminpd))]
3155pub fn _mm_maskz_min_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
3156    unsafe {
3157        let min = _mm_min_pd(a, b).as_f64x2();
3158        transmute(simd_select_bitmask(k, min, f64x2::ZERO))
3159    }
3160}
3161
3162/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst.
3163///
3164/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu32&expand=3732)
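///
/// # Examples
///
/// A hypothetical sketch contrasting the unsigned minimum with `_mm512_min_epi32`
/// (illustrative only; assumes nightly with the unstable `stdarch_x86_avx512` feature and
/// runtime AVX-512F support):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm512_set1_epi32(-1); // 0xFFFF_FFFF, i.e. u32::MAX when unsigned
/// let b = _mm512_set1_epi32(7);
/// let r = _mm512_min_epu32(a, b);
/// // Every lane of `r` is 7; `_mm512_min_epi32` would instead have returned -1.
/// ```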
3165#[inline]
3166#[target_feature(enable = "avx512f")]
3167#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3168#[cfg_attr(test, assert_instr(vpminud))]
3169pub fn _mm512_min_epu32(a: __m512i, b: __m512i) -> __m512i {
3170    unsafe {
3171        let a = a.as_u32x16();
3172        let b = b.as_u32x16();
3173        transmute(simd_select::<i32x16, _>(simd_lt(a, b), a, b))
3174    }
3175}
3176
3177/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3178///
3179/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu32&expand=3730)
3180#[inline]
3181#[target_feature(enable = "avx512f")]
3182#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3183#[cfg_attr(test, assert_instr(vpminud))]
3184pub fn _mm512_mask_min_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
3185    unsafe {
3186        let min = _mm512_min_epu32(a, b).as_u32x16();
3187        transmute(simd_select_bitmask(k, min, src.as_u32x16()))
3188    }
3189}
3190
3191/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3192///
3193/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu32&expand=3731)
3194#[inline]
3195#[target_feature(enable = "avx512f")]
3196#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3197#[cfg_attr(test, assert_instr(vpminud))]
3198pub fn _mm512_maskz_min_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
3199    unsafe {
3200        let min = _mm512_min_epu32(a, b).as_u32x16();
3201        transmute(simd_select_bitmask(k, min, u32x16::ZERO))
3202    }
3203}
3204
3205/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3206///
3207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu32&expand=3727)
3208#[inline]
3209#[target_feature(enable = "avx512f,avx512vl")]
3210#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3211#[cfg_attr(test, assert_instr(vpminud))]
3212pub fn _mm256_mask_min_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3213    unsafe {
3214        let min = _mm256_min_epu32(a, b).as_u32x8();
3215        transmute(simd_select_bitmask(k, min, src.as_u32x8()))
3216    }
3217}
3218
3219/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3220///
3221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu32&expand=3728)
3222#[inline]
3223#[target_feature(enable = "avx512f,avx512vl")]
3224#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3225#[cfg_attr(test, assert_instr(vpminud))]
3226pub fn _mm256_maskz_min_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3227    unsafe {
3228        let min = _mm256_min_epu32(a, b).as_u32x8();
3229        transmute(simd_select_bitmask(k, min, u32x8::ZERO))
3230    }
3231}
3232
3233/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3234///
3235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu32&expand=3724)
3236#[inline]
3237#[target_feature(enable = "avx512f,avx512vl")]
3238#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3239#[cfg_attr(test, assert_instr(vpminud))]
3240pub fn _mm_mask_min_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3241    unsafe {
3242        let min = _mm_min_epu32(a, b).as_u32x4();
3243        transmute(simd_select_bitmask(k, min, src.as_u32x4()))
3244    }
3245}
3246
3247/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3248///
3249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu32&expand=3725)
3250#[inline]
3251#[target_feature(enable = "avx512f,avx512vl")]
3252#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3253#[cfg_attr(test, assert_instr(vpminud))]
3254pub fn _mm_maskz_min_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3255    unsafe {
3256        let min = _mm_min_epu32(a, b).as_u32x4();
3257        transmute(simd_select_bitmask(k, min, u32x4::ZERO))
3258    }
3259}
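
// Illustrative usage sketch contrasting the writemask and zeromask forms of
// the unsigned 32-bit minimum (the function name and mask value below are
// assumptions for the example, not part of this module):
//
//     #[target_feature(enable = "avx512f")]
//     fn min_epu32_demo(src: __m512i, a: __m512i, b: __m512i) -> (__m512i, __m512i) {
//         let k: __mmask16 = 0b0000_0000_1111_1111; // select the low 8 lanes
//         let merged = _mm512_mask_min_epu32(src, k, a, b); // high lanes copied from src
//         let zeroed = _mm512_maskz_min_epu32(k, a, b);     // high lanes set to 0
//         (merged, zeroed)
//     }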
3260
3261/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
3262///
3263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu64&expand=3741)
3264#[inline]
3265#[target_feature(enable = "avx512f")]
3266#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3267#[cfg_attr(test, assert_instr(vpminuq))]
3268pub fn _mm512_min_epu64(a: __m512i, b: __m512i) -> __m512i {
3269    unsafe {
3270        let a = a.as_u64x8();
3271        let b = b.as_u64x8();
3272        transmute(simd_select::<i64x8, _>(simd_lt(a, b), a, b))
3273    }
3274}
3275
3276/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3277///
3278/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu64&expand=3739)
3279#[inline]
3280#[target_feature(enable = "avx512f")]
3281#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3282#[cfg_attr(test, assert_instr(vpminuq))]
3283pub fn _mm512_mask_min_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
3284    unsafe {
3285        let min = _mm512_min_epu64(a, b).as_u64x8();
3286        transmute(simd_select_bitmask(k, min, src.as_u64x8()))
3287    }
3288}
3289
3290/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3291///
3292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu64&expand=3740)
3293#[inline]
3294#[target_feature(enable = "avx512f")]
3295#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3296#[cfg_attr(test, assert_instr(vpminuq))]
3297pub fn _mm512_maskz_min_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
3298    unsafe {
3299        let min = _mm512_min_epu64(a, b).as_u64x8();
3300        transmute(simd_select_bitmask(k, min, u64x8::ZERO))
3301    }
3302}
3303
3304/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
3305///
3306/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epu64&expand=3738)
3307#[inline]
3308#[target_feature(enable = "avx512f,avx512vl")]
3309#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3310#[cfg_attr(test, assert_instr(vpminuq))]
3311pub fn _mm256_min_epu64(a: __m256i, b: __m256i) -> __m256i {
3312    unsafe {
3313        let a = a.as_u64x4();
3314        let b = b.as_u64x4();
3315        transmute(simd_select::<i64x4, _>(simd_lt(a, b), a, b))
3316    }
3317}
3318
3319/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3320///
3321/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu64&expand=3736)
3322#[inline]
3323#[target_feature(enable = "avx512f,avx512vl")]
3324#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3325#[cfg_attr(test, assert_instr(vpminuq))]
3326pub fn _mm256_mask_min_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3327    unsafe {
3328        let min = _mm256_min_epu64(a, b).as_u64x4();
3329        transmute(simd_select_bitmask(k, min, src.as_u64x4()))
3330    }
3331}
3332
3333/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3334///
3335/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu64&expand=3737)
3336#[inline]
3337#[target_feature(enable = "avx512f,avx512vl")]
3338#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3339#[cfg_attr(test, assert_instr(vpminuq))]
3340pub fn _mm256_maskz_min_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3341    unsafe {
3342        let min = _mm256_min_epu64(a, b).as_u64x4();
3343        transmute(simd_select_bitmask(k, min, u64x4::ZERO))
3344    }
3345}
3346
3347/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
3348///
3349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu64&expand=3735)
3350#[inline]
3351#[target_feature(enable = "avx512f,avx512vl")]
3352#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3353#[cfg_attr(test, assert_instr(vpminuq))]
3354pub fn _mm_min_epu64(a: __m128i, b: __m128i) -> __m128i {
3355    unsafe {
3356        let a = a.as_u64x2();
3357        let b = b.as_u64x2();
3358        transmute(simd_select::<i64x2, _>(simd_lt(a, b), a, b))
3359    }
3360}
3361
3362/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3363///
3364/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu64&expand=3733)
3365#[inline]
3366#[target_feature(enable = "avx512f,avx512vl")]
3367#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3368#[cfg_attr(test, assert_instr(vpminuq))]
3369pub fn _mm_mask_min_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3370    unsafe {
3371        let min = _mm_min_epu64(a, b).as_u64x2();
3372        transmute(simd_select_bitmask(k, min, src.as_u64x2()))
3373    }
3374}
3375
3376/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3377///
3378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu64&expand=3734)
3379#[inline]
3380#[target_feature(enable = "avx512f,avx512vl")]
3381#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3382#[cfg_attr(test, assert_instr(vpminuq))]
3383pub fn _mm_maskz_min_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3384    unsafe {
3385        let min = _mm_min_epu64(a, b).as_u64x2();
3386        transmute(simd_select_bitmask(k, min, u64x2::ZERO))
3387    }
3388}
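
// Note: unlike the 32-bit case, plain AVX2 has no packed unsigned 64-bit
// minimum, so the 128-bit and 256-bit forms above exist only under AVX-512VL.
// A hedged usage sketch with runtime detection (the function name and the
// fallback are assumptions for the example):
//
//     fn min_u64x2(a: __m128i, b: __m128i) -> __m128i {
//         if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
//             unsafe { _mm_min_epu64(a, b) }
//         } else {
//             todo!("fall back to a scalar comparison per lane")
//         }
//     }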
3389
3390/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst.
3391///
3392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_ps&expand=5371)
3393#[inline]
3394#[target_feature(enable = "avx512f")]
3395#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3396#[cfg_attr(test, assert_instr(vsqrtps))]
3397pub fn _mm512_sqrt_ps(a: __m512) -> __m512 {
3398    unsafe { simd_fsqrt(a) }
3399}
3400
3401/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3402///
3403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_ps&expand=5369)
3404#[inline]
3405#[target_feature(enable = "avx512f")]
3406#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3407#[cfg_attr(test, assert_instr(vsqrtps))]
3408pub fn _mm512_mask_sqrt_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
3409    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3410}
3411
3412/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3413///
3414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_ps&expand=5370)
3415#[inline]
3416#[target_feature(enable = "avx512f")]
3417#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3418#[cfg_attr(test, assert_instr(vsqrtps))]
3419pub fn _mm512_maskz_sqrt_ps(k: __mmask16, a: __m512) -> __m512 {
3420    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm512_setzero_ps()) }
3421}
3422
3423/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3424///
3425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sqrt_ps&expand=5366)
3426#[inline]
3427#[target_feature(enable = "avx512f,avx512vl")]
3428#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3429#[cfg_attr(test, assert_instr(vsqrtps))]
3430pub fn _mm256_mask_sqrt_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
3431    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3432}
3433
3434/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3435///
3436/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sqrt_ps&expand=5367)
3437#[inline]
3438#[target_feature(enable = "avx512f,avx512vl")]
3439#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3440#[cfg_attr(test, assert_instr(vsqrtps))]
3441pub fn _mm256_maskz_sqrt_ps(k: __mmask8, a: __m256) -> __m256 {
3442    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm256_setzero_ps()) }
3443}
3444
3445/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3446///
3447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sqrt_ps&expand=5363)
3448#[inline]
3449#[target_feature(enable = "avx512f,avx512vl")]
3450#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3451#[cfg_attr(test, assert_instr(vsqrtps))]
3452pub fn _mm_mask_sqrt_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
3453    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3454}
3455
3456/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3457///
3458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sqrt_ps&expand=5364)
3459#[inline]
3460#[target_feature(enable = "avx512f,avx512vl")]
3461#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3462#[cfg_attr(test, assert_instr(vsqrtps))]
3463pub fn _mm_maskz_sqrt_ps(k: __mmask8, a: __m128) -> __m128 {
3464    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm_setzero_ps()) }
3465}
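
// A common use of the zeromask form: take square roots only of non-negative
// lanes and force the rest to 0.0 instead of producing NaN. Illustrative
// sketch (the function name is an assumption for the example):
//
//     #[target_feature(enable = "avx512f")]
//     fn sqrt_nonnegative(a: __m512) -> __m512 {
//         // mask bit set for every lane with a >= 0.0
//         let k = _mm512_cmp_ps_mask::<_CMP_GE_OQ>(a, _mm512_setzero_ps());
//         _mm512_maskz_sqrt_ps(k, a)
//     }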
3466
3467/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
3468///
3469/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_pd&expand=5362)
3470#[inline]
3471#[target_feature(enable = "avx512f")]
3472#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3473#[cfg_attr(test, assert_instr(vsqrtpd))]
3474pub fn _mm512_sqrt_pd(a: __m512d) -> __m512d {
3475    unsafe { simd_fsqrt(a) }
3476}
3477
3478/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3479///
3480/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_pd&expand=5360)
3481#[inline]
3482#[target_feature(enable = "avx512f")]
3483#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3484#[cfg_attr(test, assert_instr(vsqrtpd))]
3485pub fn _mm512_mask_sqrt_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
3486    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3487}
3488
3489/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3490///
3491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_pd&expand=5361)
3492#[inline]
3493#[target_feature(enable = "avx512f")]
3494#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3495#[cfg_attr(test, assert_instr(vsqrtpd))]
3496pub fn _mm512_maskz_sqrt_pd(k: __mmask8, a: __m512d) -> __m512d {
3497    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm512_setzero_pd()) }
3498}
3499
3500/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3501///
3502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sqrt_pd&expand=5357)
3503#[inline]
3504#[target_feature(enable = "avx512f,avx512vl")]
3505#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3506#[cfg_attr(test, assert_instr(vsqrtpd))]
3507pub fn _mm256_mask_sqrt_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
3508    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3509}
3510
3511/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3512///
3513/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sqrt_pd&expand=5358)
3514#[inline]
3515#[target_feature(enable = "avx512f,avx512vl")]
3516#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3517#[cfg_attr(test, assert_instr(vsqrtpd))]
3518pub fn _mm256_maskz_sqrt_pd(k: __mmask8, a: __m256d) -> __m256d {
3519    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm256_setzero_pd()) }
3520}
3521
3522/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3523///
3524/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sqrt_pd&expand=5354)
3525#[inline]
3526#[target_feature(enable = "avx512f,avx512vl")]
3527#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3528#[cfg_attr(test, assert_instr(vsqrtpd))]
3529pub fn _mm_mask_sqrt_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
3530    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3531}
3532
3533/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3534///
3535/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sqrt_pd&expand=5355)
3536#[inline]
3537#[target_feature(enable = "avx512f,avx512vl")]
3538#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3539#[cfg_attr(test, assert_instr(vsqrtpd))]
3540pub fn _mm_maskz_sqrt_pd(k: __mmask8, a: __m128d) -> __m128d {
3541    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm_setzero_pd()) }
3542}
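
// Writemask counterpart of the pattern shown for the `ps` forms: take the
// square root where the lane is non-negative and pass the original value
// through everywhere else by reusing `a` as `src`. Illustrative sketch (the
// function name is an assumption for the example):
//
//     #[target_feature(enable = "avx512f")]
//     fn sqrt_or_passthrough(a: __m512d) -> __m512d {
//         let k = _mm512_cmp_pd_mask::<_CMP_GE_OQ>(a, _mm512_setzero_pd());
//         _mm512_mask_sqrt_pd(a, k, a) // src = a: negative/NaN lanes pass through unchanged
//     }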
3543
3544/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
3545///
3546/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_ps&expand=2557)
3547#[inline]
3548#[target_feature(enable = "avx512f")]
3549#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3550#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3551pub fn _mm512_fmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
3552    unsafe { simd_fma(a, b, c) }
3553}
3554
3555/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3556///
3557/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_ps&expand=2558)
3558#[inline]
3559#[target_feature(enable = "avx512f")]
3560#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3561#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3562pub fn _mm512_mask_fmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
3563    unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), a) }
3564}
3565
3566/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3567///
3568/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_ps&expand=2560)
3569#[inline]
3570#[target_feature(enable = "avx512f")]
3571#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3572#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3573pub fn _mm512_maskz_fmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
3574    unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), _mm512_setzero_ps()) }
3575}
3576
3577/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3578///
3579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_ps&expand=2559)
3580#[inline]
3581#[target_feature(enable = "avx512f")]
3582#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3583#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3584pub fn _mm512_mask3_fmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
3585    unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), c) }
3586}
3587
3588/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3589///
3590/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmadd_ps&expand=2554)
3591#[inline]
3592#[target_feature(enable = "avx512f,avx512vl")]
3593#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3594#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3595pub fn _mm256_mask_fmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
3596    unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), a) }
3597}
3598
3599/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3600///
3601/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmadd_ps&expand=2556)
3602#[inline]
3603#[target_feature(enable = "avx512f,avx512vl")]
3604#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3605#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3606pub fn _mm256_maskz_fmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
3607    unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), _mm256_setzero_ps()) }
3608}
3609
3610/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3611///
3612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmadd_ps&expand=2555)
3613#[inline]
3614#[target_feature(enable = "avx512f,avx512vl")]
3615#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3616#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3617pub fn _mm256_mask3_fmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
3618    unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), c) }
3619}
3620
3621/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3622///
3623/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmadd_ps&expand=2550)
3624#[inline]
3625#[target_feature(enable = "avx512f,avx512vl")]
3626#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3627#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3628pub fn _mm_mask_fmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
3629    unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), a) }
3630}
3631
3632/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3633///
3634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmadd_ps&expand=2552)
3635#[inline]
3636#[target_feature(enable = "avx512f,avx512vl")]
3637#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3638#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3639pub fn _mm_maskz_fmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
3640    unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), _mm_setzero_ps()) }
3641}
3642
3643/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3644///
3645/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmadd_ps&expand=2551)
3646#[inline]
3647#[target_feature(enable = "avx512f,avx512vl")]
3648#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3649#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3650pub fn _mm_mask3_fmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
3651    unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), c) }
3652}
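
// The three masked forms differ only in what ends up in lanes whose mask bit
// is clear: `mask_` keeps the corresponding lane of `a`, `maskz_` writes 0.0,
// and `mask3_` keeps the lane of `c`. Illustrative sketch (the function name
// is an assumption for the example):
//
//     #[target_feature(enable = "avx512f")]
//     fn fmadd_masked_variants(a: __m512, k: __mmask16, b: __m512, c: __m512) -> (__m512, __m512, __m512) {
//         (
//             _mm512_mask_fmadd_ps(a, k, b, c),  // unselected lanes keep a
//             _mm512_maskz_fmadd_ps(k, a, b, c), // unselected lanes become 0.0
//             _mm512_mask3_fmadd_ps(a, b, c, k), // unselected lanes keep c
//         )
//     }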
3653
3654/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
3655///
3656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_pd&expand=2545)
3657#[inline]
3658#[target_feature(enable = "avx512f")]
3659#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3660#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3661pub fn _mm512_fmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
3662    unsafe { simd_fma(a, b, c) }
3663}
3664
3665/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3666///
3667/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_pd&expand=2546)
3668#[inline]
3669#[target_feature(enable = "avx512f")]
3670#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3671#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3672pub fn _mm512_mask_fmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
3673    unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), a) }
3674}
3675
3676/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3677///
3678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_pd&expand=2548)
3679#[inline]
3680#[target_feature(enable = "avx512f")]
3681#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3682#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3683pub fn _mm512_maskz_fmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
3684    unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), _mm512_setzero_pd()) }
3685}
3686
3687/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3688///
3689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_pd&expand=2547)
3690#[inline]
3691#[target_feature(enable = "avx512f")]
3692#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3693#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3694pub fn _mm512_mask3_fmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
3695    unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), c) }
3696}
3697
3698/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3699///
3700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmadd_pd&expand=2542)
3701#[inline]
3702#[target_feature(enable = "avx512f,avx512vl")]
3703#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3704#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3705pub fn _mm256_mask_fmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
3706    unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), a) }
3707}
3708
3709/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3710///
3711/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmadd_pd&expand=2544)
3712#[inline]
3713#[target_feature(enable = "avx512f,avx512vl")]
3714#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3715#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3716pub fn _mm256_maskz_fmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
3717    unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), _mm256_setzero_pd()) }
3718}
3719
3720/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3721///
3722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmadd_pd&expand=2543)
3723#[inline]
3724#[target_feature(enable = "avx512f,avx512vl")]
3725#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3726#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3727pub fn _mm256_mask3_fmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
3728    unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), c) }
3729}
3730
3731/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3732///
3733/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmadd_pd&expand=2538)
3734#[inline]
3735#[target_feature(enable = "avx512f,avx512vl")]
3736#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3737#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3738pub fn _mm_mask_fmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
3739    unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), a) }
3740}
3741
3742/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3743///
3744/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmadd_pd&expand=2540)
3745#[inline]
3746#[target_feature(enable = "avx512f,avx512vl")]
3747#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3748#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3749pub fn _mm_maskz_fmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
3750    unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), _mm_setzero_pd()) }
3751}
3752
3753/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3754///
3755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmadd_pd&expand=2539)
3756#[inline]
3757#[target_feature(enable = "avx512f,avx512vl")]
3758#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3759#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3760pub fn _mm_mask3_fmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
3761    unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), c) }
3762}
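
// Per lane, the fused multiply-add above performs the multiply and the add
// with a single rounding step, which is the same contract as `f64::mul_add`.
// A scalar model of one lane (illustrative only):
//
//     fn fmadd_lane(a: f64, b: f64, c: f64) -> f64 {
//         a.mul_add(b, c) // computed as fma(a, b, c), not (a * b) + c
//     }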
3763
3764/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
3765///
3766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_ps&expand=2643)
3767#[inline]
3768#[target_feature(enable = "avx512f")]
3769#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3770#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3771pub fn _mm512_fmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
3772    unsafe { simd_fma(a, b, simd_neg(c)) }
3773}
3774
3775/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3776///
3777/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_ps&expand=2644)
3778#[inline]
3779#[target_feature(enable = "avx512f")]
3780#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3781#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3782pub fn _mm512_mask_fmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
3783    unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), a) }
3784}
3785
3786/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3787///
3788/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_ps&expand=2646)
3789#[inline]
3790#[target_feature(enable = "avx512f")]
3791#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3792#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3793pub fn _mm512_maskz_fmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
3794    unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), _mm512_setzero_ps()) }
3795}
3796
3797/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3798///
3799/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_ps&expand=2645)
3800#[inline]
3801#[target_feature(enable = "avx512f")]
3802#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3803#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3804pub fn _mm512_mask3_fmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
3805    unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), c) }
3806}
3807
3808/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3809///
3810/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsub_ps&expand=2640)
3811#[inline]
3812#[target_feature(enable = "avx512f,avx512vl")]
3813#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3814#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3815pub fn _mm256_mask_fmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
3816    unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), a) }
3817}
3818
3819/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3820///
3821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsub_ps&expand=2642)
3822#[inline]
3823#[target_feature(enable = "avx512f,avx512vl")]
3824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3825#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3826pub fn _mm256_maskz_fmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
3827    unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), _mm256_setzero_ps()) }
3828}
3829
3830/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3831///
3832/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsub_ps&expand=2641)
3833#[inline]
3834#[target_feature(enable = "avx512f,avx512vl")]
3835#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3836#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3837pub fn _mm256_mask3_fmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
3838    unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), c) }
3839}
3840
3841/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3842///
3843/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsub_ps&expand=2636)
3844#[inline]
3845#[target_feature(enable = "avx512f,avx512vl")]
3846#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3847#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3848pub fn _mm_mask_fmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
3849    unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), a) }
3850}
3851
3852/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3853///
3854/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsub_ps&expand=2638)
3855#[inline]
3856#[target_feature(enable = "avx512f,avx512vl")]
3857#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3858#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3859pub fn _mm_maskz_fmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
3860    unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), _mm_setzero_ps()) }
3861}
3862
3863/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3864///
3865/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsub_ps&expand=2637)
3866#[inline]
3867#[target_feature(enable = "avx512f,avx512vl")]
3868#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3869#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3870pub fn _mm_mask3_fmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
3871    unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), c) }
3872}
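
// As the implementations above show, fmsub is expressed as a fused
// multiply-add with the third operand negated: a*b - c == fma(a, b, -c),
// still with a single rounding. Scalar model of one lane (illustrative only):
//
//     fn fmsub_lane(a: f32, b: f32, c: f32) -> f32 {
//         a.mul_add(b, -c)
//     }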
3873
3874/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
3875///
3876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_pd&expand=2631)
3877#[inline]
3878#[target_feature(enable = "avx512f")]
3879#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3880#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3881pub fn _mm512_fmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
3882    unsafe { simd_fma(a, b, simd_neg(c)) }
3883}
3884
3885/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3886///
3887/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_pd&expand=2632)
3888#[inline]
3889#[target_feature(enable = "avx512f")]
3890#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3891#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3892pub fn _mm512_mask_fmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
3893    unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), a) }
3894}
3895
3896/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3897///
3898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_pd&expand=2634)
3899#[inline]
3900#[target_feature(enable = "avx512f")]
3901#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3902#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3903pub fn _mm512_maskz_fmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
3904    unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), _mm512_setzero_pd()) }
3905}
3906
3907/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3908///
3909/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_pd&expand=2633)
3910#[inline]
3911#[target_feature(enable = "avx512f")]
3912#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3913#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3914pub fn _mm512_mask3_fmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
3915    unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), c) }
3916}
3917
3918/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3919///
3920/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsub_pd&expand=2628)
3921#[inline]
3922#[target_feature(enable = "avx512f,avx512vl")]
3923#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3924#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3925pub fn _mm256_mask_fmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
3926    unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), a) }
3927}
3928
3929/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3930///
3931/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsub_pd&expand=2630)
3932#[inline]
3933#[target_feature(enable = "avx512f,avx512vl")]
3934#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3935#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3936pub fn _mm256_maskz_fmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
3937    unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), _mm256_setzero_pd()) }
3938}
3939
3940/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3941///
3942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsub_pd&expand=2629)
3943#[inline]
3944#[target_feature(enable = "avx512f,avx512vl")]
3945#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3946#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3947pub fn _mm256_mask3_fmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
3948    unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), c) }
3949}
3950
3951/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3952///
3953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsub_pd&expand=2624)
3954#[inline]
3955#[target_feature(enable = "avx512f,avx512vl")]
3956#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3957#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3958pub fn _mm_mask_fmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
3959    unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), a) }
3960}
3961
3962/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3963///
3964/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsub_pd&expand=2626)
3965#[inline]
3966#[target_feature(enable = "avx512f,avx512vl")]
3967#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3968#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3969pub fn _mm_maskz_fmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
3970    unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), _mm_setzero_pd()) }
3971}
3972
3973/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3974///
3975/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsub_pd&expand=2625)
3976#[inline]
3977#[target_feature(enable = "avx512f,avx512vl")]
3978#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3979#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3980pub fn _mm_mask3_fmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
3981    unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), c) }
3982}
3983
3984/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
3985///
3986/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_ps&expand=2611)
3987#[inline]
3988#[target_feature(enable = "avx512f")]
3989#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3990#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
3991pub fn _mm512_fmaddsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
3992    unsafe {
3993        let add = simd_fma(a, b, c);
3994        let sub = simd_fma(a, b, simd_neg(c));
3995        simd_shuffle!(
3996            add,
3997            sub,
3998            [16, 1, 18, 3, 20, 5, 22, 7, 24, 9, 26, 11, 28, 13, 30, 15]
3999        )
4000    }
4001}
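
// The shuffle above interleaves the two candidate results: even-indexed lanes
// take the subtracted value and odd-indexed lanes take the added value, which
// is the vfmaddsub pattern. Scalar model of one lane (illustrative only):
//
//     fn fmaddsub_lane(i: usize, a: f32, b: f32, c: f32) -> f32 {
//         if i % 2 == 0 { a.mul_add(b, -c) } else { a.mul_add(b, c) }
//     }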
4002
4003/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4004///
4005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_ps&expand=2612)
4006#[inline]
4007#[target_feature(enable = "avx512f")]
4008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4009#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4010pub fn _mm512_mask_fmaddsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
4011    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), a) }
4012}
4013
4014/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4015///
4016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_ps&expand=2614)
4017#[inline]
4018#[target_feature(enable = "avx512f")]
4019#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4020#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4021pub fn _mm512_maskz_fmaddsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
4022    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), _mm512_setzero_ps()) }
4023}
4024
4025/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4026///
4027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_ps&expand=2613)
4028#[inline]
4029#[target_feature(enable = "avx512f")]
4030#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4031#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4032pub fn _mm512_mask3_fmaddsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
4033    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), c) }
4034}
4035
4036/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4037///
4038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmaddsub_ps&expand=2608)
4039#[inline]
4040#[target_feature(enable = "avx512f,avx512vl")]
4041#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4042#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4043pub fn _mm256_mask_fmaddsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4044    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), a) }
4045}
4046
4047/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4048///
4049/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmaddsub_ps&expand=2610)
4050#[inline]
4051#[target_feature(enable = "avx512f,avx512vl")]
4052#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4053#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4054pub fn _mm256_maskz_fmaddsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4055    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), _mm256_setzero_ps()) }
4056}
4057
4058/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4059///
4060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmaddsub_ps&expand=2609)
4061#[inline]
4062#[target_feature(enable = "avx512f,avx512vl")]
4063#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4064#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4065pub fn _mm256_mask3_fmaddsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4066    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), c) }
4067}
4068
4069/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4070///
4071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmaddsub_ps&expand=2604)
4072#[inline]
4073#[target_feature(enable = "avx512f,avx512vl")]
4074#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4075#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4076pub fn _mm_mask_fmaddsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4077    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), a) }
4078}
4079
4080/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4081///
4082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmaddsub_ps&expand=2606)
4083#[inline]
4084#[target_feature(enable = "avx512f,avx512vl")]
4085#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4086#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4087pub fn _mm_maskz_fmaddsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
4088    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), _mm_setzero_ps()) }
4089}
4090
4091/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4092///
4093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmaddsub_ps&expand=2605)
4094#[inline]
4095#[target_feature(enable = "avx512f,avx512vl")]
4096#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4097#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4098pub fn _mm_mask3_fmaddsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
4099    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), c) }
4100}
4101
4102/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
4103///
4104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_pd&expand=2599)
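///
/// # Example
///
/// A minimal usage sketch (illustrative only, not from Intel's docs); the block is
/// marked `ignore` because it needs the nightly `stdarch_x86_avx512` feature and an
/// AVX-512F CPU at runtime:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // Safety: AVX-512F support was just verified at runtime.
///     unsafe {
///         let a = _mm512_set1_pd(2.0);
///         let b = _mm512_set1_pd(3.0);
///         let c = _mm512_set1_pd(1.0);
///         let r = _mm512_fmaddsub_pd(a, b, c);
///         let mut out = [0.0f64; 8];
///         _mm512_storeu_pd(out.as_mut_ptr(), r);
///         // Even lanes: 2.0 * 3.0 - 1.0; odd lanes: 2.0 * 3.0 + 1.0.
///         assert_eq!(out, [5.0, 7.0, 5.0, 7.0, 5.0, 7.0, 5.0, 7.0]);
///     }
/// }
/// ```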
4105#[inline]
4106#[target_feature(enable = "avx512f")]
4107#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4108#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4109pub fn _mm512_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4110    unsafe {
4111        let add = simd_fma(a, b, c);
4112        let sub = simd_fma(a, b, simd_neg(c));
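        // Interleave the two results: even lanes take the subtracted value
        // (indices 8..), odd lanes the added value, per vfmaddsub semantics.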
4113        simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7])
4114    }
4115}
4116
4117/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4118///
4119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_pd&expand=2600)
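///
/// # Example
///
/// A writemask sketch (illustrative only; `ignore` because it needs the nightly
/// `stdarch_x86_avx512` feature and an AVX-512F CPU):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // Safety: AVX-512F support was just verified at runtime.
///     unsafe {
///         let a = _mm512_set1_pd(2.0);
///         let b = _mm512_set1_pd(3.0);
///         let c = _mm512_set1_pd(1.0);
///         // Only the low four lanes are computed; the rest are copied from `a`.
///         let r = _mm512_mask_fmaddsub_pd(a, 0b0000_1111, b, c);
///         let mut out = [0.0f64; 8];
///         _mm512_storeu_pd(out.as_mut_ptr(), r);
///         assert_eq!(out, [5.0, 7.0, 5.0, 7.0, 2.0, 2.0, 2.0, 2.0]);
///     }
/// }
/// ```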
4120#[inline]
4121#[target_feature(enable = "avx512f")]
4122#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4123#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4124pub fn _mm512_mask_fmaddsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
4125    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), a) }
4126}
4127
4128/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4129///
4130/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_pd&expand=2602)
4131#[inline]
4132#[target_feature(enable = "avx512f")]
4133#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4134#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4135pub fn _mm512_maskz_fmaddsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4136    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), _mm512_setzero_pd()) }
4137}
4138
4139/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4140///
4141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_pd&expand=2613)
4142#[inline]
4143#[target_feature(enable = "avx512f")]
4144#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4145#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4146pub fn _mm512_mask3_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
4147    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), c) }
4148}
4149
4150/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4151///
4152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmaddsub_pd&expand=2596)
4153#[inline]
4154#[target_feature(enable = "avx512f,avx512vl")]
4155#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4156#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4157pub fn _mm256_mask_fmaddsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
4158    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), a) }
4159}
4160
4161/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4162///
4163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmaddsub_pd&expand=2598)
4164#[inline]
4165#[target_feature(enable = "avx512f,avx512vl")]
4166#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4167#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4168pub fn _mm256_maskz_fmaddsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
4169    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), _mm256_setzero_pd()) }
4170}
4171
4172/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4173///
4174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmaddsub_pd&expand=2597)
4175#[inline]
4176#[target_feature(enable = "avx512f,avx512vl")]
4177#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4178#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4179pub fn _mm256_mask3_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
4180    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), c) }
4181}
4182
4183/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4184///
4185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmaddsub_pd&expand=2592)
4186#[inline]
4187#[target_feature(enable = "avx512f,avx512vl")]
4188#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4189#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4190pub fn _mm_mask_fmaddsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
4191    unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), a) }
4192}
4193
4194/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4195///
4196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmaddsub_pd&expand=2594)
4197#[inline]
4198#[target_feature(enable = "avx512f,avx512vl")]
4199#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4200#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4201pub fn _mm_maskz_fmaddsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
4202    unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), _mm_setzero_pd()) }
4203}
4204
4205/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4206///
4207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmaddsub_pd&expand=2593)
4208#[inline]
4209#[target_feature(enable = "avx512f,avx512vl")]
4210#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4211#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4212pub fn _mm_mask3_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
4213    unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), c) }
4214}
4215
4216/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
4217///
4218/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_ps&expand=2691)
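///
/// # Example
///
/// A minimal usage sketch (illustrative only; `ignore` because it needs the nightly
/// `stdarch_x86_avx512` feature and an AVX-512F CPU):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // Safety: AVX-512F support was just verified at runtime.
///     unsafe {
///         let a = _mm512_set1_ps(2.0);
///         let b = _mm512_set1_ps(3.0);
///         let c = _mm512_set1_ps(1.0);
///         let r = _mm512_fmsubadd_ps(a, b, c);
///         let mut out = [0.0f32; 16];
///         _mm512_storeu_ps(out.as_mut_ptr(), r);
///         // Even lanes: 2.0 * 3.0 + 1.0; odd lanes: 2.0 * 3.0 - 1.0.
///         assert_eq!((out[0], out[1]), (7.0, 5.0));
///     }
/// }
/// ```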
4219#[inline]
4220#[target_feature(enable = "avx512f")]
4221#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4222#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4223pub fn _mm512_fmsubadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
4224    unsafe {
4225        let add = simd_fma(a, b, c);
4226        let sub = simd_fma(a, b, simd_neg(c));
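        // Interleave the two results: even lanes take the added value, odd
        // lanes the subtracted value (indices 16..), per vfmsubadd semantics.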
4227        simd_shuffle!(
4228            add,
4229            sub,
4230            [0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31]
4231        )
4232    }
4233}
4234
4235/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4236///
4237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_ps&expand=2692)
4238#[inline]
4239#[target_feature(enable = "avx512f")]
4240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4241#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4242pub fn _mm512_mask_fmsubadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
4243    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), a) }
4244}
4245
4246/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4247///
4248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_ps&expand=2694)
4249#[inline]
4250#[target_feature(enable = "avx512f")]
4251#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4252#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4253pub fn _mm512_maskz_fmsubadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
4254    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), _mm512_setzero_ps()) }
4255}
4256
4257/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4258///
4259/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_ps&expand=2693)
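///
/// # Example
///
/// A `mask3` sketch (illustrative only; `ignore` because it needs the nightly
/// `stdarch_x86_avx512` feature and an AVX-512F CPU):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // Safety: AVX-512F support was just verified at runtime.
///     unsafe {
///         let a = _mm512_set1_ps(2.0);
///         let b = _mm512_set1_ps(3.0);
///         let c = _mm512_set1_ps(1.0);
///         // Only the upper eight lanes are computed; the rest are copied from `c`.
///         let r = _mm512_mask3_fmsubadd_ps(a, b, c, 0b1111_1111_0000_0000);
///         let mut out = [0.0f32; 16];
///         _mm512_storeu_ps(out.as_mut_ptr(), r);
///         assert_eq!((out[0], out[8], out[9]), (1.0, 7.0, 5.0));
///     }
/// }
/// ```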
4260#[inline]
4261#[target_feature(enable = "avx512f")]
4262#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4263#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4264pub fn _mm512_mask3_fmsubadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
4265    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), c) }
4266}
4267
4268/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4269///
4270/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsubadd_ps&expand=2688)
4271#[inline]
4272#[target_feature(enable = "avx512f,avx512vl")]
4273#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4274#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4275pub fn _mm256_mask_fmsubadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4276    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), a) }
4277}
4278
4279/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4280///
4281/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsubadd_ps&expand=2690)
4282#[inline]
4283#[target_feature(enable = "avx512f,avx512vl")]
4284#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4285#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4286pub fn _mm256_maskz_fmsubadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4287    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), _mm256_setzero_ps()) }
4288}
4289
4290/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4291///
4292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsubadd_ps&expand=2689)
4293#[inline]
4294#[target_feature(enable = "avx512f,avx512vl")]
4295#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4296#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4297pub fn _mm256_mask3_fmsubadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4298    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), c) }
4299}
4300
4301/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4302///
4303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsubadd_ps&expand=2684)
4304#[inline]
4305#[target_feature(enable = "avx512f,avx512vl")]
4306#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4307#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4308pub fn _mm_mask_fmsubadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4309    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), a) }
4310}
4311
4312/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4313///
4314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsubadd_ps&expand=2686)
4315#[inline]
4316#[target_feature(enable = "avx512f,avx512vl")]
4317#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4318#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4319pub fn _mm_maskz_fmsubadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
4320    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), _mm_setzero_ps()) }
4321}
4322
4323/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4324///
4325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsubadd_ps&expand=2685)
4326#[inline]
4327#[target_feature(enable = "avx512f,avx512vl")]
4328#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4329#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4330pub fn _mm_mask3_fmsubadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
4331    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), c) }
4332}
4333
4334/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
4335///
4336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_pd&expand=2679)
4337#[inline]
4338#[target_feature(enable = "avx512f")]
4339#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4340#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4341pub fn _mm512_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4342    unsafe {
4343        let add = simd_fma(a, b, c);
4344        let sub = simd_fma(a, b, simd_neg(c));
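        // Even lanes take the added value, odd lanes the subtracted value
        // (indices 8..), mirroring the single-precision variant above.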
4345        simd_shuffle!(add, sub, [0, 9, 2, 11, 4, 13, 6, 15])
4346    }
4347}
4348
4349/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4350///
4351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_pd&expand=2680)
4352#[inline]
4353#[target_feature(enable = "avx512f")]
4354#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4355#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4356pub fn _mm512_mask_fmsubadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
4357    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), a) }
4358}
4359
4360/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4361///
4362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_pd&expand=2682)
4363#[inline]
4364#[target_feature(enable = "avx512f")]
4365#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4366#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4367pub fn _mm512_maskz_fmsubadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4368    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), _mm512_setzero_pd()) }
4369}
4370
4371/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4372///
4373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_pd&expand=2681)
4374#[inline]
4375#[target_feature(enable = "avx512f")]
4376#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4377#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4378pub fn _mm512_mask3_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
4379    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), c) }
4380}
4381
4382/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4383///
4384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsubadd_pd&expand=2676)
4385#[inline]
4386#[target_feature(enable = "avx512f,avx512vl")]
4387#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4388#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4389pub fn _mm256_mask_fmsubadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
4390    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), a) }
4391}
4392
4393/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4394///
4395/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsubadd_pd&expand=2678)
4396#[inline]
4397#[target_feature(enable = "avx512f,avx512vl")]
4398#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4399#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4400pub fn _mm256_maskz_fmsubadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
4401    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), _mm256_setzero_pd()) }
4402}
4403
4404/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4405///
4406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsubadd_pd&expand=2677)
4407#[inline]
4408#[target_feature(enable = "avx512f,avx512vl")]
4409#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4410#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4411pub fn _mm256_mask3_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
4412    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), c) }
4413}
4414
4415/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4416///
4417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsubadd_pd&expand=2672)
4418#[inline]
4419#[target_feature(enable = "avx512f,avx512vl")]
4420#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4421#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4422pub fn _mm_mask_fmsubadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
4423    unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), a) }
4424}
4425
4426/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4427///
4428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsubadd_pd&expand=2674)
4429#[inline]
4430#[target_feature(enable = "avx512f,avx512vl")]
4431#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4432#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4433pub fn _mm_maskz_fmsubadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
4434    unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), _mm_setzero_pd()) }
4435}
4436
4437/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4438///
4439/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsubadd_pd&expand=2673)
4440#[inline]
4441#[target_feature(enable = "avx512f,avx512vl")]
4442#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4443#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4444pub fn _mm_mask3_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
4445    unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), c) }
4446}
4447
4448/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
4449///
4450/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_ps&expand=2723)
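///
/// # Example
///
/// A minimal usage sketch (illustrative only; `ignore` because it needs the nightly
/// `stdarch_x86_avx512` feature and an AVX-512F CPU):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // Safety: AVX-512F support was just verified at runtime.
///     unsafe {
///         let a = _mm512_set1_ps(2.0);
///         let b = _mm512_set1_ps(3.0);
///         let c = _mm512_set1_ps(1.0);
///         // Every lane computes -(2.0 * 3.0) + 1.0.
///         let r = _mm512_fnmadd_ps(a, b, c);
///         let mut out = [0.0f32; 16];
///         _mm512_storeu_ps(out.as_mut_ptr(), r);
///         assert_eq!(out, [-5.0; 16]);
///     }
/// }
/// ```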
4451#[inline]
4452#[target_feature(enable = "avx512f")]
4453#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4454#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4455pub fn _mm512_fnmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
4456    unsafe { simd_fma(simd_neg(a), b, c) }
4457}
4458
4459/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4460///
4461/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_ps&expand=2724)
4462#[inline]
4463#[target_feature(enable = "avx512f")]
4464#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4465#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4466pub fn _mm512_mask_fnmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
4467    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), a) }
4468}
4469
4470/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4471///
4472/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_ps&expand=2726)
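///
/// # Example
///
/// A zeromask sketch (illustrative only; `ignore` because it needs the nightly
/// `stdarch_x86_avx512` feature and an AVX-512F CPU):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // Safety: AVX-512F support was just verified at runtime.
///     unsafe {
///         let a = _mm512_set1_ps(2.0);
///         let b = _mm512_set1_ps(3.0);
///         let c = _mm512_set1_ps(1.0);
///         // Lanes 0..8 compute -(2.0 * 3.0) + 1.0; lanes 8..16 are zeroed.
///         let r = _mm512_maskz_fnmadd_ps(0b0000_0000_1111_1111, a, b, c);
///         let mut out = [0.0f32; 16];
///         _mm512_storeu_ps(out.as_mut_ptr(), r);
///         assert_eq!((out[0], out[15]), (-5.0, 0.0));
///     }
/// }
/// ```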
4473#[inline]
4474#[target_feature(enable = "avx512f")]
4475#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4476#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4477pub fn _mm512_maskz_fnmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
4478    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), _mm512_setzero_ps()) }
4479}
4480
4481/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4482///
4483/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_ps&expand=2725)
4484#[inline]
4485#[target_feature(enable = "avx512f")]
4486#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4487#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4488pub fn _mm512_mask3_fnmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
4489    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), c) }
4490}
4491
4492/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4493///
4494/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmadd_ps&expand=2720)
4495#[inline]
4496#[target_feature(enable = "avx512f,avx512vl")]
4497#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4498#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4499pub fn _mm256_mask_fnmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4500    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), a) }
4501}
4502
4503/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4504///
4505/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmadd_ps&expand=2722)
4506#[inline]
4507#[target_feature(enable = "avx512f,avx512vl")]
4508#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4509#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4510pub fn _mm256_maskz_fnmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4511    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), _mm256_setzero_ps()) }
4512}
4513
4514/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4515///
4516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmadd_ps&expand=2721)
4517#[inline]
4518#[target_feature(enable = "avx512f,avx512vl")]
4519#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4520#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4521pub fn _mm256_mask3_fnmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4522    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), c) }
4523}
4524
4525/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4526///
4527/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmadd_ps&expand=2716)
4528#[inline]
4529#[target_feature(enable = "avx512f,avx512vl")]
4530#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4531#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4532pub fn _mm_mask_fnmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4533    unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), a) }
4534}
4535
4536/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4537///
4538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmadd_ps&expand=2718)
4539#[inline]
4540#[target_feature(enable = "avx512f,avx512vl")]
4541#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4542#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4543pub fn _mm_maskz_fnmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
4544    unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), _mm_setzero_ps()) }
4545}
4546
4547/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4548///
4549/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmadd_ps&expand=2717)
4550#[inline]
4551#[target_feature(enable = "avx512f,avx512vl")]
4552#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4553#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4554pub fn _mm_mask3_fnmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
4555    unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), c) }
4556}
4557
4558/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
4559///
4560/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_pd&expand=2711)
4561#[inline]
4562#[target_feature(enable = "avx512f")]
4563#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4564#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4565pub fn _mm512_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4566    unsafe { simd_fma(simd_neg(a), b, c) }
4567}
4568
4569/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4570///
4571/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_pd&expand=2712)
4572#[inline]
4573#[target_feature(enable = "avx512f")]
4574#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4575#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4576pub fn _mm512_mask_fnmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
4577    unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), a) }
4578}
4579
4580/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4581///
4582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_pd&expand=2714)
4583#[inline]
4584#[target_feature(enable = "avx512f")]
4585#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4586#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4587pub fn _mm512_maskz_fnmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4588    unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), _mm512_setzero_pd()) }
4589}
4590
4591/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4592///
4593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_pd&expand=2713)
4594#[inline]
4595#[target_feature(enable = "avx512f")]
4596#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4597#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4598pub fn _mm512_mask3_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
4599    unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), c) }
4600}
4601
4602/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4603///
4604/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmadd_pd&expand=2708)
4605#[inline]
4606#[target_feature(enable = "avx512f,avx512vl")]
4607#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4608#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4609pub fn _mm256_mask_fnmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
4610    unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), a) }
4611}
4612
4613/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4614///
4615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmadd_pd&expand=2710)
4616#[inline]
4617#[target_feature(enable = "avx512f,avx512vl")]
4618#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4619#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4620pub fn _mm256_maskz_fnmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
4621    unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), _mm256_setzero_pd()) }
4622}
4623
4624/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4625///
4626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmadd_pd&expand=2709)
4627#[inline]
4628#[target_feature(enable = "avx512f,avx512vl")]
4629#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4630#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4631pub fn _mm256_mask3_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
4632    unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), c) }
4633}
4634
4635/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4636///
4637/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmadd_pd&expand=2704)
4638#[inline]
4639#[target_feature(enable = "avx512f,avx512vl")]
4640#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4641#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4642pub fn _mm_mask_fnmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
4643    unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), a) }
4644}
4645
4646/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4647///
4648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmadd_pd&expand=2706)
4649#[inline]
4650#[target_feature(enable = "avx512f,avx512vl")]
4651#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4652#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4653pub fn _mm_maskz_fnmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
4654    unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), _mm_setzero_pd()) }
4655}
4656
4657/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4658///
4659/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmadd_pd&expand=2705)
4660#[inline]
4661#[target_feature(enable = "avx512f,avx512vl")]
4662#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4663#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4664pub fn _mm_mask3_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
4665    unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), c) }
4666}
4667
4668/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
4669///
4670/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_ps&expand=2771)
4671#[inline]
4672#[target_feature(enable = "avx512f")]
4673#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4674#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4675pub fn _mm512_fnmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
4676    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
4677}
4678
4679/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4680///
4681/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_ps&expand=2772)
4682#[inline]
4683#[target_feature(enable = "avx512f")]
4684#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4685#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4686pub fn _mm512_mask_fnmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
4687    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), a) }
4688}
4689
4690/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4691///
4692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_ps&expand=2774)
4693#[inline]
4694#[target_feature(enable = "avx512f")]
4695#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4696#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4697pub fn _mm512_maskz_fnmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
4698    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), _mm512_setzero_ps()) }
4699}
4700
4701/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4702///
4703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_ps&expand=2773)
4704#[inline]
4705#[target_feature(enable = "avx512f")]
4706#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4707#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4708pub fn _mm512_mask3_fnmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
4709    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), c) }
4710}
4711
4712/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4713///
4714/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmsub_ps&expand=2768)
4715#[inline]
4716#[target_feature(enable = "avx512f,avx512vl")]
4717#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4718#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4719pub fn _mm256_mask_fnmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4720    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), a) }
4721}
4722
4723/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4724///
4725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmsub_ps&expand=2770)
4726#[inline]
4727#[target_feature(enable = "avx512f,avx512vl")]
4728#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4729#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4730pub fn _mm256_maskz_fnmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4731    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), _mm256_setzero_ps()) }
4732}
4733
4734/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4735///
4736/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmsub_ps&expand=2769)
4737#[inline]
4738#[target_feature(enable = "avx512f,avx512vl")]
4739#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4740#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4741pub fn _mm256_mask3_fnmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4742    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), c) }
4743}
4744
4745/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4746///
4747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmsub_ps&expand=2764)
4748#[inline]
4749#[target_feature(enable = "avx512f,avx512vl")]
4750#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4751#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4752pub fn _mm_mask_fnmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4753    unsafe { simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), a) }
4754}
4755
4756/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4757///
4758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmsub_ps&expand=2766)
4759#[inline]
4760#[target_feature(enable = "avx512f,avx512vl")]
4761#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4762#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4763pub fn _mm_maskz_fnmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
4764    unsafe { simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), _mm_setzero_ps()) }
4765}
4766
4767/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4768///
4769/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmsub_ps&expand=2765)
4770#[inline]
4771#[target_feature(enable = "avx512f,avx512vl")]
4772#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4773#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4774pub fn _mm_mask3_fnmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
4775    unsafe { simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), c) }
4776}
4777
4778/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
4779///
4780/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_pd&expand=2759)
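///
/// # Example
///
/// A minimal usage sketch (illustrative only; `ignore` because it needs the nightly
/// `stdarch_x86_avx512` feature and an AVX-512F CPU):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // Safety: AVX-512F support was just verified at runtime.
///     unsafe {
///         let a = _mm512_set1_pd(2.0);
///         let b = _mm512_set1_pd(3.0);
///         let c = _mm512_set1_pd(1.0);
///         // Every lane computes -(2.0 * 3.0) - 1.0.
///         let r = _mm512_fnmsub_pd(a, b, c);
///         let mut out = [0.0f64; 8];
///         _mm512_storeu_pd(out.as_mut_ptr(), r);
///         assert_eq!(out, [-7.0; 8]);
///     }
/// }
/// ```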
4781#[inline]
4782#[target_feature(enable = "avx512f")]
4783#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4784#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4785pub fn _mm512_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4786    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
4787}
4788
4789/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4790///
4791/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_pd&expand=2760)
4792#[inline]
4793#[target_feature(enable = "avx512f")]
4794#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4795#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4796pub fn _mm512_mask_fnmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
4797    unsafe { simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), a) }
4798}
4799
4800/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4801///
4802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_pd&expand=2762)
4803#[inline]
4804#[target_feature(enable = "avx512f")]
4805#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4806#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4807pub fn _mm512_maskz_fnmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4808    unsafe { simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), _mm512_setzero_pd()) }
4809}
4810
4811/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4812///
4813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_pd&expand=2761)
4814#[inline]
4815#[target_feature(enable = "avx512f")]
4816#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4817#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4818pub fn _mm512_mask3_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
4819    unsafe { simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), c) }
4820}
4821
4822/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4823///
4824/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmsub_pd&expand=2756)
4825#[inline]
4826#[target_feature(enable = "avx512f,avx512vl")]
4827#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4828#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4829pub fn _mm256_mask_fnmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
4830    unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), a) }
4831}
4832
4833/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4834///
4835/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmsub_pd&expand=2758)
4836#[inline]
4837#[target_feature(enable = "avx512f,avx512vl")]
4838#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4839#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4840pub fn _mm256_maskz_fnmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
4841    unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), _mm256_setzero_pd()) }
4842}
4843
4844/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4845///
4846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmsub_pd&expand=2757)
4847#[inline]
4848#[target_feature(enable = "avx512f,avx512vl")]
4849#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4850#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4851pub fn _mm256_mask3_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
4852    unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), c) }
4853}
4854
4855/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4856///
4857/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmsub_pd&expand=2752)
4858#[inline]
4859#[target_feature(enable = "avx512f,avx512vl")]
4860#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4861#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4862pub fn _mm_mask_fnmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
4863    unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), a) }
4864}
4865
4866/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4867///
4868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmsub_pd&expand=2754)
4869#[inline]
4870#[target_feature(enable = "avx512f,avx512vl")]
4871#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4872#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4873pub fn _mm_maskz_fnmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
4874    unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), _mm_setzero_pd()) }
4875}
4876
4877/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4878///
4879/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmsub_pd&expand=2753)
4880#[inline]
4881#[target_feature(enable = "avx512f,avx512vl")]
4882#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4883#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4884pub fn _mm_mask3_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
4885    unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), c) }
4886}
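
// NOTE: illustrative sketch only, not part of the intrinsic API above. It shows how the
// three masked fnmsub variants differ solely in where unselected lanes come from:
// `mask` copies from `a`, `maskz` writes zeros, and `mask3` copies from `c`. The
// function name and the chosen inputs are placeholders, compiled only as a test-time aid.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl,sse2")]
fn fnmsub_pd_mask_variants_sketch() -> (__m128d, __m128d, __m128d) {
    let a = _mm_set1_pd(2.0);
    let b = _mm_set1_pd(3.0);
    let c = _mm_set1_pd(1.0);
    // With a mask bit set, every variant computes -(a * b) - c = -7.0 in that lane.
    // With mask = 0b01 only lane 0 is computed; lane 1 falls back to:
    let merged_from_a = _mm_mask_fnmsub_pd(a, 0b01, b, c); // lane 1 = a[1] = 2.0
    let zeroed = _mm_maskz_fnmsub_pd(0b01, a, b, c); // lane 1 = 0.0
    let merged_from_c = _mm_mask3_fnmsub_pd(a, b, c, 0b01); // lane 1 = c[1] = 1.0
    (merged_from_a, zeroed, merged_from_c)
}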
4887
4888/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4889///
4890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rcp14_ps&expand=4502)
4891#[inline]
4892#[target_feature(enable = "avx512f")]
4893#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4894#[cfg_attr(test, assert_instr(vrcp14ps))]
4895pub fn _mm512_rcp14_ps(a: __m512) -> __m512 {
4896    unsafe { transmute(vrcp14ps(a.as_f32x16(), f32x16::ZERO, 0b11111111_11111111)) }
4897}
4898
4899/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4900///
4901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rcp14_ps&expand=4500)
4902#[inline]
4903#[target_feature(enable = "avx512f")]
4904#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4905#[cfg_attr(test, assert_instr(vrcp14ps))]
4906pub fn _mm512_mask_rcp14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
4907    unsafe { transmute(vrcp14ps(a.as_f32x16(), src.as_f32x16(), k)) }
4908}
4909
4910/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4911///
4912/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rcp14_ps&expand=4501)
4913#[inline]
4914#[target_feature(enable = "avx512f")]
4915#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4916#[cfg_attr(test, assert_instr(vrcp14ps))]
4917pub fn _mm512_maskz_rcp14_ps(k: __mmask16, a: __m512) -> __m512 {
4918    unsafe { transmute(vrcp14ps(a.as_f32x16(), f32x16::ZERO, k)) }
4919}
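
// NOTE: illustrative sketch only, not part of the intrinsic API above. It demonstrates
// the writemask/zeromask split for the vrcp14ps wrappers: masked-off lanes either keep
// `src` or become zero, while selected lanes hold 1/a to within the documented 2^-14
// relative error. The function name and inputs are placeholders.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn rcp14_ps_mask_sketch() -> (__m512, __m512) {
    let a = _mm512_set1_ps(4.0); // exact reciprocal is 0.25
    let src = _mm512_set1_ps(-1.0);
    // Even-numbered lanes (mask bit set) receive ~0.25; odd lanes keep -1.0 from `src`.
    let merged = _mm512_mask_rcp14_ps(src, 0b01010101_01010101, a);
    // Same selection, but the unselected odd lanes are zeroed instead.
    let zeroed = _mm512_maskz_rcp14_ps(0b01010101_01010101, a);
    (merged, zeroed)
}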
4920
4921/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4922///
4923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rcp14_ps&expand=4499)
4924#[inline]
4925#[target_feature(enable = "avx512f,avx512vl")]
4926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4927#[cfg_attr(test, assert_instr(vrcp14ps))]
4928pub fn _mm256_rcp14_ps(a: __m256) -> __m256 {
4929    unsafe { transmute(vrcp14ps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) }
4930}
4931
4932/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4933///
4934/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rcp14_ps&expand=4497)
4935#[inline]
4936#[target_feature(enable = "avx512f,avx512vl")]
4937#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4938#[cfg_attr(test, assert_instr(vrcp14ps))]
4939pub fn _mm256_mask_rcp14_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
4940    unsafe { transmute(vrcp14ps256(a.as_f32x8(), src.as_f32x8(), k)) }
4941}
4942
4943/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4944///
4945/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rcp14_ps&expand=4498)
4946#[inline]
4947#[target_feature(enable = "avx512f,avx512vl")]
4948#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4949#[cfg_attr(test, assert_instr(vrcp14ps))]
4950pub fn _mm256_maskz_rcp14_ps(k: __mmask8, a: __m256) -> __m256 {
4951    unsafe { transmute(vrcp14ps256(a.as_f32x8(), f32x8::ZERO, k)) }
4952}
4953
4954/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4955///
4956/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rcp14_ps&expand=4496)
4957#[inline]
4958#[target_feature(enable = "avx512f,avx512vl")]
4959#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4960#[cfg_attr(test, assert_instr(vrcp14ps))]
4961pub fn _mm_rcp14_ps(a: __m128) -> __m128 {
4962    unsafe { transmute(vrcp14ps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) }
4963}
4964
4965/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4966///
4967/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rcp14_ps&expand=4494)
4968#[inline]
4969#[target_feature(enable = "avx512f,avx512vl")]
4970#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4971#[cfg_attr(test, assert_instr(vrcp14ps))]
4972pub fn _mm_mask_rcp14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
4973    unsafe { transmute(vrcp14ps128(a.as_f32x4(), src.as_f32x4(), k)) }
4974}
4975
4976/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4977///
4978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rcp14_ps&expand=4495)
4979#[inline]
4980#[target_feature(enable = "avx512f,avx512vl")]
4981#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4982#[cfg_attr(test, assert_instr(vrcp14ps))]
4983pub fn _mm_maskz_rcp14_ps(k: __mmask8, a: __m128) -> __m128 {
4984    unsafe { transmute(vrcp14ps128(a.as_f32x4(), f32x4::ZERO, k)) }
4985}
4986
4987/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4988///
4989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rcp14_pd&expand=4493)
4990#[inline]
4991#[target_feature(enable = "avx512f")]
4992#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4993#[cfg_attr(test, assert_instr(vrcp14pd))]
4994pub fn _mm512_rcp14_pd(a: __m512d) -> __m512d {
4995    unsafe { transmute(vrcp14pd(a.as_f64x8(), f64x8::ZERO, 0b11111111)) }
4996}
4997
4998/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4999///
5000/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rcp14_pd&expand=4491)
5001#[inline]
5002#[target_feature(enable = "avx512f")]
5003#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5004#[cfg_attr(test, assert_instr(vrcp14pd))]
5005pub fn _mm512_mask_rcp14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
5006    unsafe { transmute(vrcp14pd(a.as_f64x8(), src.as_f64x8(), k)) }
5007}
5008
5009/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5010///
5011/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rcp14_pd&expand=4492)
5012#[inline]
5013#[target_feature(enable = "avx512f")]
5014#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5015#[cfg_attr(test, assert_instr(vrcp14pd))]
5016pub fn _mm512_maskz_rcp14_pd(k: __mmask8, a: __m512d) -> __m512d {
5017    unsafe { transmute(vrcp14pd(a.as_f64x8(), f64x8::ZERO, k)) }
5018}
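
// NOTE: illustrative sketch only, not part of the intrinsic API above. vrcp14pd
// guarantees only ~14 bits of relative accuracy, so callers needing more precision
// typically refine the estimate with a Newton-Raphson step, y' = y * (2 - a * y),
// which roughly doubles the number of correct bits. The helper name is a placeholder
// and the single refinement step is an assumption about the caller's accuracy needs.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn rcp14_pd_refined_sketch(a: __m512d) -> __m512d {
    let two = _mm512_set1_pd(2.0);
    // Initial ~2^-14 estimate of 1/a.
    let mut y = _mm512_rcp14_pd(a);
    // One step brings the relative error to roughly 2^-28; a further step (or a true
    // division) is still needed if near-full f64 precision is required.
    y = _mm512_mul_pd(y, _mm512_sub_pd(two, _mm512_mul_pd(a, y)));
    y
}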
5019
5020/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5021///
5022/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rcp14_pd&expand=4490)
5023#[inline]
5024#[target_feature(enable = "avx512f,avx512vl")]
5025#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5026#[cfg_attr(test, assert_instr(vrcp14pd))]
5027pub fn _mm256_rcp14_pd(a: __m256d) -> __m256d {
5028    unsafe { transmute(vrcp14pd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) }
5029}
5030
5031/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5032///
5033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rcp14_pd&expand=4488)
5034#[inline]
5035#[target_feature(enable = "avx512f,avx512vl")]
5036#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5037#[cfg_attr(test, assert_instr(vrcp14pd))]
5038pub fn _mm256_mask_rcp14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
5039    unsafe { transmute(vrcp14pd256(a.as_f64x4(), src.as_f64x4(), k)) }
5040}
5041
5042/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5043///
5044/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rcp14_pd&expand=4489)
5045#[inline]
5046#[target_feature(enable = "avx512f,avx512vl")]
5047#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5048#[cfg_attr(test, assert_instr(vrcp14pd))]
5049pub fn _mm256_maskz_rcp14_pd(k: __mmask8, a: __m256d) -> __m256d {
5050    unsafe { transmute(vrcp14pd256(a.as_f64x4(), f64x4::ZERO, k)) }
5051}
5052
5053/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5054///
5055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rcp14_pd&expand=4487)
5056#[inline]
5057#[target_feature(enable = "avx512f,avx512vl")]
5058#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5059#[cfg_attr(test, assert_instr(vrcp14pd))]
5060pub fn _mm_rcp14_pd(a: __m128d) -> __m128d {
5061    unsafe { transmute(vrcp14pd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) }
5062}
5063
5064/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5065///
5066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rcp14_pd&expand=4485)
5067#[inline]
5068#[target_feature(enable = "avx512f,avx512vl")]
5069#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5070#[cfg_attr(test, assert_instr(vrcp14pd))]
5071pub fn _mm_mask_rcp14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
5072    unsafe { transmute(vrcp14pd128(a.as_f64x2(), src.as_f64x2(), k)) }
5073}
5074
5075/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5076///
5077/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rcp14_pd&expand=4486)
5078#[inline]
5079#[target_feature(enable = "avx512f,avx512vl")]
5080#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5081#[cfg_attr(test, assert_instr(vrcp14pd))]
5082pub fn _mm_maskz_rcp14_pd(k: __mmask8, a: __m128d) -> __m128d {
5083    unsafe { transmute(vrcp14pd128(a.as_f64x2(), f64x2::ZERO, k)) }
5084}
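
// NOTE: illustrative sketch only, not part of the intrinsic API above. For the 128-bit
// variants only the low lanes of the 8-bit mask are meaningful: __m128d has two lanes,
// so only bits 0 and 1 of `k` select anything (matching the 0b00000011 full mask used
// by _mm_rcp14_pd above). The function name and inputs are placeholders.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl,sse2")]
fn rcp14_pd_128_mask_sketch() -> __m128d {
    let a = _mm_setr_pd(2.0, 8.0);
    // Bit 0 set, bit 1 clear: lane 0 becomes ~0.5, lane 1 is zeroed.
    // Bits 2..7 of the mask are ignored for a two-lane vector.
    _mm_maskz_rcp14_pd(0b1111_1101, a)
}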
5085
5086/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5087///
5088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rsqrt14_ps&expand=4819)
5089#[inline]
5090#[target_feature(enable = "avx512f")]
5091#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5092#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5093pub fn _mm512_rsqrt14_ps(a: __m512) -> __m512 {
5094    unsafe { transmute(vrsqrt14ps(a.as_f32x16(), f32x16::ZERO, 0b11111111_11111111)) }
5095}
5096
5097/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5098///
5099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rsqrt14_ps&expand=4817)
5100#[inline]
5101#[target_feature(enable = "avx512f")]
5102#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5103#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5104pub fn _mm512_mask_rsqrt14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
5105    unsafe { transmute(vrsqrt14ps(a.as_f32x16(), src.as_f32x16(), k)) }
5106}
5107
5108/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5109///
5110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rsqrt14_ps&expand=4818)
5111#[inline]
5112#[target_feature(enable = "avx512f")]
5113#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5114#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5115pub fn _mm512_maskz_rsqrt14_ps(k: __mmask16, a: __m512) -> __m512 {
5116    unsafe { transmute(vrsqrt14ps(a.as_f32x16(), f32x16::ZERO, k)) }
5117}
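
// NOTE: illustrative sketch only, not part of the intrinsic API above. Like vrcp14ps,
// vrsqrt14ps is only a ~2^-14 estimate; a single Newton-Raphson step,
// y' = y * (1.5 - 0.5 * a * y * y), is the usual way to sharpen it when more accuracy
// is needed. The helper name is a placeholder.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn rsqrt14_ps_refined_sketch(a: __m512) -> __m512 {
    let half = _mm512_set1_ps(0.5);
    let three_halves = _mm512_set1_ps(1.5);
    // Initial ~2^-14 estimate of 1/sqrt(a).
    let y = _mm512_rsqrt14_ps(a);
    // One refinement step; by the standard error analysis the relative error roughly
    // squares (to about 2^-28).
    let ayy = _mm512_mul_ps(_mm512_mul_ps(a, y), y);
    _mm512_mul_ps(y, _mm512_sub_ps(three_halves, _mm512_mul_ps(half, ayy)))
}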
5118
5119/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5120///
5121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rsqrt14_ps)
5122#[inline]
5123#[target_feature(enable = "avx512f,avx512vl")]
5124#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5125#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5126pub fn _mm256_rsqrt14_ps(a: __m256) -> __m256 {
5127    unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) }
5128}
5129
5130/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5131///
5132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rsqrt14_ps&expand=4815)
5133#[inline]
5134#[target_feature(enable = "avx512f,avx512vl")]
5135#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5136#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5137pub fn _mm256_mask_rsqrt14_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
5138    unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), src.as_f32x8(), k)) }
5139}
5140
5141/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5142///
5143/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rsqrt14_ps&expand=4816)
5144#[inline]
5145#[target_feature(enable = "avx512f,avx512vl")]
5146#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5147#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5148pub fn _mm256_maskz_rsqrt14_ps(k: __mmask8, a: __m256) -> __m256 {
5149    unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), f32x8::ZERO, k)) }
5150}
5151
5152/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5153///
5154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rsqrt14_ps)
5155#[inline]
5156#[target_feature(enable = "avx512f,avx512vl")]
5157#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5158#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5159pub fn _mm_rsqrt14_ps(a: __m128) -> __m128 {
5160    unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) }
5161}
5162
5163/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5164///
5165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rsqrt14_ps&expand=4813)
5166#[inline]
5167#[target_feature(enable = "avx512f,avx512vl")]
5168#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5169#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5170pub fn _mm_mask_rsqrt14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
5171    unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), src.as_f32x4(), k)) }
5172}
5173
5174/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5175///
5176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rsqrt14_ps&expand=4814)
5177#[inline]
5178#[target_feature(enable = "avx512f,avx512vl")]
5179#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5180#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5181pub fn _mm_maskz_rsqrt14_ps(k: __mmask8, a: __m128) -> __m128 {
5182    unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), f32x4::ZERO, k)) }
5183}
5184
5185/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5186///
5187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rsqrt14_pd&expand=4812)
5188#[inline]
5189#[target_feature(enable = "avx512f")]
5190#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5191#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5192pub fn _mm512_rsqrt14_pd(a: __m512d) -> __m512d {
5193    unsafe { transmute(vrsqrt14pd(a.as_f64x8(), f64x8::ZERO, 0b11111111)) }
5194}
5195
5196/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5197///
5198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rsqrt14_pd&expand=4810)
5199#[inline]
5200#[target_feature(enable = "avx512f")]
5201#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5202#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5203pub fn _mm512_mask_rsqrt14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
5204    unsafe { transmute(vrsqrt14pd(a.as_f64x8(), src.as_f64x8(), k)) }
5205}
5206
5207/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5208///
5209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rsqrt14_pd&expand=4811)
5210#[inline]
5211#[target_feature(enable = "avx512f")]
5212#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5213#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5214pub fn _mm512_maskz_rsqrt14_pd(k: __mmask8, a: __m512d) -> __m512d {
5215    unsafe { transmute(vrsqrt14pd(a.as_f64x8(), f64x8::ZERO, k)) }
5216}
5217
5218/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5219///
5220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rsqrt14_pd)
5221#[inline]
5222#[target_feature(enable = "avx512f,avx512vl")]
5223#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5224#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5225pub fn _mm256_rsqrt14_pd(a: __m256d) -> __m256d {
5226    unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) }
5227}
5228
5229/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5230///
5231/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rsqrt14_pd&expand=4808)
5232#[inline]
5233#[target_feature(enable = "avx512f,avx512vl")]
5234#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5235#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5236pub fn _mm256_mask_rsqrt14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
5237    unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), src.as_f64x4(), k)) }
5238}
5239
5240/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5241///
5242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rsqrt14_pd&expand=4809)
5243#[inline]
5244#[target_feature(enable = "avx512f,avx512vl")]
5245#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5246#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5247pub fn _mm256_maskz_rsqrt14_pd(k: __mmask8, a: __m256d) -> __m256d {
5248    unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), f64x4::ZERO, k)) }
5249}
5250
5251/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5252///
5253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rsqrt14_pd)
5254#[inline]
5255#[target_feature(enable = "avx512f,avx512vl")]
5256#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5257#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5258pub fn _mm_rsqrt14_pd(a: __m128d) -> __m128d {
5259    unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) }
5260}
5261
5262/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5263///
5264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rsqrt14_pd&expand=4806)
5265#[inline]
5266#[target_feature(enable = "avx512f,avx512vl")]
5267#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5268#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5269pub fn _mm_mask_rsqrt14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
5270    unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), src.as_f64x2(), k)) }
5271}
5272
5273/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5274///
5275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rsqrt14_pd&expand=4807)
5276#[inline]
5277#[target_feature(enable = "avx512f,avx512vl")]
5278#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5279#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5280pub fn _mm_maskz_rsqrt14_pd(k: __mmask8, a: __m128d) -> __m128d {
5281    unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), f64x2::ZERO, k)) }
5282}
5283
5284/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5285///
5286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_ps&expand=2844)
5287#[inline]
5288#[target_feature(enable = "avx512f")]
5289#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5290#[cfg_attr(test, assert_instr(vgetexpps))]
5291pub fn _mm512_getexp_ps(a: __m512) -> __m512 {
5292    unsafe {
5293        transmute(vgetexpps(
5294            a.as_f32x16(),
5295            f32x16::ZERO,
5296            0b11111111_11111111,
5297            _MM_FROUND_CUR_DIRECTION,
5298        ))
5299    }
5300}
5301
5302/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5303///
5304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_ps&expand=2845)
5305#[inline]
5306#[target_feature(enable = "avx512f")]
5307#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5308#[cfg_attr(test, assert_instr(vgetexpps))]
5309pub fn _mm512_mask_getexp_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
5310    unsafe {
5311        transmute(vgetexpps(
5312            a.as_f32x16(),
5313            src.as_f32x16(),
5314            k,
5315            _MM_FROUND_CUR_DIRECTION,
5316        ))
5317    }
5318}
5319
5320/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5321///
5322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_ps&expand=2846)
5323#[inline]
5324#[target_feature(enable = "avx512f")]
5325#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5326#[cfg_attr(test, assert_instr(vgetexpps))]
5327pub fn _mm512_maskz_getexp_ps(k: __mmask16, a: __m512) -> __m512 {
5328    unsafe {
5329        transmute(vgetexpps(
5330            a.as_f32x16(),
5331            f32x16::ZERO,
5332            k,
5333            _MM_FROUND_CUR_DIRECTION,
5334        ))
5335    }
5336}
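
// NOTE: illustrative sketch only, not part of the intrinsic API above. It spells out
// the floor(log2(x)) behaviour of the vgetexpps wrappers with a concrete value. The
// function name and the sample inputs are placeholders.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn getexp_ps_sketch() -> __m512 {
    // 20.0 = 1.25 * 2^4, so every lane of the result is 4.0;
    // an input of 0.75 (= 1.5 * 2^-1) would instead yield -1.0 per lane.
    let a = _mm512_set1_ps(20.0);
    _mm512_getexp_ps(a)
}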
5337
5338/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5339///
5340/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getexp_ps&expand=2841)
5341#[inline]
5342#[target_feature(enable = "avx512f,avx512vl")]
5343#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5344#[cfg_attr(test, assert_instr(vgetexpps))]
5345pub fn _mm256_getexp_ps(a: __m256) -> __m256 {
5346    unsafe { transmute(vgetexpps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) }
5347}
5348
5349/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5350///
5351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getexp_ps&expand=2842)
5352#[inline]
5353#[target_feature(enable = "avx512f,avx512vl")]
5354#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5355#[cfg_attr(test, assert_instr(vgetexpps))]
5356pub fn _mm256_mask_getexp_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
5357    unsafe { transmute(vgetexpps256(a.as_f32x8(), src.as_f32x8(), k)) }
5358}
5359
5360/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5361///
5362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getexp_ps&expand=2843)
5363#[inline]
5364#[target_feature(enable = "avx512f,avx512vl")]
5365#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5366#[cfg_attr(test, assert_instr(vgetexpps))]
5367pub fn _mm256_maskz_getexp_ps(k: __mmask8, a: __m256) -> __m256 {
5368    unsafe { transmute(vgetexpps256(a.as_f32x8(), f32x8::ZERO, k)) }
5369}
5370
5371/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5372///
5373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getexp_ps&expand=2838)
5374#[inline]
5375#[target_feature(enable = "avx512f,avx512vl")]
5376#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5377#[cfg_attr(test, assert_instr(vgetexpps))]
5378pub fn _mm_getexp_ps(a: __m128) -> __m128 {
5379    unsafe { transmute(vgetexpps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) }
5380}
5381
5382/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5383///
5384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getexp_ps&expand=2839)
5385#[inline]
5386#[target_feature(enable = "avx512f,avx512vl")]
5387#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5388#[cfg_attr(test, assert_instr(vgetexpps))]
5389pub fn _mm_mask_getexp_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
5390    unsafe { transmute(vgetexpps128(a.as_f32x4(), src.as_f32x4(), k)) }
5391}
5392
5393/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5394///
5395/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getexp_ps&expand=2840)
5396#[inline]
5397#[target_feature(enable = "avx512f,avx512vl")]
5398#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5399#[cfg_attr(test, assert_instr(vgetexpps))]
5400pub fn _mm_maskz_getexp_ps(k: __mmask8, a: __m128) -> __m128 {
5401    unsafe { transmute(vgetexpps128(a.as_f32x4(), f32x4::ZERO, k)) }
5402}
5403
5404/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5405///
5406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_pd&expand=2835)
5407#[inline]
5408#[target_feature(enable = "avx512f")]
5409#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5410#[cfg_attr(test, assert_instr(vgetexppd))]
5411pub fn _mm512_getexp_pd(a: __m512d) -> __m512d {
5412    unsafe {
5413        transmute(vgetexppd(
5414            a.as_f64x8(),
5415            f64x8::ZERO,
5416            0b11111111,
5417            _MM_FROUND_CUR_DIRECTION,
5418        ))
5419    }
5420}
5421
5422/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5423///
5424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_pd&expand=2836)
5425#[inline]
5426#[target_feature(enable = "avx512f")]
5427#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5428#[cfg_attr(test, assert_instr(vgetexppd))]
5429pub fn _mm512_mask_getexp_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
5430    unsafe {
5431        transmute(vgetexppd(
5432            a.as_f64x8(),
5433            src.as_f64x8(),
5434            k,
5435            _MM_FROUND_CUR_DIRECTION,
5436        ))
5437    }
5438}
5439
5440/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5441///
5442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_pd&expand=2837)
5443#[inline]
5444#[target_feature(enable = "avx512f")]
5445#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5446#[cfg_attr(test, assert_instr(vgetexppd))]
5447pub fn _mm512_maskz_getexp_pd(k: __mmask8, a: __m512d) -> __m512d {
5448    unsafe {
5449        transmute(vgetexppd(
5450            a.as_f64x8(),
5451            f64x8::ZERO,
5452            k,
5453            _MM_FROUND_CUR_DIRECTION,
5454        ))
5455    }
5456}
5457
5458/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5459///
5460/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getexp_pd&expand=2832)
5461#[inline]
5462#[target_feature(enable = "avx512f,avx512vl")]
5463#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5464#[cfg_attr(test, assert_instr(vgetexppd))]
5465pub fn _mm256_getexp_pd(a: __m256d) -> __m256d {
5466    unsafe { transmute(vgetexppd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) }
5467}
5468
5469/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5470///
5471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getexp_pd&expand=2833)
5472#[inline]
5473#[target_feature(enable = "avx512f,avx512vl")]
5474#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5475#[cfg_attr(test, assert_instr(vgetexppd))]
5476pub fn _mm256_mask_getexp_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
5477    unsafe { transmute(vgetexppd256(a.as_f64x4(), src.as_f64x4(), k)) }
5478}
5479
5480/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5481///
5482/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getexp_pd&expand=2834)
5483#[inline]
5484#[target_feature(enable = "avx512f,avx512vl")]
5485#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5486#[cfg_attr(test, assert_instr(vgetexppd))]
5487pub fn _mm256_maskz_getexp_pd(k: __mmask8, a: __m256d) -> __m256d {
5488    unsafe { transmute(vgetexppd256(a.as_f64x4(), f64x4::ZERO, k)) }
5489}
5490
5491/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5492///
5493/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getexp_pd&expand=2829)
5494#[inline]
5495#[target_feature(enable = "avx512f,avx512vl")]
5496#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5497#[cfg_attr(test, assert_instr(vgetexppd))]
5498pub fn _mm_getexp_pd(a: __m128d) -> __m128d {
5499    unsafe { transmute(vgetexppd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) }
5500}
5501
5502/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5503///
5504/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getexp_pd&expand=2830)
5505#[inline]
5506#[target_feature(enable = "avx512f,avx512vl")]
5507#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5508#[cfg_attr(test, assert_instr(vgetexppd))]
5509pub fn _mm_mask_getexp_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
5510    unsafe { transmute(vgetexppd128(a.as_f64x2(), src.as_f64x2(), k)) }
5511}
5512
5513/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5514///
5515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getexp_pd&expand=2831)
5516#[inline]
5517#[target_feature(enable = "avx512f,avx512vl")]
5518#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5519#[cfg_attr(test, assert_instr(vgetexppd))]
5520pub fn _mm_maskz_getexp_pd(k: __mmask8, a: __m128d) -> __m128d {
5521    unsafe { transmute(vgetexppd128(a.as_f64x2(), f64x2::ZERO, k)) }
5522}
5523
5524/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8\[7:4\], and store the results in dst.\
5525/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5526/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5527/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5528/// * [`_MM_FROUND_TO_POS_INF`] : round up
5529/// * [`_MM_FROUND_TO_ZERO`] : truncate
5530/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5531///
5532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_ps&expand=4784)
5533#[inline]
5534#[target_feature(enable = "avx512f")]
5535#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5536#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5537#[rustc_legacy_const_generics(1)]
5538pub fn _mm512_roundscale_ps<const IMM8: i32>(a: __m512) -> __m512 {
5539    unsafe {
5540        static_assert_uimm_bits!(IMM8, 8);
5541        let a = a.as_f32x16();
5542        let r = vrndscaleps(
5543            a,
5544            IMM8,
5545            f32x16::ZERO,
5546            0b11111111_11111111,
5547            _MM_FROUND_CUR_DIRECTION,
5548        );
5549        transmute(r)
5550    }
5551}
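
// NOTE: illustrative sketch only, not part of the intrinsic API above. The roundscale
// immediate packs two fields: imm8[7:4] holds M, the number of fraction bits to keep
// (results are multiples of 2^-M), and imm8[2:0] selects one of the rounding modes
// listed above. The function name and the sample value are placeholders.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn roundscale_ps_imm8_sketch() -> __m512 {
    let a = _mm512_set1_ps(1.7);
    // M = 1 with round-to-nearest: each lane is rounded to the nearest multiple of
    // 0.5, so 1.7 becomes 1.5.
    _mm512_roundscale_ps::<{ (1 << 4) | _MM_FROUND_TO_NEAREST_INT }>(a)
}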
5552
5553/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8\[7:4\], and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5554/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5555/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5556/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5557/// * [`_MM_FROUND_TO_POS_INF`] : round up
5558/// * [`_MM_FROUND_TO_ZERO`] : truncate
5559/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5560///
5561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_ps&expand=4782)
5562#[inline]
5563#[target_feature(enable = "avx512f")]
5564#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5565#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5566#[rustc_legacy_const_generics(3)]
5567pub fn _mm512_mask_roundscale_ps<const IMM8: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
5568    unsafe {
5569        static_assert_uimm_bits!(IMM8, 8);
5570        let a = a.as_f32x16();
5571        let src = src.as_f32x16();
5572        let r = vrndscaleps(a, IMM8, src, k, _MM_FROUND_CUR_DIRECTION);
5573        transmute(r)
5574    }
5575}
5576
5577/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8\[7:4\], and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5578/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5579/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5580/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5581/// * [`_MM_FROUND_TO_POS_INF`] : round up
5582/// * [`_MM_FROUND_TO_ZERO`] : truncate
5583/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5584///
5585/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_ps&expand=4783)
5586#[inline]
5587#[target_feature(enable = "avx512f")]
5588#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5589#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5590#[rustc_legacy_const_generics(2)]
5591pub fn _mm512_maskz_roundscale_ps<const IMM8: i32>(k: __mmask16, a: __m512) -> __m512 {
5592    unsafe {
5593        static_assert_uimm_bits!(IMM8, 8);
5594        let a = a.as_f32x16();
5595        let r = vrndscaleps(a, IMM8, f32x16::ZERO, k, _MM_FROUND_CUR_DIRECTION);
5596        transmute(r)
5597    }
5598}
5599
5600/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8\[7:4\], and store the results in dst.\
5601/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5602/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5603/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5604/// * [`_MM_FROUND_TO_POS_INF`] : round up
5605/// * [`_MM_FROUND_TO_ZERO`] : truncate
5606/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5607///
5608/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_roundscale_ps&expand=4781)
5609#[inline]
5610#[target_feature(enable = "avx512f,avx512vl")]
5611#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5612#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 250))]
5613#[rustc_legacy_const_generics(1)]
5614pub fn _mm256_roundscale_ps<const IMM8: i32>(a: __m256) -> __m256 {
5615    unsafe {
5616        static_assert_uimm_bits!(IMM8, 8);
5617        let a = a.as_f32x8();
5618        let r = vrndscaleps256(a, IMM8, f32x8::ZERO, 0b11111111);
5619        transmute(r)
5620    }
5621}
5622
5623/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8\[7:4\], and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5624/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5625/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5626/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5627/// * [`_MM_FROUND_TO_POS_INF`] : round up
5628/// * [`_MM_FROUND_TO_ZERO`] : truncate
5629/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5630///
5631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_roundscale_ps&expand=4779)
5632#[inline]
5633#[target_feature(enable = "avx512f,avx512vl")]
5634#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5635#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5636#[rustc_legacy_const_generics(3)]
5637pub fn _mm256_mask_roundscale_ps<const IMM8: i32>(src: __m256, k: __mmask8, a: __m256) -> __m256 {
5638    unsafe {
5639        static_assert_uimm_bits!(IMM8, 8);
5640        let a = a.as_f32x8();
5641        let src = src.as_f32x8();
5642        let r = vrndscaleps256(a, IMM8, src, k);
5643        transmute(r)
5644    }
5645}
5646
5647/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8\[7:4\], and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5648/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5649/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5650/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5651/// * [`_MM_FROUND_TO_POS_INF`] : round up
5652/// * [`_MM_FROUND_TO_ZERO`] : truncate
5653/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5654///
5655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_roundscale_ps&expand=4780)
5656#[inline]
5657#[target_feature(enable = "avx512f,avx512vl")]
5658#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5659#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5660#[rustc_legacy_const_generics(2)]
5661pub fn _mm256_maskz_roundscale_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m256 {
5662    unsafe {
5663        static_assert_uimm_bits!(IMM8, 8);
5664        let a = a.as_f32x8();
5665        let r = vrndscaleps256(a, IMM8, f32x8::ZERO, k);
5666        transmute(r)
5667    }
5668}
5669
5670/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8\[7:4\], and store the results in dst.\
5671/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5672/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5673/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5674/// * [`_MM_FROUND_TO_POS_INF`] : round up
5675/// * [`_MM_FROUND_TO_ZERO`] : truncate
5676/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5677///
5678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_roundscale_ps&expand=4778)
5679#[inline]
5680#[target_feature(enable = "avx512f,avx512vl")]
5681#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5682#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 250))]
5683#[rustc_legacy_const_generics(1)]
5684pub fn _mm_roundscale_ps<const IMM8: i32>(a: __m128) -> __m128 {
5685    unsafe {
5686        static_assert_uimm_bits!(IMM8, 8);
5687        let a = a.as_f32x4();
5688        let r = vrndscaleps128(a, IMM8, f32x4::ZERO, 0b00001111);
5689        transmute(r)
5690    }
5691}
5692
5693/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8\[7:4\], and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5694/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5695/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5696/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5697/// * [`_MM_FROUND_TO_POS_INF`] : round up
5698/// * [`_MM_FROUND_TO_ZERO`] : truncate
5699/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5700///
5701/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_roundscale_ps&expand=4776)
5702#[inline]
5703#[target_feature(enable = "avx512f,avx512vl")]
5704#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5705#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5706#[rustc_legacy_const_generics(3)]
5707pub fn _mm_mask_roundscale_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 {
5708    unsafe {
5709        static_assert_uimm_bits!(IMM8, 8);
5710        let a = a.as_f32x4();
5711        let src = src.as_f32x4();
5712        let r = vrndscaleps128(a, IMM8, src, k);
5713        transmute(r)
5714    }
5715}
5716
5717/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8\[7:4\], and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5718/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5719/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5720/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5721/// * [`_MM_FROUND_TO_POS_INF`] : round up
5722/// * [`_MM_FROUND_TO_ZERO`] : truncate
5723/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5724///
5725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_roundscale_ps&expand=4777)
5726#[inline]
5727#[target_feature(enable = "avx512f,avx512vl")]
5728#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5729#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5730#[rustc_legacy_const_generics(2)]
5731pub fn _mm_maskz_roundscale_ps<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128 {
5732    unsafe {
5733        static_assert_uimm_bits!(IMM8, 8);
5734        let a = a.as_f32x4();
5735        let r = vrndscaleps128(a, IMM8, f32x4::ZERO, k);
5736        transmute(r)
5737    }
5738}
5739
5740/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5741/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5742/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5743/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5744/// * [`_MM_FROUND_TO_POS_INF`] : round up
5745/// * [`_MM_FROUND_TO_ZERO`] : truncate
5746/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5747///
5748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_pd&expand=4775)
5749#[inline]
5750#[target_feature(enable = "avx512f")]
5751#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5752#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5753#[rustc_legacy_const_generics(1)]
5754pub fn _mm512_roundscale_pd<const IMM8: i32>(a: __m512d) -> __m512d {
5755    unsafe {
5756        static_assert_uimm_bits!(IMM8, 8);
5757        let a = a.as_f64x8();
5758        let r = vrndscalepd(a, IMM8, f64x8::ZERO, 0b11111111, _MM_FROUND_CUR_DIRECTION);
5759        transmute(r)
5760    }
5761}
5762
5763/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5764/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5765/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5766/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5767/// * [`_MM_FROUND_TO_POS_INF`] : round up
5768/// * [`_MM_FROUND_TO_ZERO`] : truncate
5769/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5770///
5771/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_pd&expand=4773)
5772#[inline]
5773#[target_feature(enable = "avx512f")]
5774#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5775#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5776#[rustc_legacy_const_generics(3)]
5777pub fn _mm512_mask_roundscale_pd<const IMM8: i32>(
5778    src: __m512d,
5779    k: __mmask8,
5780    a: __m512d,
5781) -> __m512d {
5782    unsafe {
5783        static_assert_uimm_bits!(IMM8, 8);
5784        let a = a.as_f64x8();
5785        let src = src.as_f64x8();
5786        let r = vrndscalepd(a, IMM8, src, k, _MM_FROUND_CUR_DIRECTION);
5787        transmute(r)
5788    }
5789}
5790
5791/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5792/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5793/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5794/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5795/// * [`_MM_FROUND_TO_POS_INF`] : round up
5796/// * [`_MM_FROUND_TO_ZERO`] : truncate
5797/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5798///
5799/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_pd&expand=4774)
5800#[inline]
5801#[target_feature(enable = "avx512f")]
5802#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5803#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5804#[rustc_legacy_const_generics(2)]
5805pub fn _mm512_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m512d {
5806    unsafe {
5807        static_assert_uimm_bits!(IMM8, 8);
5808        let a = a.as_f64x8();
5809        let r = vrndscalepd(a, IMM8, f64x8::ZERO, k, _MM_FROUND_CUR_DIRECTION);
5810        transmute(r)
5811    }
5812}
5813
5814/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5815/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5816/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5817/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5818/// * [`_MM_FROUND_TO_POS_INF`] : round up
5819/// * [`_MM_FROUND_TO_ZERO`] : truncate
5820/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5821///
5822/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_roundscale_pd&expand=4772)
5823#[inline]
5824#[target_feature(enable = "avx512f,avx512vl")]
5825#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5826#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5827#[rustc_legacy_const_generics(1)]
5828pub fn _mm256_roundscale_pd<const IMM8: i32>(a: __m256d) -> __m256d {
5829    unsafe {
5830        static_assert_uimm_bits!(IMM8, 8);
5831        let a = a.as_f64x4();
5832        let r = vrndscalepd256(a, IMM8, f64x4::ZERO, 0b00001111);
5833        transmute(r)
5834    }
5835}
5836
5837/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5838/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5839/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5840/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5841/// * [`_MM_FROUND_TO_POS_INF`] : round up
5842/// * [`_MM_FROUND_TO_ZERO`] : truncate
5843/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5844///
5845/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_roundscale_pd&expand=4770)
5846#[inline]
5847#[target_feature(enable = "avx512f,avx512vl")]
5848#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5849#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5850#[rustc_legacy_const_generics(3)]
5851pub fn _mm256_mask_roundscale_pd<const IMM8: i32>(
5852    src: __m256d,
5853    k: __mmask8,
5854    a: __m256d,
5855) -> __m256d {
5856    unsafe {
5857        static_assert_uimm_bits!(IMM8, 8);
5858        let a = a.as_f64x4();
5859        let src = src.as_f64x4();
5860        let r = vrndscalepd256(a, IMM8, src, k);
5861        transmute(r)
5862    }
5863}
5864
5865/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5866/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5867/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5868/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5869/// * [`_MM_FROUND_TO_POS_INF`] : round up
5870/// * [`_MM_FROUND_TO_ZERO`] : truncate
5871/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5872///
5873/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_roundscale_pd&expand=4771)
5874#[inline]
5875#[target_feature(enable = "avx512f,avx512vl")]
5876#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5877#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5878#[rustc_legacy_const_generics(2)]
5879pub fn _mm256_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m256d) -> __m256d {
5880    unsafe {
5881        static_assert_uimm_bits!(IMM8, 8);
5882        let a = a.as_f64x4();
5883        let r = vrndscalepd256(a, IMM8, f64x4::ZERO, k);
5884        transmute(r)
5885    }
5886}
5887
5888/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5889/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5890/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5891/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5892/// * [`_MM_FROUND_TO_POS_INF`] : round up
5893/// * [`_MM_FROUND_TO_ZERO`] : truncate
5894/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5895///
5896/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_roundscale_pd&expand=4769)
5897#[inline]
5898#[target_feature(enable = "avx512f,avx512vl")]
5899#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5900#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5901#[rustc_legacy_const_generics(1)]
5902pub fn _mm_roundscale_pd<const IMM8: i32>(a: __m128d) -> __m128d {
5903    unsafe {
5904        static_assert_uimm_bits!(IMM8, 8);
5905        let a = a.as_f64x2();
5906        let r = vrndscalepd128(a, IMM8, f64x2::ZERO, 0b00000011);
5907        transmute(r)
5908    }
5909}
5910
5911/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5912/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5913/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5914/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5915/// * [`_MM_FROUND_TO_POS_INF`] : round up
5916/// * [`_MM_FROUND_TO_ZERO`] : truncate
5917/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5918///
5919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_roundscale_pd&expand=4767)
5920#[inline]
5921#[target_feature(enable = "avx512f,avx512vl")]
5922#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5923#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5924#[rustc_legacy_const_generics(3)]
5925pub fn _mm_mask_roundscale_pd<const IMM8: i32>(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
5926    unsafe {
5927        static_assert_uimm_bits!(IMM8, 8);
5928        let a = a.as_f64x2();
5929        let src = src.as_f64x2();
5930        let r = vrndscalepd128(a, IMM8, src, k);
5931        transmute(r)
5932    }
5933}
5934
5935/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5936/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5937/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5938/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5939/// * [`_MM_FROUND_TO_POS_INF`] : round up
5940/// * [`_MM_FROUND_TO_ZERO`] : truncate
5941/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5942///
5943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_roundscale_pd&expand=4768)
5944#[inline]
5945#[target_feature(enable = "avx512f,avx512vl")]
5946#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5947#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5948#[rustc_legacy_const_generics(2)]
5949pub fn _mm_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m128d) -> __m128d {
5950    unsafe {
5951        static_assert_uimm_bits!(IMM8, 8);
5952        let a = a.as_f64x2();
5953        let r = vrndscalepd128(a, IMM8, f64x2::ZERO, k);
5954        transmute(r)
5955    }
5956}
5957
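// Illustrative sketch (not part of the original source): the `mask`/`maskz`
// roundscale variants above differ only in how lanes with a clear mask bit are
// filled. For example, with the two f64 lanes of a __m128d:
//
//     let a = _mm_set1_pd(1.75);
//     let src = _mm_set1_pd(9.0);
//     let m = _mm_mask_roundscale_pd::<0x00>(src, 0b01, a); // [2.0, 9.0]: lane 1 copied from src
//     let z = _mm_maskz_roundscale_pd::<0x00>(0b01, a);     // [2.0, 0.0]: lane 1 zeroed
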
5958/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
5959///
5960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_ps&expand=4883)
5961#[inline]
5962#[target_feature(enable = "avx512f")]
5963#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5964#[cfg_attr(test, assert_instr(vscalefps))]
5965pub fn _mm512_scalef_ps(a: __m512, b: __m512) -> __m512 {
5966    unsafe {
5967        transmute(vscalefps(
5968            a.as_f32x16(),
5969            b.as_f32x16(),
5970            f32x16::ZERO,
5971            0b11111111_11111111,
5972            _MM_FROUND_CUR_DIRECTION,
5973        ))
5974    }
5975}
5976
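// Illustrative sketch (not part of the original source): `scalef` computes
// a[i] * 2^floor(b[i]) per lane, i.e. a vectorized `ldexp` whose exponents come from
// another vector. Assuming an `avx512f` context:
//
//     let a = _mm512_set1_ps(3.0);
//     let b = _mm512_set1_ps(2.0);
//     let r = _mm512_scalef_ps(a, b); // every lane is 3.0 * 2^2 = 12.0
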
5977/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5978///
5979/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_ps&expand=4881)
5980#[inline]
5981#[target_feature(enable = "avx512f")]
5982#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5983#[cfg_attr(test, assert_instr(vscalefps))]
5984pub fn _mm512_mask_scalef_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
5985    unsafe {
5986        transmute(vscalefps(
5987            a.as_f32x16(),
5988            b.as_f32x16(),
5989            src.as_f32x16(),
5990            k,
5991            _MM_FROUND_CUR_DIRECTION,
5992        ))
5993    }
5994}
5995
5996/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5997///
5998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_ps&expand=4882)
5999#[inline]
6000#[target_feature(enable = "avx512f")]
6001#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6002#[cfg_attr(test, assert_instr(vscalefps))]
6003pub fn _mm512_maskz_scalef_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
6004    unsafe {
6005        transmute(vscalefps(
6006            a.as_f32x16(),
6007            b.as_f32x16(),
6008            f32x16::ZERO,
6009            k,
6010            _MM_FROUND_CUR_DIRECTION,
6011        ))
6012    }
6013}
6014
6015/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
6016///
6017/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_scalef_ps&expand=4880)
6018#[inline]
6019#[target_feature(enable = "avx512f,avx512vl")]
6020#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6021#[cfg_attr(test, assert_instr(vscalefps))]
6022pub fn _mm256_scalef_ps(a: __m256, b: __m256) -> __m256 {
6023    unsafe {
6024        transmute(vscalefps256(
6025            a.as_f32x8(),
6026            b.as_f32x8(),
6027            f32x8::ZERO,
6028            0b11111111,
6029        ))
6030    }
6031}
6032
6033/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6034///
6035/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_scalef_ps&expand=4878)
6036#[inline]
6037#[target_feature(enable = "avx512f,avx512vl")]
6038#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6039#[cfg_attr(test, assert_instr(vscalefps))]
6040pub fn _mm256_mask_scalef_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
6041    unsafe { transmute(vscalefps256(a.as_f32x8(), b.as_f32x8(), src.as_f32x8(), k)) }
6042}
6043
6044/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6045///
6046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_scalef_ps&expand=4879)
6047#[inline]
6048#[target_feature(enable = "avx512f,avx512vl")]
6049#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6050#[cfg_attr(test, assert_instr(vscalefps))]
6051pub fn _mm256_maskz_scalef_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
6052    unsafe { transmute(vscalefps256(a.as_f32x8(), b.as_f32x8(), f32x8::ZERO, k)) }
6053}
6054
6055/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
6056///
6057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_scalef_ps&expand=4877)
6058#[inline]
6059#[target_feature(enable = "avx512f,avx512vl")]
6060#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6061#[cfg_attr(test, assert_instr(vscalefps))]
6062pub fn _mm_scalef_ps(a: __m128, b: __m128) -> __m128 {
6063    unsafe {
6064        transmute(vscalefps128(
6065            a.as_f32x4(),
6066            b.as_f32x4(),
6067            f32x4::ZERO,
6068            0b00001111,
6069        ))
6070    }
6071}
6072
6073/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6074///
6075/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_scalef_ps&expand=4875)
6076#[inline]
6077#[target_feature(enable = "avx512f,avx512vl")]
6078#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6079#[cfg_attr(test, assert_instr(vscalefps))]
6080pub fn _mm_mask_scalef_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
6081    unsafe { transmute(vscalefps128(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) }
6082}
6083
6084/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6085///
6086/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_scalef_ps&expand=4876)
6087#[inline]
6088#[target_feature(enable = "avx512f,avx512vl")]
6089#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6090#[cfg_attr(test, assert_instr(vscalefps))]
6091pub fn _mm_maskz_scalef_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
6092    unsafe { transmute(vscalefps128(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) }
6093}
6094
6095/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
6096///
6097/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_pd&expand=4874)
6098#[inline]
6099#[target_feature(enable = "avx512f")]
6100#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6101#[cfg_attr(test, assert_instr(vscalefpd))]
6102pub fn _mm512_scalef_pd(a: __m512d, b: __m512d) -> __m512d {
6103    unsafe {
6104        transmute(vscalefpd(
6105            a.as_f64x8(),
6106            b.as_f64x8(),
6107            f64x8::ZERO,
6108            0b11111111,
6109            _MM_FROUND_CUR_DIRECTION,
6110        ))
6111    }
6112}
6113
6114/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6115///
6116/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_pd&expand=4872)
6117#[inline]
6118#[target_feature(enable = "avx512f")]
6119#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6120#[cfg_attr(test, assert_instr(vscalefpd))]
6121pub fn _mm512_mask_scalef_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
6122    unsafe {
6123        transmute(vscalefpd(
6124            a.as_f64x8(),
6125            b.as_f64x8(),
6126            src.as_f64x8(),
6127            k,
6128            _MM_FROUND_CUR_DIRECTION,
6129        ))
6130    }
6131}
6132
6133/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6134///
6135/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_pd&expand=4873)
6136#[inline]
6137#[target_feature(enable = "avx512f")]
6138#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6139#[cfg_attr(test, assert_instr(vscalefpd))]
6140pub fn _mm512_maskz_scalef_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
6141    unsafe {
6142        transmute(vscalefpd(
6143            a.as_f64x8(),
6144            b.as_f64x8(),
6145            f64x8::ZERO,
6146            k,
6147            _MM_FROUND_CUR_DIRECTION,
6148        ))
6149    }
6150}
6151
6152/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
6153///
6154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_scalef_pd&expand=4871)
6155#[inline]
6156#[target_feature(enable = "avx512f,avx512vl")]
6157#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6158#[cfg_attr(test, assert_instr(vscalefpd))]
6159pub fn _mm256_scalef_pd(a: __m256d, b: __m256d) -> __m256d {
6160    unsafe {
6161        transmute(vscalefpd256(
6162            a.as_f64x4(),
6163            b.as_f64x4(),
6164            f64x4::ZERO,
6165            0b00001111,
6166        ))
6167    }
6168}
6169
6170/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6171///
6172/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_scalef_pd&expand=4869)
6173#[inline]
6174#[target_feature(enable = "avx512f,avx512vl")]
6175#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6176#[cfg_attr(test, assert_instr(vscalefpd))]
6177pub fn _mm256_mask_scalef_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
6178    unsafe { transmute(vscalefpd256(a.as_f64x4(), b.as_f64x4(), src.as_f64x4(), k)) }
6179}
6180
6181/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6182///
6183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_scalef_pd&expand=4870)
6184#[inline]
6185#[target_feature(enable = "avx512f,avx512vl")]
6186#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6187#[cfg_attr(test, assert_instr(vscalefpd))]
6188pub fn _mm256_maskz_scalef_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
6189    unsafe { transmute(vscalefpd256(a.as_f64x4(), b.as_f64x4(), f64x4::ZERO, k)) }
6190}
6191
6192/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
6193///
6194/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_scalef_pd&expand=4868)
6195#[inline]
6196#[target_feature(enable = "avx512f,avx512vl")]
6197#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6198#[cfg_attr(test, assert_instr(vscalefpd))]
6199pub fn _mm_scalef_pd(a: __m128d, b: __m128d) -> __m128d {
6200    unsafe {
6201        transmute(vscalefpd128(
6202            a.as_f64x2(),
6203            b.as_f64x2(),
6204            f64x2::ZERO,
6205            0b00000011,
6206        ))
6207    }
6208}
6209
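// Illustrative sketch (not part of the original source): the exponent operand of
// `scalef` is floored before scaling, so fractional exponents round toward negative
// infinity:
//
//     let a = _mm_set1_pd(1.0);
//     let b = _mm_set1_pd(-1.5);
//     let r = _mm_scalef_pd(a, b); // 1.0 * 2^floor(-1.5) = 2^-2 = 0.25 in both lanes
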
6210/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6211///
6212/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_scalef_pd&expand=4866)
6213#[inline]
6214#[target_feature(enable = "avx512f,avx512vl")]
6215#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6216#[cfg_attr(test, assert_instr(vscalefpd))]
6217pub fn _mm_mask_scalef_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
6218    unsafe { transmute(vscalefpd128(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) }
6219}
6220
6221/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6222///
6223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_scalef_pd&expand=4867)
6224#[inline]
6225#[target_feature(enable = "avx512f,avx512vl")]
6226#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6227#[cfg_attr(test, assert_instr(vscalefpd))]
6228pub fn _mm_maskz_scalef_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
6229    unsafe { transmute(vscalefpd128(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) }
6230}
6231
6232/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6233///
6234/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_ps&expand=2499)
6235#[inline]
6236#[target_feature(enable = "avx512f")]
6237#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6238#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6239#[rustc_legacy_const_generics(3)]
6240pub fn _mm512_fixupimm_ps<const IMM8: i32>(a: __m512, b: __m512, c: __m512i) -> __m512 {
6241    unsafe {
6242        static_assert_uimm_bits!(IMM8, 8);
6243        let a = a.as_f32x16();
6244        let b = b.as_f32x16();
6245        let c = c.as_i32x16();
6246        let r = vfixupimmps(a, b, c, IMM8, 0b11111111_11111111, _MM_FROUND_CUR_DIRECTION);
6247        transmute(r)
6248    }
6249}
6250
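// Illustrative sketch (not part of the original source): in `fixupimm`, each element
// of `b` is classified into one of eight token types (quiet NaN, signaling NaN, zero,
// one, -inf, +inf, negative, positive); the token selects a 4-bit response code from
// the matching 32-bit element of `c`, and that code picks the output (keep the
// element from `a`, pass `b` through, or substitute a constant such as 0.0, 1.0 or a
// NaN). Consult the linked Intel documentation for the exact token and response
// encodings. With an all-zero table every response code is 0, which keeps `a`
// unchanged:
//
//     let x = _mm512_set1_ps(7.0);
//     let r = _mm512_fixupimm_ps::<0>(x, x, _mm512_setzero_si512()); // r == x
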
6251/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6252///
6253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_ps&expand=2500)
6254#[inline]
6255#[target_feature(enable = "avx512f")]
6256#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6257#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6258#[rustc_legacy_const_generics(4)]
6259pub fn _mm512_mask_fixupimm_ps<const IMM8: i32>(
6260    a: __m512,
6261    k: __mmask16,
6262    b: __m512,
6263    c: __m512i,
6264) -> __m512 {
6265    unsafe {
6266        static_assert_uimm_bits!(IMM8, 8);
6267        let a = a.as_f32x16();
6268        let b = b.as_f32x16();
6269        let c = c.as_i32x16();
6270        let r = vfixupimmps(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
6271        transmute(r)
6272    }
6273}
6274
6275/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6276///
6277/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_ps&expand=2501)
6278#[inline]
6279#[target_feature(enable = "avx512f")]
6280#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6281#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6282#[rustc_legacy_const_generics(4)]
6283pub fn _mm512_maskz_fixupimm_ps<const IMM8: i32>(
6284    k: __mmask16,
6285    a: __m512,
6286    b: __m512,
6287    c: __m512i,
6288) -> __m512 {
6289    unsafe {
6290        static_assert_uimm_bits!(IMM8, 8);
6291        let a = a.as_f32x16();
6292        let b = b.as_f32x16();
6293        let c = c.as_i32x16();
6294        let r = vfixupimmpsz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
6295        transmute(r)
6296    }
6297}
6298
6299/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6300///
6301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fixupimm_ps&expand=2496)
6302#[inline]
6303#[target_feature(enable = "avx512f,avx512vl")]
6304#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6305#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6306#[rustc_legacy_const_generics(3)]
6307pub fn _mm256_fixupimm_ps<const IMM8: i32>(a: __m256, b: __m256, c: __m256i) -> __m256 {
6308    unsafe {
6309        static_assert_uimm_bits!(IMM8, 8);
6310        let a = a.as_f32x8();
6311        let b = b.as_f32x8();
6312        let c = c.as_i32x8();
6313        let r = vfixupimmps256(a, b, c, IMM8, 0b11111111);
6314        transmute(r)
6315    }
6316}
6317
6318/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6319///
6320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fixupimm_ps&expand=2497)
6321#[inline]
6322#[target_feature(enable = "avx512f,avx512vl")]
6323#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6324#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6325#[rustc_legacy_const_generics(4)]
6326pub fn _mm256_mask_fixupimm_ps<const IMM8: i32>(
6327    a: __m256,
6328    k: __mmask8,
6329    b: __m256,
6330    c: __m256i,
6331) -> __m256 {
6332    unsafe {
6333        static_assert_uimm_bits!(IMM8, 8);
6334        let a = a.as_f32x8();
6335        let b = b.as_f32x8();
6336        let c = c.as_i32x8();
6337        let r = vfixupimmps256(a, b, c, IMM8, k);
6338        transmute(r)
6339    }
6340}
6341
6342/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6343///
6344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fixupimm_ps&expand=2498)
6345#[inline]
6346#[target_feature(enable = "avx512f,avx512vl")]
6347#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6348#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6349#[rustc_legacy_const_generics(4)]
6350pub fn _mm256_maskz_fixupimm_ps<const IMM8: i32>(
6351    k: __mmask8,
6352    a: __m256,
6353    b: __m256,
6354    c: __m256i,
6355) -> __m256 {
6356    unsafe {
6357        static_assert_uimm_bits!(IMM8, 8);
6358        let a = a.as_f32x8();
6359        let b = b.as_f32x8();
6360        let c = c.as_i32x8();
6361        let r = vfixupimmpsz256(a, b, c, IMM8, k);
6362        transmute(r)
6363    }
6364}
6365
6366/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6367///
6368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fixupimm_ps&expand=2493)
6369#[inline]
6370#[target_feature(enable = "avx512f,avx512vl")]
6371#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6372#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6373#[rustc_legacy_const_generics(3)]
6374pub fn _mm_fixupimm_ps<const IMM8: i32>(a: __m128, b: __m128, c: __m128i) -> __m128 {
6375    unsafe {
6376        static_assert_uimm_bits!(IMM8, 8);
6377        let a = a.as_f32x4();
6378        let b = b.as_f32x4();
6379        let c = c.as_i32x4();
6380        let r = vfixupimmps128(a, b, c, IMM8, 0b00001111);
6381        transmute(r)
6382    }
6383}
6384
6385/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6386///
6387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fixupimm_ps&expand=2494)
6388#[inline]
6389#[target_feature(enable = "avx512f,avx512vl")]
6390#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6391#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6392#[rustc_legacy_const_generics(4)]
6393pub fn _mm_mask_fixupimm_ps<const IMM8: i32>(
6394    a: __m128,
6395    k: __mmask8,
6396    b: __m128,
6397    c: __m128i,
6398) -> __m128 {
6399    unsafe {
6400        static_assert_uimm_bits!(IMM8, 8);
6401        let a = a.as_f32x4();
6402        let b = b.as_f32x4();
6403        let c = c.as_i32x4();
6404        let r = vfixupimmps128(a, b, c, IMM8, k);
6405        transmute(r)
6406    }
6407}
6408
6409/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6410///
6411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fixupimm_ps&expand=2495)
6412#[inline]
6413#[target_feature(enable = "avx512f,avx512vl")]
6414#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6415#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6416#[rustc_legacy_const_generics(4)]
6417pub fn _mm_maskz_fixupimm_ps<const IMM8: i32>(
6418    k: __mmask8,
6419    a: __m128,
6420    b: __m128,
6421    c: __m128i,
6422) -> __m128 {
6423    unsafe {
6424        static_assert_uimm_bits!(IMM8, 8);
6425        let a = a.as_f32x4();
6426        let b = b.as_f32x4();
6427        let c = c.as_i32x4();
6428        let r = vfixupimmpsz128(a, b, c, IMM8, k);
6429        transmute(r)
6430    }
6431}
6432
6433/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6434///
6435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_pd&expand=2490)
6436#[inline]
6437#[target_feature(enable = "avx512f")]
6438#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6439#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6440#[rustc_legacy_const_generics(3)]
6441pub fn _mm512_fixupimm_pd<const IMM8: i32>(a: __m512d, b: __m512d, c: __m512i) -> __m512d {
6442    unsafe {
6443        static_assert_uimm_bits!(IMM8, 8);
6444        let a = a.as_f64x8();
6445        let b = b.as_f64x8();
6446        let c = c.as_i64x8();
6447        let r = vfixupimmpd(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION);
6448        transmute(r)
6449    }
6450}
6451
6452/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6453///
6454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_pd&expand=2491)
6455#[inline]
6456#[target_feature(enable = "avx512f")]
6457#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6458#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6459#[rustc_legacy_const_generics(4)]
6460pub fn _mm512_mask_fixupimm_pd<const IMM8: i32>(
6461    a: __m512d,
6462    k: __mmask8,
6463    b: __m512d,
6464    c: __m512i,
6465) -> __m512d {
6466    unsafe {
6467        static_assert_uimm_bits!(IMM8, 8);
6468        let a = a.as_f64x8();
6469        let b = b.as_f64x8();
6470        let c = c.as_i64x8();
6471        let r = vfixupimmpd(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
6472        transmute(r)
6473    }
6474}
6475
6476/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6477///
6478/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_pd&expand=2492)
6479#[inline]
6480#[target_feature(enable = "avx512f")]
6481#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6482#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6483#[rustc_legacy_const_generics(4)]
6484pub fn _mm512_maskz_fixupimm_pd<const IMM8: i32>(
6485    k: __mmask8,
6486    a: __m512d,
6487    b: __m512d,
6488    c: __m512i,
6489) -> __m512d {
6490    unsafe {
6491        static_assert_uimm_bits!(IMM8, 8);
6492        let a = a.as_f64x8();
6493        let b = b.as_f64x8();
6494        let c = c.as_i64x8();
6495        let r = vfixupimmpdz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
6496        transmute(r)
6497    }
6498}
6499
6500/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6501///
6502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fixupimm_pd&expand=2487)
6503#[inline]
6504#[target_feature(enable = "avx512f,avx512vl")]
6505#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6506#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6507#[rustc_legacy_const_generics(3)]
6508pub fn _mm256_fixupimm_pd<const IMM8: i32>(a: __m256d, b: __m256d, c: __m256i) -> __m256d {
6509    unsafe {
6510        static_assert_uimm_bits!(IMM8, 8);
6511        let a = a.as_f64x4();
6512        let b = b.as_f64x4();
6513        let c = c.as_i64x4();
6514        let r = vfixupimmpd256(a, b, c, IMM8, 0b00001111);
6515        transmute(r)
6516    }
6517}
6518
6519/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6520///
6521/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fixupimm_pd&expand=2488)
6522#[inline]
6523#[target_feature(enable = "avx512f,avx512vl")]
6524#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6525#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6526#[rustc_legacy_const_generics(4)]
6527pub fn _mm256_mask_fixupimm_pd<const IMM8: i32>(
6528    a: __m256d,
6529    k: __mmask8,
6530    b: __m256d,
6531    c: __m256i,
6532) -> __m256d {
6533    unsafe {
6534        static_assert_uimm_bits!(IMM8, 8);
6535        let a = a.as_f64x4();
6536        let b = b.as_f64x4();
6537        let c = c.as_i64x4();
6538        let r = vfixupimmpd256(a, b, c, IMM8, k);
6539        transmute(r)
6540    }
6541}
6542
6543/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6544///
6545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fixupimm_pd&expand=2489)
6546#[inline]
6547#[target_feature(enable = "avx512f,avx512vl")]
6548#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6549#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6550#[rustc_legacy_const_generics(4)]
6551pub fn _mm256_maskz_fixupimm_pd<const IMM8: i32>(
6552    k: __mmask8,
6553    a: __m256d,
6554    b: __m256d,
6555    c: __m256i,
6556) -> __m256d {
6557    unsafe {
6558        static_assert_uimm_bits!(IMM8, 8);
6559        let a = a.as_f64x4();
6560        let b = b.as_f64x4();
6561        let c = c.as_i64x4();
6562        let r = vfixupimmpdz256(a, b, c, IMM8, k);
6563        transmute(r)
6564    }
6565}
6566
6567/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6568///
6569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fixupimm_pd&expand=2484)
6570#[inline]
6571#[target_feature(enable = "avx512f,avx512vl")]
6572#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6573#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6574#[rustc_legacy_const_generics(3)]
6575pub fn _mm_fixupimm_pd<const IMM8: i32>(a: __m128d, b: __m128d, c: __m128i) -> __m128d {
6576    unsafe {
6577        static_assert_uimm_bits!(IMM8, 8);
6578        let a = a.as_f64x2();
6579        let b = b.as_f64x2();
6580        let c = c.as_i64x2();
6581        let r = vfixupimmpd128(a, b, c, IMM8, 0b00000011);
6582        transmute(r)
6583    }
6584}
6585
6586/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6587///
6588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fixupimm_pd&expand=2485)
6589#[inline]
6590#[target_feature(enable = "avx512f,avx512vl")]
6591#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6592#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6593#[rustc_legacy_const_generics(4)]
6594pub fn _mm_mask_fixupimm_pd<const IMM8: i32>(
6595    a: __m128d,
6596    k: __mmask8,
6597    b: __m128d,
6598    c: __m128i,
6599) -> __m128d {
6600    unsafe {
6601        static_assert_uimm_bits!(IMM8, 8);
6602        let a = a.as_f64x2();
6603        let b = b.as_f64x2();
6604        let c = c.as_i64x2();
6605        let r = vfixupimmpd128(a, b, c, IMM8, k);
6606        transmute(r)
6607    }
6608}
6609
6610/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6611///
6612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fixupimm_pd&expand=2486)
6613#[inline]
6614#[target_feature(enable = "avx512f,avx512vl")]
6615#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6616#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6617#[rustc_legacy_const_generics(4)]
6618pub fn _mm_maskz_fixupimm_pd<const IMM8: i32>(
6619    k: __mmask8,
6620    a: __m128d,
6621    b: __m128d,
6622    c: __m128i,
6623) -> __m128d {
6624    unsafe {
6625        static_assert_uimm_bits!(IMM8, 8);
6626        let a = a.as_f64x2();
6627        let b = b.as_f64x2();
6628        let c = c.as_i64x2();
6629        let r = vfixupimmpdz128(a, b, c, IMM8, k);
6630        transmute(r)
6631    }
6632}
6633
6634/// Bitwise ternary logic that can implement any three-operand binary function; the specific function is specified by the value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6635///
6636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ternarylogic_epi32&expand=5867)
6637#[inline]
6638#[target_feature(enable = "avx512f")]
6639#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6640#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6641#[rustc_legacy_const_generics(3)]
6642pub fn _mm512_ternarylogic_epi32<const IMM8: i32>(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
6643    unsafe {
6644        static_assert_uimm_bits!(IMM8, 8);
6645        let a = a.as_i32x16();
6646        let b = b.as_i32x16();
6647        let c = c.as_i32x16();
6648        let r = vpternlogd(a, b, c, IMM8);
6649        transmute(r)
6650    }
6651}
6652
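// Illustrative sketch (not part of the original source): for the ternarylogic
// intrinsics, IMM8 is an 8-entry truth table indexed by
// (a_bit << 2) | (b_bit << 1) | c_bit. Some well-known tables, for any __m512i
// values a, b, c:
//
//     let r = _mm512_ternarylogic_epi32::<0x96>(a, b, c); // a ^ b ^ c
//     let r = _mm512_ternarylogic_epi32::<0xE8>(a, b, c); // majority(a, b, c)
//     let r = _mm512_ternarylogic_epi32::<0xCA>(a, b, c); // select: (a & b) | (!a & c)
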
6653/// Bitwise ternary logic that can implement any three-operand binary function; the specific function is specified by the value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from src, a, and b are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
6654///
6655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ternarylogic_epi32&expand=5865)
6656#[inline]
6657#[target_feature(enable = "avx512f")]
6658#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6659#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6660#[rustc_legacy_const_generics(4)]
6661pub fn _mm512_mask_ternarylogic_epi32<const IMM8: i32>(
6662    src: __m512i,
6663    k: __mmask16,
6664    a: __m512i,
6665    b: __m512i,
6666) -> __m512i {
6667    unsafe {
6668        static_assert_uimm_bits!(IMM8, 8);
6669        let src = src.as_i32x16();
6670        let a = a.as_i32x16();
6671        let b = b.as_i32x16();
6672        let r = vpternlogd(src, a, b, IMM8);
6673        transmute(simd_select_bitmask(k, r, src))
6674    }
6675}
6676
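// Illustrative sketch (not part of the original source): note that this writemask
// form feeds `src`, `a`, `b` into the truth table (index = (src_bit << 2) |
// (a_bit << 1) | b_bit) and then copies `src` into lanes whose mask bit is clear, so
// it is not simply a masked version of the three-input `_mm512_ternarylogic_epi32`
// above. With IMM8 = 0xCA, for example, each selected lane computes
// (src & a) | (!src & b).
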
6677/// Bitwise ternary logic that can implement any three-operand binary function; the specific function is specified by the value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
6678///
6679/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ternarylogic_epi32&expand=5866)
6680#[inline]
6681#[target_feature(enable = "avx512f")]
6682#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6683#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6684#[rustc_legacy_const_generics(4)]
6685pub fn _mm512_maskz_ternarylogic_epi32<const IMM8: i32>(
6686    k: __mmask16,
6687    a: __m512i,
6688    b: __m512i,
6689    c: __m512i,
6690) -> __m512i {
6691    unsafe {
6692        static_assert_uimm_bits!(IMM8, 8);
6693        let a = a.as_i32x16();
6694        let b = b.as_i32x16();
6695        let c = c.as_i32x16();
6696        let r = vpternlogd(a, b, c, IMM8);
6697        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
6698    }
6699}
6700
6701/// Bitwise ternary logic that can implement any three-operand binary function; the specific function is specified by the value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6702///
6703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ternarylogic_epi32&expand=5864)
6704#[inline]
6705#[target_feature(enable = "avx512f,avx512vl")]
6706#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6707#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6708#[rustc_legacy_const_generics(3)]
6709pub fn _mm256_ternarylogic_epi32<const IMM8: i32>(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
6710    unsafe {
6711        static_assert_uimm_bits!(IMM8, 8);
6712        let a = a.as_i32x8();
6713        let b = b.as_i32x8();
6714        let c = c.as_i32x8();
6715        let r = vpternlogd256(a, b, c, IMM8);
6716        transmute(r)
6717    }
6718}
6719
6720/// Bitwise ternary logic that can implement any three-operand binary function; the specific function is specified by the value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from src, a, and b are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
6721///
6722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ternarylogic_epi32&expand=5862)
6723#[inline]
6724#[target_feature(enable = "avx512f,avx512vl")]
6725#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6726#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6727#[rustc_legacy_const_generics(4)]
6728pub fn _mm256_mask_ternarylogic_epi32<const IMM8: i32>(
6729    src: __m256i,
6730    k: __mmask8,
6731    a: __m256i,
6732    b: __m256i,
6733) -> __m256i {
6734    unsafe {
6735        static_assert_uimm_bits!(IMM8, 8);
6736        let src = src.as_i32x8();
6737        let a = a.as_i32x8();
6738        let b = b.as_i32x8();
6739        let r = vpternlogd256(src, a, b, IMM8);
6740        transmute(simd_select_bitmask(k, r, src))
6741    }
6742}
6743
6744/// Bitwise ternary logic that can implement any three-operand binary function; the specific function is specified by the value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
6745///
6746/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ternarylogic_epi32&expand=5863)
6747#[inline]
6748#[target_feature(enable = "avx512f,avx512vl")]
6749#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6750#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6751#[rustc_legacy_const_generics(4)]
6752pub fn _mm256_maskz_ternarylogic_epi32<const IMM8: i32>(
6753    k: __mmask8,
6754    a: __m256i,
6755    b: __m256i,
6756    c: __m256i,
6757) -> __m256i {
6758    unsafe {
6759        static_assert_uimm_bits!(IMM8, 8);
6760        let a = a.as_i32x8();
6761        let b = b.as_i32x8();
6762        let c = c.as_i32x8();
6763        let r = vpternlogd256(a, b, c, IMM8);
6764        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
6765    }
6766}
6767
6768/// Bitwise ternary logic that can implement any three-operand binary function; the specific function is specified by the value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6769///
6770/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ternarylogic_epi32&expand=5861)
6771#[inline]
6772#[target_feature(enable = "avx512f,avx512vl")]
6773#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6774#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6775#[rustc_legacy_const_generics(3)]
6776pub fn _mm_ternarylogic_epi32<const IMM8: i32>(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
6777    unsafe {
6778        static_assert_uimm_bits!(IMM8, 8);
6779        let a = a.as_i32x4();
6780        let b = b.as_i32x4();
6781        let c = c.as_i32x4();
6782        let r = vpternlogd128(a, b, c, IMM8);
6783        transmute(r)
6784    }
6785}
6786
6787/// Bitwise ternary logic that can implement any three-operand binary function; the specific function is specified by the value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from src, a, and b are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
6788///
6789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ternarylogic_epi32&expand=5859)
6790#[inline]
6791#[target_feature(enable = "avx512f,avx512vl")]
6792#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6793#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6794#[rustc_legacy_const_generics(4)]
6795pub fn _mm_mask_ternarylogic_epi32<const IMM8: i32>(
6796    src: __m128i,
6797    k: __mmask8,
6798    a: __m128i,
6799    b: __m128i,
6800) -> __m128i {
6801    unsafe {
6802        static_assert_uimm_bits!(IMM8, 8);
6803        let src = src.as_i32x4();
6804        let a = a.as_i32x4();
6805        let b = b.as_i32x4();
6806        let r = vpternlogd128(src, a, b, IMM8);
6807        transmute(simd_select_bitmask(k, r, src))
6808    }
6809}
6810
6811/// Bitwise ternary logic that can implement any three-operand binary function; the specific function is specified by the value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
6812///
6813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ternarylogic_epi32&expand=5860)
6814#[inline]
6815#[target_feature(enable = "avx512f,avx512vl")]
6816#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6817#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6818#[rustc_legacy_const_generics(4)]
6819pub fn _mm_maskz_ternarylogic_epi32<const IMM8: i32>(
6820    k: __mmask8,
6821    a: __m128i,
6822    b: __m128i,
6823    c: __m128i,
6824) -> __m128i {
6825    unsafe {
6826        static_assert_uimm_bits!(IMM8, 8);
6827        let a = a.as_i32x4();
6828        let b = b.as_i32x4();
6829        let c = c.as_i32x4();
6830        let r = vpternlogd128(a, b, c, IMM8);
6831        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
6832    }
6833}
6834
6835/// Bitwise ternary logic that can implement any three-operand binary function; the specific function is specified by the value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6836///
6837/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ternarylogic_epi64&expand=5876)
6838#[inline]
6839#[target_feature(enable = "avx512f")]
6840#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6841#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6842#[rustc_legacy_const_generics(3)]
6843pub fn _mm512_ternarylogic_epi64<const IMM8: i32>(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
6844    unsafe {
6845        static_assert_uimm_bits!(IMM8, 8);
6846        let a = a.as_i64x8();
6847        let b = b.as_i64x8();
6848        let c = c.as_i64x8();
6849        let r = vpternlogq(a, b, c, IMM8);
6850        transmute(r)
6851    }
6852}
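
// Illustrative sketch (an assumption, not part of this module's API): how the
// `IMM8` truth table of the ternary-logic intrinsics is consumed. For every
// result bit, the bits taken from `a`, `b` and `c` form the index
// `(a_bit << 2) | (b_bit << 1) | c_bit` into `IMM8`, so `0x96` implements a
// three-way XOR and `0xE8` a majority vote. The helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn ternarylogic_imm8_sketch() {
    let a = _mm512_set1_epi64(0b1100);
    let b = _mm512_set1_epi64(0b1010);
    let c = _mm512_set1_epi64(0b1001);
    // Truth table 0x96 = 0b1001_0110: output 1 for an odd number of set inputs (a ^ b ^ c).
    let xor3 = _mm512_ternarylogic_epi64::<0x96>(a, b, c);
    // Truth table 0xE8 = 0b1110_1000: output 1 when at least two inputs are set.
    let majority = _mm512_ternarylogic_epi64::<0xE8>(a, b, c);
    assert_eq!(_mm512_cvtsi512_si32(xor3), 0b1111); // 0b1100 ^ 0b1010 ^ 0b1001
    assert_eq!(_mm512_cvtsi512_si32(majority), 0b1000); // bit 3 is the only bit set in at least two inputs
}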
6853
6854/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from src, a, and b are used to form a 3-bit index into imm8, and the bit of imm8 at that index is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
6855///
6856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ternarylogic_epi64&expand=5874)
6857#[inline]
6858#[target_feature(enable = "avx512f")]
6859#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6860#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6861#[rustc_legacy_const_generics(4)]
6862pub fn _mm512_mask_ternarylogic_epi64<const IMM8: i32>(
6863    src: __m512i,
6864    k: __mmask8,
6865    a: __m512i,
6866    b: __m512i,
6867) -> __m512i {
6868    unsafe {
6869        static_assert_uimm_bits!(IMM8, 8);
6870        let src = src.as_i64x8();
6871        let a = a.as_i64x8();
6872        let b = b.as_i64x8();
6873        let r = vpternlogq(src, a, b, IMM8);
6874        transmute(simd_select_bitmask(k, r, src))
6875    }
6876}
6877
6878/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the bit of imm8 at that index is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
6879///
6880/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ternarylogic_epi64&expand=5875)
6881#[inline]
6882#[target_feature(enable = "avx512f")]
6883#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6884#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6885#[rustc_legacy_const_generics(4)]
6886pub fn _mm512_maskz_ternarylogic_epi64<const IMM8: i32>(
6887    k: __mmask8,
6888    a: __m512i,
6889    b: __m512i,
6890    c: __m512i,
6891) -> __m512i {
6892    unsafe {
6893        static_assert_uimm_bits!(IMM8, 8);
6894        let a = a.as_i64x8();
6895        let b = b.as_i64x8();
6896        let c = c.as_i64x8();
6897        let r = vpternlogq(a, b, c, IMM8);
6898        transmute(simd_select_bitmask(k, r, i64x8::ZERO))
6899    }
6900}
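
// Hedged sketch of the writemask/zeromask behaviour documented above (the
// helper name is hypothetical): lanes whose mask bit is clear keep `src` in
// the `mask_` variant and become zero in the `maskz_` variant.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn ternarylogic_mask_sketch() {
    let src = _mm512_set1_epi64(-1);
    let a = _mm512_set1_epi64(0);
    let b = _mm512_set1_epi64(0);
    let c = _mm512_set1_epi64(0);
    // IMM8 = 0xFF sets every bit, but only the low four lanes are selected.
    let merged = _mm512_mask_ternarylogic_epi64::<0xFF>(src, 0b0000_1111, a, b);
    let zeroed = _mm512_maskz_ternarylogic_epi64::<0xFF>(0b0000_1111, a, b, c);
    // Unselected lanes stay -1 (copied from src) vs. become 0 (zeromask).
    assert_eq!(_mm512_reduce_add_epi64(merged), -8);
    assert_eq!(_mm512_reduce_add_epi64(zeroed), -4);
}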
6901
6902/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the bit of imm8 at that index is written to the corresponding bit in dst.
6903///
6904/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ternarylogic_epi64&expand=5873)
6905#[inline]
6906#[target_feature(enable = "avx512f,avx512vl")]
6907#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6908#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6909#[rustc_legacy_const_generics(3)]
6910pub fn _mm256_ternarylogic_epi64<const IMM8: i32>(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
6911    unsafe {
6912        static_assert_uimm_bits!(IMM8, 8);
6913        let a = a.as_i64x4();
6914        let b = b.as_i64x4();
6915        let c = c.as_i64x4();
6916        let r = vpternlogq256(a, b, c, IMM8);
6917        transmute(r)
6918    }
6919}
6920
6921/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from src, a, and b are used to form a 3-bit index into imm8, and the bit of imm8 at that index is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
6922///
6923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ternarylogic_epi64&expand=5871)
6924#[inline]
6925#[target_feature(enable = "avx512f,avx512vl")]
6926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6927#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6928#[rustc_legacy_const_generics(4)]
6929pub fn _mm256_mask_ternarylogic_epi64<const IMM8: i32>(
6930    src: __m256i,
6931    k: __mmask8,
6932    a: __m256i,
6933    b: __m256i,
6934) -> __m256i {
6935    unsafe {
6936        static_assert_uimm_bits!(IMM8, 8);
6937        let src = src.as_i64x4();
6938        let a = a.as_i64x4();
6939        let b = b.as_i64x4();
6940        let r = vpternlogq256(src, a, b, IMM8);
6941        transmute(simd_select_bitmask(k, r, src))
6942    }
6943}
6944
6945/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the bit of imm8 at that index is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
6946///
6947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ternarylogic_epi64&expand=5872)
6948#[inline]
6949#[target_feature(enable = "avx512f,avx512vl")]
6950#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6951#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6952#[rustc_legacy_const_generics(4)]
6953pub fn _mm256_maskz_ternarylogic_epi64<const IMM8: i32>(
6954    k: __mmask8,
6955    a: __m256i,
6956    b: __m256i,
6957    c: __m256i,
6958) -> __m256i {
6959    unsafe {
6960        static_assert_uimm_bits!(IMM8, 8);
6961        let a = a.as_i64x4();
6962        let b = b.as_i64x4();
6963        let c = c.as_i64x4();
6964        let r = vpternlogq256(a, b, c, IMM8);
6965        transmute(simd_select_bitmask(k, r, i64x4::ZERO))
6966    }
6967}
6968
6969/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the bit of imm8 at that index is written to the corresponding bit in dst.
6970///
6971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ternarylogic_epi64&expand=5870)
6972#[inline]
6973#[target_feature(enable = "avx512f,avx512vl")]
6974#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6975#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6976#[rustc_legacy_const_generics(3)]
6977pub fn _mm_ternarylogic_epi64<const IMM8: i32>(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
6978    unsafe {
6979        static_assert_uimm_bits!(IMM8, 8);
6980        let a = a.as_i64x2();
6981        let b = b.as_i64x2();
6982        let c = c.as_i64x2();
6983        let r = vpternlogq128(a, b, c, IMM8);
6984        transmute(r)
6985    }
6986}
6987
6988/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from src, a, and b are used to form a 3-bit index into imm8, and the bit of imm8 at that index is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
6989///
6990/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ternarylogic_epi64&expand=5868)
6991#[inline]
6992#[target_feature(enable = "avx512f,avx512vl")]
6993#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6994#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6995#[rustc_legacy_const_generics(4)]
6996pub fn _mm_mask_ternarylogic_epi64<const IMM8: i32>(
6997    src: __m128i,
6998    k: __mmask8,
6999    a: __m128i,
7000    b: __m128i,
7001) -> __m128i {
7002    unsafe {
7003        static_assert_uimm_bits!(IMM8, 8);
7004        let src = src.as_i64x2();
7005        let a = a.as_i64x2();
7006        let b = b.as_i64x2();
7007        let r = vpternlogq128(src, a, b, IMM8);
7008        transmute(simd_select_bitmask(k, r, src))
7009    }
7010}
7011
7012/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the bit of imm8 at that index is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
7013///
7014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ternarylogic_epi64&expand=5869)
7015#[inline]
7016#[target_feature(enable = "avx512f,avx512vl")]
7017#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7018#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7019#[rustc_legacy_const_generics(4)]
7020pub fn _mm_maskz_ternarylogic_epi64<const IMM8: i32>(
7021    k: __mmask8,
7022    a: __m128i,
7023    b: __m128i,
7024    c: __m128i,
7025) -> __m128i {
7026    unsafe {
7027        static_assert_uimm_bits!(IMM8, 8);
7028        let a = a.as_i64x2();
7029        let b = b.as_i64x2();
7030        let c = c.as_i64x2();
7031        let r = vpternlogq128(a, b, c, IMM8);
7032        transmute(simd_select_bitmask(k, r, i64x2::ZERO))
7033    }
7034}
7035
7036/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7037/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7038///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7039///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7040///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7041///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7042/// The sign is determined by sc which can take the following values:\
7043///    _MM_MANT_SIGN_src     // sign = sign(src)\
7044///    _MM_MANT_SIGN_zero    // sign = 0\
7045///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7046///
7047/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_ps&expand=2880)
7048#[inline]
7049#[target_feature(enable = "avx512f")]
7050#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7051#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7052#[rustc_legacy_const_generics(1, 2)]
7053pub fn _mm512_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7054    a: __m512,
7055) -> __m512 {
7056    unsafe {
7057        static_assert_uimm_bits!(NORM, 4);
7058        static_assert_uimm_bits!(SIGN, 2);
7059        let a = a.as_f32x16();
7060        let zero = f32x16::ZERO;
7061        let r = vgetmantps(
7062            a,
7063            SIGN << 2 | NORM,
7064            zero,
7065            0b11111111_11111111,
7066            _MM_FROUND_CUR_DIRECTION,
7067        );
7068        transmute(r)
7069    }
7070}
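
// Hedged usage sketch (hypothetical helper name): the two const generics of
// the getmant intrinsics select the normalization interval and the sign
// source listed above. With `_MM_MANT_NORM_1_2` and `_MM_MANT_SIGN_ZERO`,
// 12.0 = 1.5 * 2^3 is reduced to its mantissa 1.5 with the sign cleared.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn getmant_ps_sketch() {
    let a = _mm512_set1_ps(12.0);
    let mant = _mm512_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_ZERO>(a);
    // Every lane holds the mantissa normalized to [1, 2).
    assert_eq!(_mm512_cvtss_f32(mant), 1.5);
}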
7071
7072/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7073/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7074///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7075///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7076///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7077///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7078/// The sign is determined by sc which can take the following values:\
7079///    _MM_MANT_SIGN_src     // sign = sign(src)\
7080///    _MM_MANT_SIGN_zero    // sign = 0\
7081///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7082///
7083/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_ps&expand=2881)
7084#[inline]
7085#[target_feature(enable = "avx512f")]
7086#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7087#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7088#[rustc_legacy_const_generics(3, 4)]
7089pub fn _mm512_mask_getmant_ps<
7090    const NORM: _MM_MANTISSA_NORM_ENUM,
7091    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7092>(
7093    src: __m512,
7094    k: __mmask16,
7095    a: __m512,
7096) -> __m512 {
7097    unsafe {
7098        static_assert_uimm_bits!(NORM, 4);
7099        static_assert_uimm_bits!(SIGN, 2);
7100        let a = a.as_f32x16();
7101        let src = src.as_f32x16();
7102        let r = vgetmantps(a, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
7103        transmute(r)
7104    }
7105}
7106
7107/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7108/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7109///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7110///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7111///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7112///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7113/// The sign is determined by sc which can take the following values:\
7114///    _MM_MANT_SIGN_src     // sign = sign(src)\
7115///    _MM_MANT_SIGN_zero    // sign = 0\
7116///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7117///
7118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_ps&expand=2882)
7119#[inline]
7120#[target_feature(enable = "avx512f")]
7121#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7122#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7123#[rustc_legacy_const_generics(2, 3)]
7124pub fn _mm512_maskz_getmant_ps<
7125    const NORM: _MM_MANTISSA_NORM_ENUM,
7126    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7127>(
7128    k: __mmask16,
7129    a: __m512,
7130) -> __m512 {
7131    unsafe {
7132        static_assert_uimm_bits!(NORM, 4);
7133        static_assert_uimm_bits!(SIGN, 2);
7134        let a = a.as_f32x16();
7135        let r = vgetmantps(
7136            a,
7137            SIGN << 2 | NORM,
7138            f32x16::ZERO,
7139            k,
7140            _MM_FROUND_CUR_DIRECTION,
7141        );
7142        transmute(r)
7143    }
7144}
7145
7146/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7147/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7148///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7149///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7150///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7151///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7152/// The sign is determined by sc which can take the following values:\
7153///    _MM_MANT_SIGN_src     // sign = sign(src)\
7154///    _MM_MANT_SIGN_zero    // sign = 0\
7155///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7156///
7157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getmant_ps&expand=2877)
7158#[inline]
7159#[target_feature(enable = "avx512f,avx512vl")]
7160#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7161#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7162#[rustc_legacy_const_generics(1, 2)]
7163pub fn _mm256_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7164    a: __m256,
7165) -> __m256 {
7166    unsafe {
7167        static_assert_uimm_bits!(NORM, 4);
7168        static_assert_uimm_bits!(SIGN, 2);
7169        let a = a.as_f32x8();
7170        let r = vgetmantps256(a, SIGN << 2 | NORM, f32x8::ZERO, 0b11111111);
7171        transmute(r)
7172    }
7173}
7174
7175/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7176/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7177///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7178///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7179///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7180///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7181/// The sign is determined by sc which can take the following values:\
7182///    _MM_MANT_SIGN_src     // sign = sign(src)\
7183///    _MM_MANT_SIGN_zero    // sign = 0\
7184///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7185///
7186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getmant_ps&expand=2878)
7187#[inline]
7188#[target_feature(enable = "avx512f,avx512vl")]
7189#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7190#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7191#[rustc_legacy_const_generics(3, 4)]
7192pub fn _mm256_mask_getmant_ps<
7193    const NORM: _MM_MANTISSA_NORM_ENUM,
7194    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7195>(
7196    src: __m256,
7197    k: __mmask8,
7198    a: __m256,
7199) -> __m256 {
7200    unsafe {
7201        static_assert_uimm_bits!(NORM, 4);
7202        static_assert_uimm_bits!(SIGN, 2);
7203        let a = a.as_f32x8();
7204        let src = src.as_f32x8();
7205        let r = vgetmantps256(a, SIGN << 2 | NORM, src, k);
7206        transmute(r)
7207    }
7208}
7209
7210/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7211/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7212///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7213///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7214///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7215///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7216/// The sign is determined by sc which can take the following values:\
7217///    _MM_MANT_SIGN_src     // sign = sign(src)\
7218///    _MM_MANT_SIGN_zero    // sign = 0\
7219///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7220///
7221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getmant_ps&expand=2879)
7222#[inline]
7223#[target_feature(enable = "avx512f,avx512vl")]
7224#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7225#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7226#[rustc_legacy_const_generics(2, 3)]
7227pub fn _mm256_maskz_getmant_ps<
7228    const NORM: _MM_MANTISSA_NORM_ENUM,
7229    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7230>(
7231    k: __mmask8,
7232    a: __m256,
7233) -> __m256 {
7234    unsafe {
7235        static_assert_uimm_bits!(NORM, 4);
7236        static_assert_uimm_bits!(SIGN, 2);
7237        let a = a.as_f32x8();
7238        let r = vgetmantps256(a, SIGN << 2 | NORM, f32x8::ZERO, k);
7239        transmute(r)
7240    }
7241}
7242
7243/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7244/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7245///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7246///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7247///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7248///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7249/// The sign is determined by sc which can take the following values:\
7250///    _MM_MANT_SIGN_src     // sign = sign(src)\
7251///    _MM_MANT_SIGN_zero    // sign = 0\
7252///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7253///
7254/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getmant_ps&expand=2874)
7255#[inline]
7256#[target_feature(enable = "avx512f,avx512vl")]
7257#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7258#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7259#[rustc_legacy_const_generics(1, 2)]
7260pub fn _mm_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7261    a: __m128,
7262) -> __m128 {
7263    unsafe {
7264        static_assert_uimm_bits!(NORM, 4);
7265        static_assert_uimm_bits!(SIGN, 2);
7266        let a = a.as_f32x4();
7267        let r = vgetmantps128(a, SIGN << 2 | NORM, f32x4::ZERO, 0b00001111);
7268        transmute(r)
7269    }
7270}
7271
7272/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7273/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7274///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7275///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7276///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7277///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7278/// The sign is determined by sc which can take the following values:\
7279///    _MM_MANT_SIGN_src     // sign = sign(src)\
7280///    _MM_MANT_SIGN_zero    // sign = 0\
7281///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7282///
7283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getmant_ps&expand=2875)
7284#[inline]
7285#[target_feature(enable = "avx512f,avx512vl")]
7286#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7287#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7288#[rustc_legacy_const_generics(3, 4)]
7289pub fn _mm_mask_getmant_ps<
7290    const NORM: _MM_MANTISSA_NORM_ENUM,
7291    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7292>(
7293    src: __m128,
7294    k: __mmask8,
7295    a: __m128,
7296) -> __m128 {
7297    unsafe {
7298        static_assert_uimm_bits!(NORM, 4);
7299        static_assert_uimm_bits!(SIGN, 2);
7300        let a = a.as_f32x4();
7301        let src = src.as_f32x4();
7302        let r = vgetmantps128(a, SIGN << 2 | NORM, src, k);
7303        transmute(r)
7304    }
7305}
7306
7307/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7308/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7309///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7310///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7311///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7312///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7313/// The sign is determined by sc which can take the following values:\
7314///    _MM_MANT_SIGN_src     // sign = sign(src)\
7315///    _MM_MANT_SIGN_zero    // sign = 0\
7316///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7317///
7318/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getmant_ps&expand=2876)
7319#[inline]
7320#[target_feature(enable = "avx512f,avx512vl")]
7321#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7322#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7323#[rustc_legacy_const_generics(2, 3)]
7324pub fn _mm_maskz_getmant_ps<
7325    const NORM: _MM_MANTISSA_NORM_ENUM,
7326    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7327>(
7328    k: __mmask8,
7329    a: __m128,
7330) -> __m128 {
7331    unsafe {
7332        static_assert_uimm_bits!(NORM, 4);
7333        static_assert_uimm_bits!(SIGN, 2);
7334        let a = a.as_f32x4();
7335        let r = vgetmantps128(a, SIGN << 2 | NORM, f32x4::ZERO, k);
7336        transmute(r)
7337    }
7338}
7339
7340/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7341/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7342///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7343///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7344///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7345///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7346/// The sign is determined by sc which can take the following values:\
7347///    _MM_MANT_SIGN_src     // sign = sign(src)\
7348///    _MM_MANT_SIGN_zero    // sign = 0\
7349///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7350///
7351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_pd&expand=2871)
7352#[inline]
7353#[target_feature(enable = "avx512f")]
7354#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7355#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7356#[rustc_legacy_const_generics(1, 2)]
7357pub fn _mm512_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7358    a: __m512d,
7359) -> __m512d {
7360    unsafe {
7361        static_assert_uimm_bits!(NORM, 4);
7362        static_assert_uimm_bits!(SIGN, 2);
7363        let a = a.as_f64x8();
7364        let zero = f64x8::ZERO;
7365        let r = vgetmantpd(
7366            a,
7367            SIGN << 2 | NORM,
7368            zero,
7369            0b11111111,
7370            _MM_FROUND_CUR_DIRECTION,
7371        );
7372        transmute(r)
7373    }
7374}
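
// Hedged sketch of the `SIGN` parameter on the double-precision variant
// (hypothetical helper name): `_MM_MANT_SIGN_SRC` keeps the input's sign,
// `_MM_MANT_SIGN_ZERO` clears it.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn getmant_pd_sign_sketch() {
    let a = _mm512_set1_pd(-12.0); // -1.5 * 2^3
    let keep = _mm512_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a);
    let clear = _mm512_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_ZERO>(a);
    assert_eq!(_mm512_cvtsd_f64(keep), -1.5);
    assert_eq!(_mm512_cvtsd_f64(clear), 1.5);
}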
7375
7376/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7377/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7378///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7379///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7380///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7381///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7382/// The sign is determined by sc which can take the following values:\
7383///    _MM_MANT_SIGN_src     // sign = sign(src)\
7384///    _MM_MANT_SIGN_zero    // sign = 0\
7385///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7386///
7387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_pd&expand=2872)
7388#[inline]
7389#[target_feature(enable = "avx512f")]
7390#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7391#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7392#[rustc_legacy_const_generics(3, 4)]
7393pub fn _mm512_mask_getmant_pd<
7394    const NORM: _MM_MANTISSA_NORM_ENUM,
7395    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7396>(
7397    src: __m512d,
7398    k: __mmask8,
7399    a: __m512d,
7400) -> __m512d {
7401    unsafe {
7402        static_assert_uimm_bits!(NORM, 4);
7403        static_assert_uimm_bits!(SIGN, 2);
7404        let a = a.as_f64x8();
7405        let src = src.as_f64x8();
7406        let r = vgetmantpd(a, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
7407        transmute(r)
7408    }
7409}
7410
7411/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7412/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7413///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7414///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7415///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7416///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7417/// The sign is determined by sc which can take the following values:\
7418///    _MM_MANT_SIGN_src     // sign = sign(src)\
7419///    _MM_MANT_SIGN_zero    // sign = 0\
7420///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7421///
7422/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_pd&expand=2873)
7423#[inline]
7424#[target_feature(enable = "avx512f")]
7425#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7426#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7427#[rustc_legacy_const_generics(2, 3)]
7428pub fn _mm512_maskz_getmant_pd<
7429    const NORM: _MM_MANTISSA_NORM_ENUM,
7430    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7431>(
7432    k: __mmask8,
7433    a: __m512d,
7434) -> __m512d {
7435    unsafe {
7436        static_assert_uimm_bits!(NORM, 4);
7437        static_assert_uimm_bits!(SIGN, 2);
7438        let a = a.as_f64x8();
7439        let r = vgetmantpd(
7440            a,
7441            SIGN << 2 | NORM,
7442            f64x8::ZERO,
7443            k,
7444            _MM_FROUND_CUR_DIRECTION,
7445        );
7446        transmute(r)
7447    }
7448}
7449
7450/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7451/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7452///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7453///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7454///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7455///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7456/// The sign is determined by sc which can take the following values:\
7457///    _MM_MANT_SIGN_src     // sign = sign(src)\
7458///    _MM_MANT_SIGN_zero    // sign = 0\
7459///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7460///
7461/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getmant_pd&expand=2868)
7462#[inline]
7463#[target_feature(enable = "avx512f,avx512vl")]
7464#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7465#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7466#[rustc_legacy_const_generics(1, 2)]
7467pub fn _mm256_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7468    a: __m256d,
7469) -> __m256d {
7470    unsafe {
7471        static_assert_uimm_bits!(NORM, 4);
7472        static_assert_uimm_bits!(SIGN, 2);
7473        let a = a.as_f64x4();
7474        let r = vgetmantpd256(a, SIGN << 2 | NORM, f64x4::ZERO, 0b00001111);
7475        transmute(r)
7476    }
7477}
7478
7479/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7480/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7481///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7482///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7483///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7484///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7485/// The sign is determined by sc which can take the following values:\
7486///    _MM_MANT_SIGN_src     // sign = sign(src)\
7487///    _MM_MANT_SIGN_zero    // sign = 0\
7488///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7489///
7490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getmant_pd&expand=2869)
7491#[inline]
7492#[target_feature(enable = "avx512f,avx512vl")]
7493#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7494#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7495#[rustc_legacy_const_generics(3, 4)]
7496pub fn _mm256_mask_getmant_pd<
7497    const NORM: _MM_MANTISSA_NORM_ENUM,
7498    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7499>(
7500    src: __m256d,
7501    k: __mmask8,
7502    a: __m256d,
7503) -> __m256d {
7504    unsafe {
7505        static_assert_uimm_bits!(NORM, 4);
7506        static_assert_uimm_bits!(SIGN, 2);
7507        let a = a.as_f64x4();
7508        let src = src.as_f64x4();
7509        let r = vgetmantpd256(a, SIGN << 2 | NORM, src, k);
7510        transmute(r)
7511    }
7512}
7513
7514/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7515/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7516///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7517///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7518///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7519///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7520/// The sign is determined by sc which can take the following values:\
7521///    _MM_MANT_SIGN_src     // sign = sign(src)\
7522///    _MM_MANT_SIGN_zero    // sign = 0\
7523///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7524///
7525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getmant_pd&expand=2870)
7526#[inline]
7527#[target_feature(enable = "avx512f,avx512vl")]
7528#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7529#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7530#[rustc_legacy_const_generics(2, 3)]
7531pub fn _mm256_maskz_getmant_pd<
7532    const NORM: _MM_MANTISSA_NORM_ENUM,
7533    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7534>(
7535    k: __mmask8,
7536    a: __m256d,
7537) -> __m256d {
7538    unsafe {
7539        static_assert_uimm_bits!(NORM, 4);
7540        static_assert_uimm_bits!(SIGN, 2);
7541        let a = a.as_f64x4();
7542        let r = vgetmantpd256(a, SIGN << 2 | NORM, f64x4::ZERO, k);
7543        transmute(r)
7544    }
7545}
7546
7547/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7548/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7549///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7550///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7551///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7552///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7553/// The sign is determined by sc which can take the following values:\
7554///    _MM_MANT_SIGN_src     // sign = sign(src)\
7555///    _MM_MANT_SIGN_zero    // sign = 0\
7556///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7557///
7558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getmant_pd&expand=2865)
7559#[inline]
7560#[target_feature(enable = "avx512f,avx512vl")]
7561#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7562#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7563#[rustc_legacy_const_generics(1, 2)]
7564pub fn _mm_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7565    a: __m128d,
7566) -> __m128d {
7567    unsafe {
7568        static_assert_uimm_bits!(NORM, 4);
7569        static_assert_uimm_bits!(SIGN, 2);
7570        let a = a.as_f64x2();
7571        let r = vgetmantpd128(a, SIGN << 2 | NORM, f64x2::ZERO, 0b00000011);
7572        transmute(r)
7573    }
7574}
7575
7576/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7577/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7578///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7579///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7580///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7581///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7582/// The sign is determined by sc which can take the following values:\
7583///    _MM_MANT_SIGN_src     // sign = sign(src)\
7584///    _MM_MANT_SIGN_zero    // sign = 0\
7585///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7586///
7587/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getmant_pd&expand=2866)
7588#[inline]
7589#[target_feature(enable = "avx512f,avx512vl")]
7590#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7591#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7592#[rustc_legacy_const_generics(3, 4)]
7593pub fn _mm_mask_getmant_pd<
7594    const NORM: _MM_MANTISSA_NORM_ENUM,
7595    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7596>(
7597    src: __m128d,
7598    k: __mmask8,
7599    a: __m128d,
7600) -> __m128d {
7601    unsafe {
7602        static_assert_uimm_bits!(NORM, 4);
7603        static_assert_uimm_bits!(SIGN, 2);
7604        let a = a.as_f64x2();
7605        let src = src.as_f64x2();
7606        let r = vgetmantpd128(a, SIGN << 2 | NORM, src, k);
7607        transmute(r)
7608    }
7609}
7610
7611/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7612/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7613///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7614///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7615///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7616///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7617/// The sign is determined by sc which can take the following values:\
7618///    _MM_MANT_SIGN_src     // sign = sign(src)\
7619///    _MM_MANT_SIGN_zero    // sign = 0\
7620///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7621///
7622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getmant_pd&expand=2867)
7623#[inline]
7624#[target_feature(enable = "avx512f,avx512vl")]
7625#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7626#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7627#[rustc_legacy_const_generics(2, 3)]
7628pub fn _mm_maskz_getmant_pd<
7629    const NORM: _MM_MANTISSA_NORM_ENUM,
7630    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7631>(
7632    k: __mmask8,
7633    a: __m128d,
7634) -> __m128d {
7635    unsafe {
7636        static_assert_uimm_bits!(NORM, 4);
7637        static_assert_uimm_bits!(SIGN, 2);
7638        let a = a.as_f64x2();
7639        let r = vgetmantpd128(a, SIGN << 2 | NORM, f64x2::ZERO, k);
7640        transmute(r)
7641    }
7642}
7643
7644/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.\
7645///
7646/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7647/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7648/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7649/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7650/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7651/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7652///
7653/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_round_ps&expand=145)
7654#[inline]
7655#[target_feature(enable = "avx512f")]
7656#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7657#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7658#[rustc_legacy_const_generics(2)]
7659pub fn _mm512_add_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
7660    unsafe {
7661        static_assert_rounding!(ROUNDING);
7662        let a = a.as_f32x16();
7663        let b = b.as_f32x16();
7664        let r = vaddps(a, b, ROUNDING);
7665        transmute(r)
7666    }
7667}
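
// Hedged sketch of the ROUNDING parameter (hypothetical helper name): an
// addend far below half an ulp of 1.0 is lost with round-to-nearest but shows
// up once rounding toward +infinity is requested explicitly.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn add_round_ps_sketch() {
    let a = _mm512_set1_ps(1.0);
    let b = _mm512_set1_ps(1.0e-10); // well below 0.5 * ulp(1.0) ≈ 6.0e-8
    let nearest =
        _mm512_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
    let up = _mm512_add_round_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a, b);
    assert_eq!(_mm512_cvtss_f32(nearest), 1.0);
    assert!(_mm512_cvtss_f32(up) > 1.0);
}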
7668
7669/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7670///
7671/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7672/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7673/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7674/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7675/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7676/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7677///
7678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_round_ps&expand=146)
7679#[inline]
7680#[target_feature(enable = "avx512f")]
7681#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7682#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7683#[rustc_legacy_const_generics(4)]
7684pub fn _mm512_mask_add_round_ps<const ROUNDING: i32>(
7685    src: __m512,
7686    k: __mmask16,
7687    a: __m512,
7688    b: __m512,
7689) -> __m512 {
7690    unsafe {
7691        static_assert_rounding!(ROUNDING);
7692        let a = a.as_f32x16();
7693        let b = b.as_f32x16();
7694        let r = vaddps(a, b, ROUNDING);
7695        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
7696    }
7697}
7698
7699/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7700///
7701/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7702/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7703/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7704/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7705/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7706/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7707///
7708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_round_ps&expand=147)
7709#[inline]
7710#[target_feature(enable = "avx512f")]
7711#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7712#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7713#[rustc_legacy_const_generics(3)]
7714pub fn _mm512_maskz_add_round_ps<const ROUNDING: i32>(
7715    k: __mmask16,
7716    a: __m512,
7717    b: __m512,
7718) -> __m512 {
7719    unsafe {
7720        static_assert_rounding!(ROUNDING);
7721        let a = a.as_f32x16();
7722        let b = b.as_f32x16();
7723        let r = vaddps(a, b, ROUNDING);
7724        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
7725    }
7726}
7727
7728/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\
7729///
7730/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7731/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7732/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7733/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7734/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7735/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7736///
7737/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_round_pd&expand=142)
7738#[inline]
7739#[target_feature(enable = "avx512f")]
7740#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7741#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
7742#[rustc_legacy_const_generics(2)]
7743pub fn _mm512_add_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
7744    unsafe {
7745        static_assert_rounding!(ROUNDING);
7746        let a = a.as_f64x8();
7747        let b = b.as_f64x8();
7748        let r = vaddpd(a, b, ROUNDING);
7749        transmute(r)
7750    }
7751}
7752
7753/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7754///
7755/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7756/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7757/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7758/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7759/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7760/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7761///
7762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_round_pd&expand=143)
7763#[inline]
7764#[target_feature(enable = "avx512f")]
7765#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7766#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
7767#[rustc_legacy_const_generics(4)]
7768pub fn _mm512_mask_add_round_pd<const ROUNDING: i32>(
7769    src: __m512d,
7770    k: __mmask8,
7771    a: __m512d,
7772    b: __m512d,
7773) -> __m512d {
7774    unsafe {
7775        static_assert_rounding!(ROUNDING);
7776        let a = a.as_f64x8();
7777        let b = b.as_f64x8();
7778        let r = vaddpd(a, b, ROUNDING);
7779        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
7780    }
7781}
7782
7783/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7784///
7785/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7786/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7787/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7788/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7789/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7790/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7791///
7792/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_round_pd&expand=144)
7793#[inline]
7794#[target_feature(enable = "avx512f")]
7795#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7796#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
7797#[rustc_legacy_const_generics(3)]
7798pub fn _mm512_maskz_add_round_pd<const ROUNDING: i32>(
7799    k: __mmask8,
7800    a: __m512d,
7801    b: __m512d,
7802) -> __m512d {
7803    unsafe {
7804        static_assert_rounding!(ROUNDING);
7805        let a = a.as_f64x8();
7806        let b = b.as_f64x8();
7807        let r = vaddpd(a, b, ROUNDING);
7808        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
7809    }
7810}
7811
7812/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\
7813///
7814/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7815/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7816/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7817/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7818/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7819/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7820///
7821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_round_ps&expand=5739)
7822#[inline]
7823#[target_feature(enable = "avx512f")]
7824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7825#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
7826#[rustc_legacy_const_generics(2)]
7827pub fn _mm512_sub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
7828    unsafe {
7829        static_assert_rounding!(ROUNDING);
7830        let a = a.as_f32x16();
7831        let b = b.as_f32x16();
7832        let r = vsubps(a, b, ROUNDING);
7833        transmute(r)
7834    }
7835}
7836
7837/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7838///
7839/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7840/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7841/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7842/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7843/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7844/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7845///
7846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_round_ps&expand=5737)
7847#[inline]
7848#[target_feature(enable = "avx512f")]
7849#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7850#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
7851#[rustc_legacy_const_generics(4)]
7852pub fn _mm512_mask_sub_round_ps<const ROUNDING: i32>(
7853    src: __m512,
7854    k: __mmask16,
7855    a: __m512,
7856    b: __m512,
7857) -> __m512 {
7858    unsafe {
7859        static_assert_rounding!(ROUNDING);
7860        let a = a.as_f32x16();
7861        let b = b.as_f32x16();
7862        let r = vsubps(a, b, ROUNDING);
7863        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
7864    }
7865}
7866
7867/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7868///
7869/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7870/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7871/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7872/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7873/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7874/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7875///
7876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_round_ps&expand=5738)
7877#[inline]
7878#[target_feature(enable = "avx512f")]
7879#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7880#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
7881#[rustc_legacy_const_generics(3)]
7882pub fn _mm512_maskz_sub_round_ps<const ROUNDING: i32>(
7883    k: __mmask16,
7884    a: __m512,
7885    b: __m512,
7886) -> __m512 {
7887    unsafe {
7888        static_assert_rounding!(ROUNDING);
7889        let a = a.as_f32x16();
7890        let b = b.as_f32x16();
7891        let r = vsubps(a, b, ROUNDING);
7892        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
7893    }
7894}
7895
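// Illustrative sketch, not part of the public API: the helper name `_example_sub_round_ps_rtz`
// is hypothetical and only shows how the const `ROUNDING` argument of `_mm512_sub_round_ps`
// is supplied at the call site.
#[target_feature(enable = "avx512f")]
fn _example_sub_round_ps_rtz(a: __m512, b: __m512) -> __m512 {
    // Truncate each lane's difference toward zero and suppress floating-point exceptions.
    _mm512_sub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b)
}
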
7896/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\
7897///
7898/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7899/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7900/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7901/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7902/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7903/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7904///
7905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_round_pd&expand=5736)
7906#[inline]
7907#[target_feature(enable = "avx512f")]
7908#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7909#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
7910#[rustc_legacy_const_generics(2)]
7911pub fn _mm512_sub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
7912    unsafe {
7913        static_assert_rounding!(ROUNDING);
7914        let a = a.as_f64x8();
7915        let b = b.as_f64x8();
7916        let r = vsubpd(a, b, ROUNDING);
7917        transmute(r)
7918    }
7919}
7920
7921/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7922///
7923/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7924/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7925/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7926/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7927/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7928/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7929///
7930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_round_pd&expand=5734)
7931#[inline]
7932#[target_feature(enable = "avx512f")]
7933#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7934#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
7935#[rustc_legacy_const_generics(4)]
7936pub fn _mm512_mask_sub_round_pd<const ROUNDING: i32>(
7937    src: __m512d,
7938    k: __mmask8,
7939    a: __m512d,
7940    b: __m512d,
7941) -> __m512d {
7942    unsafe {
7943        static_assert_rounding!(ROUNDING);
7944        let a = a.as_f64x8();
7945        let b = b.as_f64x8();
7946        let r = vsubpd(a, b, ROUNDING);
7947        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
7948    }
7949}
7950
7951/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7952///
7953/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7954/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7955/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7956/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7957/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7958/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7959///
7960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_round_pd&expand=5735)
7961#[inline]
7962#[target_feature(enable = "avx512f")]
7963#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7964#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
7965#[rustc_legacy_const_generics(3)]
7966pub fn _mm512_maskz_sub_round_pd<const ROUNDING: i32>(
7967    k: __mmask8,
7968    a: __m512d,
7969    b: __m512d,
7970) -> __m512d {
7971    unsafe {
7972        static_assert_rounding!(ROUNDING);
7973        let a = a.as_f64x8();
7974        let b = b.as_f64x8();
7975        let r = vsubpd(a, b, ROUNDING);
7976        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
7977    }
7978}
7979
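// Illustrative sketch, not part of the public API (the helper name is hypothetical): the
// writemask variant keeps lanes of `src` wherever the corresponding bit of `k` is clear, so
// only the selected lanes are recomputed.
#[target_feature(enable = "avx512f")]
fn _example_mask_sub_round_pd(src: __m512d, a: __m512d, b: __m512d) -> __m512d {
    // Recompute the even lanes (bits 0, 2, 4, 6); odd lanes are copied from `src`.
    let k: __mmask8 = 0b0101_0101;
    _mm512_mask_sub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(src, k, a, b)
}
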
7980/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.\
7981///
7982/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7983/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7984/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7985/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7986/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7987/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7988///
7989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_round_ps&expand=3940)
7990#[inline]
7991#[target_feature(enable = "avx512f")]
7992#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7993#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
7994#[rustc_legacy_const_generics(2)]
7995pub fn _mm512_mul_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
7996    unsafe {
7997        static_assert_rounding!(ROUNDING);
7998        let a = a.as_f32x16();
7999        let b = b.as_f32x16();
8000        let r = vmulps(a, b, ROUNDING);
8001        transmute(r)
8002    }
8003}
8004
8005/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8006///
8007/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8008/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8009/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8010/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8011/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8012/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8013///
8014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_round_ps&expand=3938)
8015#[inline]
8016#[target_feature(enable = "avx512f")]
8017#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8018#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
8019#[rustc_legacy_const_generics(4)]
8020pub fn _mm512_mask_mul_round_ps<const ROUNDING: i32>(
8021    src: __m512,
8022    k: __mmask16,
8023    a: __m512,
8024    b: __m512,
8025) -> __m512 {
8026    unsafe {
8027        static_assert_rounding!(ROUNDING);
8028        let a = a.as_f32x16();
8029        let b = b.as_f32x16();
8030        let r = vmulps(a, b, ROUNDING);
8031        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
8032    }
8033}
8034
8035/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8036///
8037/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8038/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8039/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8040/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8041/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8042/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8043///
8044/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_round_ps&expand=3939)
8045#[inline]
8046#[target_feature(enable = "avx512f")]
8047#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8048#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
8049#[rustc_legacy_const_generics(3)]
8050pub fn _mm512_maskz_mul_round_ps<const ROUNDING: i32>(
8051    k: __mmask16,
8052    a: __m512,
8053    b: __m512,
8054) -> __m512 {
8055    unsafe {
8056        static_assert_rounding!(ROUNDING);
8057        let a = a.as_f32x16();
8058        let b = b.as_f32x16();
8059        let r = vmulps(a, b, ROUNDING);
8060        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
8061    }
8062}
8063
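// Illustrative sketch, not part of the public API (hypothetical helper): with the zeromask
// variant, lanes whose mask bit is clear come back as +0.0 instead of being copied from a
// source vector.
#[target_feature(enable = "avx512f")]
fn _example_maskz_mul_round_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    // Round each selected product toward negative infinity; unselected lanes are zeroed.
    _mm512_maskz_mul_round_ps::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(k, a, b)
}
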
8064/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\
8065///
8066/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8067/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8068/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8069/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8070/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8071/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8072///
8073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_round_pd&expand=3937)
8074#[inline]
8075#[target_feature(enable = "avx512f")]
8076#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8077#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
8078#[rustc_legacy_const_generics(2)]
8079pub fn _mm512_mul_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
8080    unsafe {
8081        static_assert_rounding!(ROUNDING);
8082        let a = a.as_f64x8();
8083        let b = b.as_f64x8();
8084        let r = vmulpd(a, b, ROUNDING);
8085        transmute(r)
8086    }
8087}
8088
8089/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8090///
8091/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8092/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8093/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8094/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8095/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8096/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8097///
8098/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_round_pd&expand=3935)
8099#[inline]
8100#[target_feature(enable = "avx512f")]
8101#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8102#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
8103#[rustc_legacy_const_generics(4)]
8104pub fn _mm512_mask_mul_round_pd<const ROUNDING: i32>(
8105    src: __m512d,
8106    k: __mmask8,
8107    a: __m512d,
8108    b: __m512d,
8109) -> __m512d {
8110    unsafe {
8111        static_assert_rounding!(ROUNDING);
8112        let a = a.as_f64x8();
8113        let b = b.as_f64x8();
8114        let r = vmulpd(a, b, ROUNDING);
8115        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
8116    }
8117}
8118
8119/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8120///
8121/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8122/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8123/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8124/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8125/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8126/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8127///
8128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_round_pd&expand=3936)
8129#[inline]
8130#[target_feature(enable = "avx512f")]
8131#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8132#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
8133#[rustc_legacy_const_generics(3)]
8134pub fn _mm512_maskz_mul_round_pd<const ROUNDING: i32>(
8135    k: __mmask8,
8136    a: __m512d,
8137    b: __m512d,
8138) -> __m512d {
8139    unsafe {
8140        static_assert_rounding!(ROUNDING);
8141        let a = a.as_f64x8();
8142        let b = b.as_f64x8();
8143        let r = vmulpd(a, b, ROUNDING);
8144        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
8145    }
8146}
8147
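// Illustrative sketch, not part of the public API (hypothetical helper):
// `_MM_FROUND_CUR_DIRECTION` defers to the rounding mode currently programmed in `MXCSR.RC`,
// which matches the behaviour of the non-`_round` multiply intrinsics.
#[target_feature(enable = "avx512f")]
fn _example_mul_round_pd_cur(a: __m512d, b: __m512d) -> __m512d {
    _mm512_mul_round_pd::<_MM_FROUND_CUR_DIRECTION>(a, b)
}
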
8148/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.\
8149///
8150/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8151/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8152/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8153/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8154/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8155/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8156///
8157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_round_ps&expand=2168)
8158#[inline]
8159#[target_feature(enable = "avx512f")]
8160#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8161#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
8162#[rustc_legacy_const_generics(2)]
8163pub fn _mm512_div_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
8164    unsafe {
8165        static_assert_rounding!(ROUNDING);
8166        let a = a.as_f32x16();
8167        let b = b.as_f32x16();
8168        let r = vdivps(a, b, ROUNDING);
8169        transmute(r)
8170    }
8171}
8172
8173/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8174///
8175/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8176/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8177/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8178/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8179/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8180/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8181///
8182/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_round_ps&expand=2169)
8183#[inline]
8184#[target_feature(enable = "avx512f")]
8185#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8186#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
8187#[rustc_legacy_const_generics(4)]
8188pub fn _mm512_mask_div_round_ps<const ROUNDING: i32>(
8189    src: __m512,
8190    k: __mmask16,
8191    a: __m512,
8192    b: __m512,
8193) -> __m512 {
8194    unsafe {
8195        static_assert_rounding!(ROUNDING);
8196        let a = a.as_f32x16();
8197        let b = b.as_f32x16();
8198        let r = vdivps(a, b, ROUNDING);
8199        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
8200    }
8201}
8202
8203/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8204///
8205/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8206/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8207/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8208/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8209/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8210/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8211///
8212/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_round_ps&expand=2170)
8213#[inline]
8214#[target_feature(enable = "avx512f")]
8215#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8216#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
8217#[rustc_legacy_const_generics(3)]
8218pub fn _mm512_maskz_div_round_ps<const ROUNDING: i32>(
8219    k: __mmask16,
8220    a: __m512,
8221    b: __m512,
8222) -> __m512 {
8223    unsafe {
8224        static_assert_rounding!(ROUNDING);
8225        let a = a.as_f32x16();
8226        let b = b.as_f32x16();
8227        let r = vdivps(a, b, ROUNDING);
8228        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
8229    }
8230}
8231
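// Illustrative sketch, not part of the public API (hypothetical helper): directed rounding can
// bracket a result. Dividing once toward negative infinity and once toward positive infinity
// yields per-lane lower and upper bounds of the exact quotient.
#[target_feature(enable = "avx512f")]
fn _example_div_round_ps_bounds(a: __m512, b: __m512) -> (__m512, __m512) {
    let lo = _mm512_div_round_ps::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a, b);
    let hi = _mm512_div_round_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a, b);
    (lo, hi)
}
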
8232/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst.\
8233///
8234/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8235/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8236/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8237/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8238/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8239/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8240///
8241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_round_pd&expand=2165)
8242#[inline]
8243#[target_feature(enable = "avx512f")]
8244#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8245#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
8246#[rustc_legacy_const_generics(2)]
8247pub fn _mm512_div_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
8248    unsafe {
8249        static_assert_rounding!(ROUNDING);
8250        let a = a.as_f64x8();
8251        let b = b.as_f64x8();
8252        let r = vdivpd(a, b, ROUNDING);
8253        transmute(r)
8254    }
8255}
8256
8257/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8258///
8259/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8260/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8261/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8262/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8263/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8264/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8265///
8266/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_round_pd&expand=2166)
8267#[inline]
8268#[target_feature(enable = "avx512f")]
8269#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8270#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
8271#[rustc_legacy_const_generics(4)]
8272pub fn _mm512_mask_div_round_pd<const ROUNDING: i32>(
8273    src: __m512d,
8274    k: __mmask8,
8275    a: __m512d,
8276    b: __m512d,
8277) -> __m512d {
8278    unsafe {
8279        static_assert_rounding!(ROUNDING);
8280        let a = a.as_f64x8();
8281        let b = b.as_f64x8();
8282        let r = vdivpd(a, b, ROUNDING);
8283        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
8284    }
8285}
8286
8287/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8288///
8289/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8290/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8291/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8292/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8293/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8294/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8295///
8296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_round_pd&expand=2167)
8297#[inline]
8298#[target_feature(enable = "avx512f")]
8299#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8300#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
8301#[rustc_legacy_const_generics(3)]
8302pub fn _mm512_maskz_div_round_pd<const ROUNDING: i32>(
8303    k: __mmask8,
8304    a: __m512d,
8305    b: __m512d,
8306) -> __m512d {
8307    unsafe {
8308        static_assert_rounding!(ROUNDING);
8309        let a = a.as_f64x8();
8310        let b = b.as_f64x8();
8311        let r = vdivpd(a, b, ROUNDING);
8312        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
8313    }
8314}
8315
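// Illustrative sketch, not part of the public API (hypothetical helper and parameter names):
// guarding a division with a writemask so that only lanes flagged in `nonzero` are recomputed,
// while the remaining lanes keep their previous value. Building the mask itself would use a
// compare intrinsic and is out of scope here.
#[target_feature(enable = "avx512f")]
fn _example_mask_div_round_pd(prev: __m512d, nonzero: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    _mm512_mask_div_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(prev, nonzero, a, b)
}
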
8316/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\
8317///
8318/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8319/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8320/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8321/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8322/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8323/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8324///
8325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_round_ps&expand=5377)
8326#[inline]
8327#[target_feature(enable = "avx512f")]
8328#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8329#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
8330#[rustc_legacy_const_generics(1)]
8331pub fn _mm512_sqrt_round_ps<const ROUNDING: i32>(a: __m512) -> __m512 {
8332    unsafe {
8333        static_assert_rounding!(ROUNDING);
8334        let a = a.as_f32x16();
8335        let r = vsqrtps(a, ROUNDING);
8336        transmute(r)
8337    }
8338}
8339
8340/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8341///
8342/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8343/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8344/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8345/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8346/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8347/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8348///
8349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_round_ps&expand=5375)
8350#[inline]
8351#[target_feature(enable = "avx512f")]
8352#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8353#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
8354#[rustc_legacy_const_generics(3)]
8355pub fn _mm512_mask_sqrt_round_ps<const ROUNDING: i32>(
8356    src: __m512,
8357    k: __mmask16,
8358    a: __m512,
8359) -> __m512 {
8360    unsafe {
8361        static_assert_rounding!(ROUNDING);
8362        let a = a.as_f32x16();
8363        let r = vsqrtps(a, ROUNDING);
8364        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
8365    }
8366}
8367
8368/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8369///
8370/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8371/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8372/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8373/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8374/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8375/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8376///
8377/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_round_ps&expand=5376)
8378#[inline]
8379#[target_feature(enable = "avx512f")]
8380#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8381#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
8382#[rustc_legacy_const_generics(2)]
8383pub fn _mm512_maskz_sqrt_round_ps<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512 {
8384    unsafe {
8385        static_assert_rounding!(ROUNDING);
8386        let a = a.as_f32x16();
8387        let r = vsqrtps(a, ROUNDING);
8388        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
8389    }
8390}
8391
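// Illustrative sketch, not part of the public API (hypothetical helper): the unary `_round`
// intrinsics take `ROUNDING` as their only const generic, so just the vector operand is passed
// at run time.
#[target_feature(enable = "avx512f")]
fn _example_sqrt_round_ps(a: __m512) -> __m512 {
    // Round each square root up (toward +infinity) and suppress exceptions.
    _mm512_sqrt_round_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a)
}
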
8392/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\
8393///
8394/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8395/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8396/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8397/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8398/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8399/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8400///
8401/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_round_pd&expand=5374)
8402#[inline]
8403#[target_feature(enable = "avx512f")]
8404#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8405#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
8406#[rustc_legacy_const_generics(1)]
8407pub fn _mm512_sqrt_round_pd<const ROUNDING: i32>(a: __m512d) -> __m512d {
8408    unsafe {
8409        static_assert_rounding!(ROUNDING);
8410        let a = a.as_f64x8();
8411        let r = vsqrtpd(a, ROUNDING);
8412        transmute(r)
8413    }
8414}
8415
8416/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8417///
8418/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8419/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8420/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8421/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8422/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8423/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8424///
8425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_round_pd&expand=5372)
8426#[inline]
8427#[target_feature(enable = "avx512f")]
8428#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8429#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
8430#[rustc_legacy_const_generics(3)]
8431pub fn _mm512_mask_sqrt_round_pd<const ROUNDING: i32>(
8432    src: __m512d,
8433    k: __mmask8,
8434    a: __m512d,
8435) -> __m512d {
8436    unsafe {
8437        static_assert_rounding!(ROUNDING);
8438        let a = a.as_f64x8();
8439        let r = vsqrtpd(a, ROUNDING);
8440        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
8441    }
8442}
8443
8444/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8445///
8446/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8447/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8448/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8449/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8450/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8451/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8452///
8453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_round_pd&expand=5373)
8454#[inline]
8455#[target_feature(enable = "avx512f")]
8456#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8457#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
8458#[rustc_legacy_const_generics(2)]
8459pub fn _mm512_maskz_sqrt_round_pd<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m512d {
8460    unsafe {
8461        static_assert_rounding!(ROUNDING);
8462        let a = a.as_f64x8();
8463        let r = vsqrtpd(a, ROUNDING);
8464        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
8465    }
8466}
8467
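// Illustrative sketch, not part of the public API (hypothetical helper and mask name):
// combining zeromasking with a square root, e.g. when lanes holding negative inputs have
// already been cleared from `nonnegative` and should simply come back as zero.
#[target_feature(enable = "avx512f")]
fn _example_maskz_sqrt_round_pd(nonnegative: __mmask8, a: __m512d) -> __m512d {
    _mm512_maskz_sqrt_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(nonnegative, a)
}
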
8468/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.\
8469///
8470/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8471/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8472/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8473/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8474/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8475/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8476///
8477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_round_ps&expand=2565)
8478#[inline]
8479#[target_feature(enable = "avx512f")]
8480#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8481#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8482#[rustc_legacy_const_generics(3)]
8483pub fn _mm512_fmadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
8484    unsafe {
8485        static_assert_rounding!(ROUNDING);
8486        vfmadd132psround(a, b, c, ROUNDING)
8487    }
8488}
8489
8490/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8491///
8492/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8493/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8494/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8495/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8496/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8497/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8498///
8499/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_round_ps&expand=2566)
8500#[inline]
8501#[target_feature(enable = "avx512f")]
8502#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8503#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8504#[rustc_legacy_const_generics(4)]
8505pub fn _mm512_mask_fmadd_round_ps<const ROUNDING: i32>(
8506    a: __m512,
8507    k: __mmask16,
8508    b: __m512,
8509    c: __m512,
8510) -> __m512 {
8511    unsafe {
8512        static_assert_rounding!(ROUNDING);
8513        simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), a)
8514    }
8515}
8516
8517/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8518///
8519/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8520/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8521/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8522/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8523/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8524/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8525///
8526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_round_ps&expand=2568)
8527#[inline]
8528#[target_feature(enable = "avx512f")]
8529#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8530#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8531#[rustc_legacy_const_generics(4)]
8532pub fn _mm512_maskz_fmadd_round_ps<const ROUNDING: i32>(
8533    k: __mmask16,
8534    a: __m512,
8535    b: __m512,
8536    c: __m512,
8537) -> __m512 {
8538    unsafe {
8539        static_assert_rounding!(ROUNDING);
8540        simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), _mm512_setzero_ps())
8541    }
8542}
8543
8544/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8545///
8546/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8547/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8548/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8549/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8550/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8551/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8552///
8553/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_round_ps&expand=2567)
8554#[inline]
8555#[target_feature(enable = "avx512f")]
8556#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8557#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8558#[rustc_legacy_const_generics(4)]
8559pub fn _mm512_mask3_fmadd_round_ps<const ROUNDING: i32>(
8560    a: __m512,
8561    b: __m512,
8562    c: __m512,
8563    k: __mmask16,
8564) -> __m512 {
8565    unsafe {
8566        static_assert_rounding!(ROUNDING);
8567        simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), c)
8568    }
8569}
8570
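// Illustrative sketch, not part of the public API (hypothetical helper): a fused multiply-add
// rounds only once, so `a * b + c` computed this way can differ from a separate multiply
// followed by an add, each of which rounds its own intermediate result.
#[target_feature(enable = "avx512f")]
fn _example_fmadd_round_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c)
}
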
8571/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.\
8572///
8573/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8574/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8575/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8576/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8577/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8578/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8579///
8580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_round_pd&expand=2561)
8581#[inline]
8582#[target_feature(enable = "avx512f")]
8583#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8584#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8585#[rustc_legacy_const_generics(3)]
8586pub fn _mm512_fmadd_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
8587    unsafe {
8588        static_assert_rounding!(ROUNDING);
8589        vfmadd132pdround(a, b, c, ROUNDING)
8590    }
8591}
8592
8593/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8594///
8595/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8596/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8597/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8598/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8599/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8600/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8601///
8602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_round_pd&expand=2562)
8603#[inline]
8604#[target_feature(enable = "avx512f")]
8605#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8606#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8607#[rustc_legacy_const_generics(4)]
8608pub fn _mm512_mask_fmadd_round_pd<const ROUNDING: i32>(
8609    a: __m512d,
8610    k: __mmask8,
8611    b: __m512d,
8612    c: __m512d,
8613) -> __m512d {
8614    unsafe {
8615        static_assert_rounding!(ROUNDING);
8616        simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), a)
8617    }
8618}
8619
8620/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8621///
8622/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8623/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8624/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8625/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8626/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8627/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8628///
8629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_round_pd&expand=2564)
8630#[inline]
8631#[target_feature(enable = "avx512f")]
8632#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8633#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8634#[rustc_legacy_const_generics(4)]
8635pub fn _mm512_maskz_fmadd_round_pd<const ROUNDING: i32>(
8636    k: __mmask8,
8637    a: __m512d,
8638    b: __m512d,
8639    c: __m512d,
8640) -> __m512d {
8641    unsafe {
8642        static_assert_rounding!(ROUNDING);
8643        simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), _mm512_setzero_pd())
8644    }
8645}
8646
8647/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8648///
8649/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8650/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8651/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8652/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8653/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8654/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8655///
8656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_round_pd&expand=2563)
8657#[inline]
8658#[target_feature(enable = "avx512f")]
8659#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8660#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8661#[rustc_legacy_const_generics(4)]
8662pub fn _mm512_mask3_fmadd_round_pd<const ROUNDING: i32>(
8663    a: __m512d,
8664    b: __m512d,
8665    c: __m512d,
8666    k: __mmask8,
8667) -> __m512d {
8668    unsafe {
8669        static_assert_rounding!(ROUNDING);
8670        simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), c)
8671    }
8672}
8673
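// Illustrative sketch, not part of the public API (hypothetical helper): the `mask` and
// `mask3` variants differ only in which operand supplies the untouched lanes - `a` for
// `mask`, `c` for `mask3` - and in where the mask appears in the argument list.
#[target_feature(enable = "avx512f")]
fn _example_fmadd_round_pd_masked(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> (__m512d, __m512d) {
    let keep_a = _mm512_mask_fmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, k, b, c);
    let keep_c = _mm512_mask3_fmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c, k);
    (keep_a, keep_c)
}
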
8674/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\
8675///
8676/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8677/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8678/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8679/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8680/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8681/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8682///
8683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_round_ps&expand=2651)
8684#[inline]
8685#[target_feature(enable = "avx512f")]
8686#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8687#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8688#[rustc_legacy_const_generics(3)]
8689pub fn _mm512_fmsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
8690    unsafe {
8691        static_assert_rounding!(ROUNDING);
8692        vfmadd132psround(a, b, simd_neg(c), ROUNDING)
8693    }
8694}
8695
8696/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8697///
8698/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8699/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8700/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8701/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8702/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8703/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8704///
8705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_round_ps&expand=2652)
8706#[inline]
8707#[target_feature(enable = "avx512f")]
8708#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8709#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8710#[rustc_legacy_const_generics(4)]
8711pub fn _mm512_mask_fmsub_round_ps<const ROUNDING: i32>(
8712    a: __m512,
8713    k: __mmask16,
8714    b: __m512,
8715    c: __m512,
8716) -> __m512 {
8717    unsafe {
8718        static_assert_rounding!(ROUNDING);
8719        let r = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
8720        simd_select_bitmask(k, r, a)
8721    }
8722}
8723
8724/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8725///
8726/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8727/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8728/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8729/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8730/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8731/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8732///
8733/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_round_ps&expand=2654)
8734#[inline]
8735#[target_feature(enable = "avx512f")]
8736#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8737#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8738#[rustc_legacy_const_generics(4)]
8739pub fn _mm512_maskz_fmsub_round_ps<const ROUNDING: i32>(
8740    k: __mmask16,
8741    a: __m512,
8742    b: __m512,
8743    c: __m512,
8744) -> __m512 {
8745    unsafe {
8746        static_assert_rounding!(ROUNDING);
8747        let r = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
8748        simd_select_bitmask(k, r, _mm512_setzero_ps())
8749    }
8750}
8751
8752/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8753///
8754/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8755/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8756/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8757/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8758/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8759/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8760///
8761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_round_ps&expand=2653)
8762#[inline]
8763#[target_feature(enable = "avx512f")]
8764#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8765#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8766#[rustc_legacy_const_generics(4)]
8767pub fn _mm512_mask3_fmsub_round_ps<const ROUNDING: i32>(
8768    a: __m512,
8769    b: __m512,
8770    c: __m512,
8771    k: __mmask16,
8772) -> __m512 {
8773    unsafe {
8774        static_assert_rounding!(ROUNDING);
8775        let r = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
8776        simd_select_bitmask(k, r, c)
8777    }
8778}
8779
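// Illustrative sketch, not part of the public API (hypothetical helper): fmsub computes
// `a * b - c` in one fused step; as the implementations above show, it is equivalent to
// fmadd with a negated `c`.
#[target_feature(enable = "avx512f")]
fn _example_fmsub_round_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    _mm512_fmsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c)
}
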
8780/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\
8781///
8782/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8783/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8784/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8785/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8786/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8787/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8788///
8789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_round_pd&expand=2647)
8790#[inline]
8791#[target_feature(enable = "avx512f")]
8792#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8793#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
8794#[rustc_legacy_const_generics(3)]
8795pub fn _mm512_fmsub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
8796    unsafe {
8797        static_assert_rounding!(ROUNDING);
8798        vfmadd132pdround(a, b, simd_neg(c), ROUNDING)
8799    }
8800}
8801
8802/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8803///
8804/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8805/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8806/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8807/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8808/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8809/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8810///
8811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_round_pd&expand=2648)
8812#[inline]
8813#[target_feature(enable = "avx512f")]
8814#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8815#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
8816#[rustc_legacy_const_generics(4)]
8817pub fn _mm512_mask_fmsub_round_pd<const ROUNDING: i32>(
8818    a: __m512d,
8819    k: __mmask8,
8820    b: __m512d,
8821    c: __m512d,
8822) -> __m512d {
8823    unsafe {
8824        static_assert_rounding!(ROUNDING);
8825        let r = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
8826        simd_select_bitmask(k, r, a)
8827    }
8828}
8829
8830/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8831///
8832/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8833/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8834/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8835/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8836/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8837/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8838///
8839/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_round_pd&expand=2650)
8840#[inline]
8841#[target_feature(enable = "avx512f")]
8842#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8843#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
8844#[rustc_legacy_const_generics(4)]
8845pub fn _mm512_maskz_fmsub_round_pd<const ROUNDING: i32>(
8846    k: __mmask8,
8847    a: __m512d,
8848    b: __m512d,
8849    c: __m512d,
8850) -> __m512d {
8851    unsafe {
8852        static_assert_rounding!(ROUNDING);
8853        let r = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
8854        simd_select_bitmask(k, r, _mm512_setzero_pd())
8855    }
8856}
8857
8858/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8859///
8860/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8861/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8862/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8863/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8864/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8865/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8866///
8867/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_round_pd&expand=2649)
8868#[inline]
8869#[target_feature(enable = "avx512f")]
8870#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8871#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
8872#[rustc_legacy_const_generics(4)]
8873pub fn _mm512_mask3_fmsub_round_pd<const ROUNDING: i32>(
8874    a: __m512d,
8875    b: __m512d,
8876    c: __m512d,
8877    k: __mmask8,
8878) -> __m512d {
8879    unsafe {
8880        static_assert_rounding!(ROUNDING);
8881        let r = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
8882        simd_select_bitmask(k, r, c)
8883    }
8884}
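
// A minimal usage sketch for the masked `fmsub_round_pd` variants above, assuming
// AVX-512F has been detected at runtime (e.g. via `is_x86_feature_detected!("avx512f")`)
// and the call site is compiled with `#[target_feature(enable = "avx512f")]`;
// `a`, `b`, `c` are placeholder `__m512d` values:
//
//     const R: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
//     // lanes 0..4 hold (a * b) - c rounded to nearest;
//     // with `maskz` lanes 4..8 become 0.0, with `mask3` they keep `c`.
//     let zeroed = _mm512_maskz_fmsub_round_pd::<R>(0b0000_1111, a, b, c);
//     let kept_c = _mm512_mask3_fmsub_round_pd::<R>(a, b, c, 0b0000_1111);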
8885
8886/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\
8887///
8888/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8889/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8890/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8891/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8892/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8893/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8894///
8895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_round_ps&expand=2619)
8896#[inline]
8897#[target_feature(enable = "avx512f")]
8898#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8899#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8900#[rustc_legacy_const_generics(3)]
8901pub fn _mm512_fmaddsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
8902    unsafe {
8903        static_assert_rounding!(ROUNDING);
8904        vfmaddsubpsround(a, b, c, ROUNDING)
8905    }
8906}
8907
8908/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8909///
8910/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8911/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8912/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8913/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8914/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8915/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8916///
8917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_round_ps&expand=2620)
8918#[inline]
8919#[target_feature(enable = "avx512f")]
8920#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8921#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8922#[rustc_legacy_const_generics(4)]
8923pub fn _mm512_mask_fmaddsub_round_ps<const ROUNDING: i32>(
8924    a: __m512,
8925    k: __mmask16,
8926    b: __m512,
8927    c: __m512,
8928) -> __m512 {
8929    unsafe {
8930        static_assert_rounding!(ROUNDING);
8931        simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), a)
8932    }
8933}
8934
8935/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8936///
8937/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8938/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8939/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8940/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8941/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8942/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8943///
8944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_round_ps&expand=2622)
8945#[inline]
8946#[target_feature(enable = "avx512f")]
8947#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8948#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8949#[rustc_legacy_const_generics(4)]
8950pub fn _mm512_maskz_fmaddsub_round_ps<const ROUNDING: i32>(
8951    k: __mmask16,
8952    a: __m512,
8953    b: __m512,
8954    c: __m512,
8955) -> __m512 {
8956    unsafe {
8957        static_assert_rounding!(ROUNDING);
8958        simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), _mm512_setzero_ps())
8959    }
8960}
8961
8962/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8963///
8964/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8965/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8966/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8967/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8968/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8969/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8970///
8971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_round_ps&expand=2621)
8972#[inline]
8973#[target_feature(enable = "avx512f")]
8974#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8975#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8976#[rustc_legacy_const_generics(4)]
8977pub fn _mm512_mask3_fmaddsub_round_ps<const ROUNDING: i32>(
8978    a: __m512,
8979    b: __m512,
8980    c: __m512,
8981    k: __mmask16,
8982) -> __m512 {
8983    unsafe {
8984        static_assert_rounding!(ROUNDING);
8985        simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), c)
8986    }
8987}
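
// Note: `fmaddsub` alternates per lane; following Intel's pseudocode, even-indexed
// lanes compute (a * b) - c and odd-indexed lanes compute (a * b) + c. A small
// sketch, assuming AVX-512F is enabled for the call site:
//
//     const R: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
//     let a = _mm512_set1_ps(1.0);
//     let b = _mm512_set1_ps(2.0);
//     let c = _mm512_set1_ps(0.5);
//     // even lanes: 2.0 - 0.5 = 1.5, odd lanes: 2.0 + 0.5 = 2.5
//     let r = _mm512_fmaddsub_round_ps::<R>(a, b, c);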
8988
8989/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\
8990///
8991/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8992/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8993/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8994/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8995/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8996/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8997///
8998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_round_pd&expand=2615)
8999#[inline]
9000#[target_feature(enable = "avx512f")]
9001#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9002#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9003#[rustc_legacy_const_generics(3)]
9004pub fn _mm512_fmaddsub_round_pd<const ROUNDING: i32>(
9005    a: __m512d,
9006    b: __m512d,
9007    c: __m512d,
9008) -> __m512d {
9009    unsafe {
9010        static_assert_rounding!(ROUNDING);
9011        vfmaddsubpdround(a, b, c, ROUNDING)
9012    }
9013}
9014
9015/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9016///
9017/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9018/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9019/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9020/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9021/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9022/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9023///
9024/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_round_pd&expand=2616)
9025#[inline]
9026#[target_feature(enable = "avx512f")]
9027#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9028#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9029#[rustc_legacy_const_generics(4)]
9030pub fn _mm512_mask_fmaddsub_round_pd<const ROUNDING: i32>(
9031    a: __m512d,
9032    k: __mmask8,
9033    b: __m512d,
9034    c: __m512d,
9035) -> __m512d {
9036    unsafe {
9037        static_assert_rounding!(ROUNDING);
9038        simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), a)
9039    }
9040}
9041
9042/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9043///
9044/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9045/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9046/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9047/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9048/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9049/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9050///
9051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_round_pd&expand=2618)
9052#[inline]
9053#[target_feature(enable = "avx512f")]
9054#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9055#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9056#[rustc_legacy_const_generics(4)]
9057pub fn _mm512_maskz_fmaddsub_round_pd<const ROUNDING: i32>(
9058    k: __mmask8,
9059    a: __m512d,
9060    b: __m512d,
9061    c: __m512d,
9062) -> __m512d {
9063    unsafe {
9064        static_assert_rounding!(ROUNDING);
9065        simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), _mm512_setzero_pd())
9066    }
9067}
9068
9069/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9070///
9071/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9072/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9073/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9074/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9075/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9076/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9077///
9078/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_round_pd&expand=2617)
9079#[inline]
9080#[target_feature(enable = "avx512f")]
9081#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9082#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9083#[rustc_legacy_const_generics(4)]
9084pub fn _mm512_mask3_fmaddsub_round_pd<const ROUNDING: i32>(
9085    a: __m512d,
9086    b: __m512d,
9087    c: __m512d,
9088    k: __mmask8,
9089) -> __m512d {
9090    unsafe {
9091        static_assert_rounding!(ROUNDING);
9092        simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), c)
9093    }
9094}
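
// Note: the `_pd` variants above follow the same even/odd pattern but operate on
// eight f64 lanes, so the masks are `__mmask8`. A sketch (placeholder `__m512d`
// values, AVX-512F assumed enabled): masking with 0b1010_1010 keeps the computed
// result only in the odd (adding) lanes and copies `a` into the even lanes:
//
//     const R: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
//     let r = _mm512_mask_fmaddsub_round_pd::<R>(a, 0b1010_1010, b, c);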
9095
9096/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\
9097///
9098/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9099/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9100/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9101/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9102/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9103/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9104///
9105/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_round_ps&expand=2699)
9106#[inline]
9107#[target_feature(enable = "avx512f")]
9108#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9109#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9110#[rustc_legacy_const_generics(3)]
9111pub fn _mm512_fmsubadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
9112    unsafe {
9113        static_assert_rounding!(ROUNDING);
9114        vfmaddsubpsround(a, b, simd_neg(c), ROUNDING)
9115    }
9116}
9117
9118/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9119///
9120/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9121/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9122/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9123/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9124/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9125/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9126///
9127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_round_ps&expand=2700)
9128#[inline]
9129#[target_feature(enable = "avx512f")]
9130#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9131#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9132#[rustc_legacy_const_generics(4)]
9133pub fn _mm512_mask_fmsubadd_round_ps<const ROUNDING: i32>(
9134    a: __m512,
9135    k: __mmask16,
9136    b: __m512,
9137    c: __m512,
9138) -> __m512 {
9139    unsafe {
9140        static_assert_rounding!(ROUNDING);
9141        let r = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
9142        simd_select_bitmask(k, r, a)
9143    }
9144}
9145
9146/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9147///
9148/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9149/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9150/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9151/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9152/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9153/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9154///
9155/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_round_ps&expand=2702)
9156#[inline]
9157#[target_feature(enable = "avx512f")]
9158#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9159#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9160#[rustc_legacy_const_generics(4)]
9161pub fn _mm512_maskz_fmsubadd_round_ps<const ROUNDING: i32>(
9162    k: __mmask16,
9163    a: __m512,
9164    b: __m512,
9165    c: __m512,
9166) -> __m512 {
9167    unsafe {
9168        static_assert_rounding!(ROUNDING);
9169        let r = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
9170        simd_select_bitmask(k, r, _mm512_setzero_ps())
9171    }
9172}
9173
9174/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9175///
9176/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9177/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9178/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9179/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9180/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9181/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9182///
9183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_round_ps&expand=2701)
9184#[inline]
9185#[target_feature(enable = "avx512f")]
9186#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9187#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9188#[rustc_legacy_const_generics(4)]
9189pub fn _mm512_mask3_fmsubadd_round_ps<const ROUNDING: i32>(
9190    a: __m512,
9191    b: __m512,
9192    c: __m512,
9193    k: __mmask16,
9194) -> __m512 {
9195    unsafe {
9196        static_assert_rounding!(ROUNDING);
9197        let r = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
9198        simd_select_bitmask(k, r, c)
9199    }
9200}
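
// Note: `fmsubadd` is the mirror image of `fmaddsub`; per Intel's pseudocode,
// even-indexed lanes compute (a * b) + c and odd-indexed lanes compute (a * b) - c.
// A sketch with 1.0, 2.0 and 0.5 broadcast to every lane (AVX-512F assumed enabled):
//
//     const R: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
//     let a = _mm512_set1_ps(1.0);
//     let b = _mm512_set1_ps(2.0);
//     let c = _mm512_set1_ps(0.5);
//     // even lanes: 2.0 + 0.5 = 2.5, odd lanes: 2.0 - 0.5 = 1.5
//     let r = _mm512_fmsubadd_round_ps::<R>(a, b, c);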
9201
9202/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\
9203///
9204/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9205/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9206/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9207/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9208/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9209/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9210///
9211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_round_pd&expand=2695)
9212#[inline]
9213#[target_feature(enable = "avx512f")]
9214#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9215#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9216#[rustc_legacy_const_generics(3)]
9217pub fn _mm512_fmsubadd_round_pd<const ROUNDING: i32>(
9218    a: __m512d,
9219    b: __m512d,
9220    c: __m512d,
9221) -> __m512d {
9222    unsafe {
9223        static_assert_rounding!(ROUNDING);
9224        vfmaddsubpdround(a, b, simd_neg(c), ROUNDING)
9225    }
9226}
9227
9228/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9229///
9230/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9231/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9232/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9233/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9234/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9235/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9236///
9237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_round_pd&expand=2696)
9238#[inline]
9239#[target_feature(enable = "avx512f")]
9240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9241#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9242#[rustc_legacy_const_generics(4)]
9243pub fn _mm512_mask_fmsubadd_round_pd<const ROUNDING: i32>(
9244    a: __m512d,
9245    k: __mmask8,
9246    b: __m512d,
9247    c: __m512d,
9248) -> __m512d {
9249    unsafe {
9250        static_assert_rounding!(ROUNDING);
9251        let r = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
9252        simd_select_bitmask(k, r, a)
9253    }
9254}
9255
9256/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9257///
9258/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9259/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9260/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9261/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9262/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9263/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9264///
9265/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_round_pd&expand=2698)
9266#[inline]
9267#[target_feature(enable = "avx512f")]
9268#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9269#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9270#[rustc_legacy_const_generics(4)]
9271pub fn _mm512_maskz_fmsubadd_round_pd<const ROUNDING: i32>(
9272    k: __mmask8,
9273    a: __m512d,
9274    b: __m512d,
9275    c: __m512d,
9276) -> __m512d {
9277    unsafe {
9278        static_assert_rounding!(ROUNDING);
9279        let r = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
9280        simd_select_bitmask(k, r, _mm512_setzero_pd())
9281    }
9282}
9283
9284/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9285///
9286/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9287/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9288/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9289/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9290/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9291/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9292///
9293/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_round_pd&expand=2697)
9294#[inline]
9295#[target_feature(enable = "avx512f")]
9296#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9297#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9298#[rustc_legacy_const_generics(4)]
9299pub fn _mm512_mask3_fmsubadd_round_pd<const ROUNDING: i32>(
9300    a: __m512d,
9301    b: __m512d,
9302    c: __m512d,
9303    k: __mmask8,
9304) -> __m512d {
9305    unsafe {
9306        static_assert_rounding!(ROUNDING);
9307        let r = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
9308        simd_select_bitmask(k, r, c)
9309    }
9310}
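
// Note: across the masked variants the position of `k` tracks the fallback source
// for masked-off lanes: `mask` takes (a, k, b, c) and falls back to `a`, `maskz`
// takes (k, a, b, c) and falls back to zero, and `mask3` takes (a, b, c, k) and
// falls back to `c`. A sketch with placeholder `__m512d` values (AVX-512F assumed
// enabled for the call site):
//
//     const R: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
//     // lanes 0..4 hold the fused result, lanes 4..8 are copied from `a`
//     let r = _mm512_mask_fmsubadd_round_pd::<R>(a, 0b0000_1111, b, c);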
9311
9312/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\
9313///
9314/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9315/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9316/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9317/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9318/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9319/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9320///
9321/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_round_ps&expand=2731)
9322#[inline]
9323#[target_feature(enable = "avx512f")]
9324#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9325#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9326#[rustc_legacy_const_generics(3)]
9327pub fn _mm512_fnmadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
9328    unsafe {
9329        static_assert_rounding!(ROUNDING);
9330        vfmadd132psround(simd_neg(a), b, c, ROUNDING)
9331    }
9332}
9333
9334/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9335///
9336/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9337/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9338/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9339/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9340/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9341/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9342///
9343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_round_ps&expand=2732)
9344#[inline]
9345#[target_feature(enable = "avx512f")]
9346#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9347#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9348#[rustc_legacy_const_generics(4)]
9349pub fn _mm512_mask_fnmadd_round_ps<const ROUNDING: i32>(
9350    a: __m512,
9351    k: __mmask16,
9352    b: __m512,
9353    c: __m512,
9354) -> __m512 {
9355    unsafe {
9356        static_assert_rounding!(ROUNDING);
9357        let r = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
9358        simd_select_bitmask(k, r, a)
9359    }
9360}
9361
9362/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9363///
9364/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9365/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9366/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9367/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9368/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9369/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9370///
9371/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_round_ps&expand=2734)
9372#[inline]
9373#[target_feature(enable = "avx512f")]
9374#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9375#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9376#[rustc_legacy_const_generics(4)]
9377pub fn _mm512_maskz_fnmadd_round_ps<const ROUNDING: i32>(
9378    k: __mmask16,
9379    a: __m512,
9380    b: __m512,
9381    c: __m512,
9382) -> __m512 {
9383    unsafe {
9384        static_assert_rounding!(ROUNDING);
9385        let r = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
9386        simd_select_bitmask(k, r, _mm512_setzero_ps())
9387    }
9388}
9389
9390/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9391///
9392/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9393/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9394/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9395/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9396/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9397/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9398///
9399/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_round_ps&expand=2733)
9400#[inline]
9401#[target_feature(enable = "avx512f")]
9402#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9403#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9404#[rustc_legacy_const_generics(4)]
9405pub fn _mm512_mask3_fnmadd_round_ps<const ROUNDING: i32>(
9406    a: __m512,
9407    b: __m512,
9408    c: __m512,
9409    k: __mmask16,
9410) -> __m512 {
9411    unsafe {
9412        static_assert_rounding!(ROUNDING);
9413        let r = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
9414        simd_select_bitmask(k, r, c)
9415    }
9416}
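
// Note: `fnmadd` negates the product, so each lane computes -(a * b) + c, i.e.
// c - a * b. With 3.0, 2.0 and 10.0 broadcast to every lane, the result is
// 10.0 - 6.0 = 4.0 in every lane (a sketch, AVX-512F assumed enabled):
//
//     const R: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
//     let r = _mm512_fnmadd_round_ps::<R>(
//         _mm512_set1_ps(3.0),
//         _mm512_set1_ps(2.0),
//         _mm512_set1_ps(10.0),
//     );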
9417
9418/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\
9419///
9420/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9421/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9422/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9423/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9424/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9425/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9426///
9427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_round_pd&expand=2711)
9428#[inline]
9429#[target_feature(enable = "avx512f")]
9430#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9431#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9432#[rustc_legacy_const_generics(3)]
9433pub fn _mm512_fnmadd_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
9434    unsafe {
9435        static_assert_rounding!(ROUNDING);
9436        vfmadd132pdround(simd_neg(a), b, c, ROUNDING)
9437    }
9438}
9439
9440/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9441///
9442/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9443/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9444/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9445/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9446/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9447/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9448///
9449/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_round_pd&expand=2728)
9450#[inline]
9451#[target_feature(enable = "avx512f")]
9452#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9453#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9454#[rustc_legacy_const_generics(4)]
9455pub fn _mm512_mask_fnmadd_round_pd<const ROUNDING: i32>(
9456    a: __m512d,
9457    k: __mmask8,
9458    b: __m512d,
9459    c: __m512d,
9460) -> __m512d {
9461    unsafe {
9462        static_assert_rounding!(ROUNDING);
9463        let r = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
9464        simd_select_bitmask(k, r, a)
9465    }
9466}
9467
9468/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9469///
9470/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9471/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9472/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9473/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9474/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9475/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9476///
9477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_round_pd&expand=2730)
9478#[inline]
9479#[target_feature(enable = "avx512f")]
9480#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9481#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9482#[rustc_legacy_const_generics(4)]
9483pub fn _mm512_maskz_fnmadd_round_pd<const ROUNDING: i32>(
9484    k: __mmask8,
9485    a: __m512d,
9486    b: __m512d,
9487    c: __m512d,
9488) -> __m512d {
9489    unsafe {
9490        static_assert_rounding!(ROUNDING);
9491        let r = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
9492        simd_select_bitmask(k, r, _mm512_setzero_pd())
9493    }
9494}
9495
9496/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9497///
9498/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9499/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9500/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9501/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9502/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9503/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9504///
9505/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_round_pd&expand=2729)
9506#[inline]
9507#[target_feature(enable = "avx512f")]
9508#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9509#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9510#[rustc_legacy_const_generics(4)]
9511pub fn _mm512_mask3_fnmadd_round_pd<const ROUNDING: i32>(
9512    a: __m512d,
9513    b: __m512d,
9514    c: __m512d,
9515    k: __mmask8,
9516) -> __m512d {
9517    unsafe {
9518        static_assert_rounding!(ROUNDING);
9519        let r = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
9520        simd_select_bitmask(k, r, c)
9521    }
9522}
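
// Note: passing `_MM_FROUND_CUR_DIRECTION` defers rounding to the mode currently
// set in `MXCSR.RC` rather than encoding a static rounding mode. A sketch with the
// double-precision variant above (placeholder `__m512d` values, AVX-512F assumed
// enabled for the call site):
//
//     let r = _mm512_fnmadd_round_pd::<_MM_FROUND_CUR_DIRECTION>(a, b, c);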
9523
9524/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\
9525///
9526/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9527/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9528/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9529/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9530/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9531/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9532///
9533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_round_ps&expand=2779)
9534#[inline]
9535#[target_feature(enable = "avx512f")]
9536#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9537#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9538#[rustc_legacy_const_generics(3)]
9539pub fn _mm512_fnmsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
9540    unsafe {
9541        static_assert_rounding!(ROUNDING);
9542        vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING)
9543    }
9544}
9545
9546/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9547///
9548/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9549/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9550/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9551/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9552/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9553/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9554///
9555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_round_ps&expand=2780)
9556#[inline]
9557#[target_feature(enable = "avx512f")]
9558#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9559#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9560#[rustc_legacy_const_generics(4)]
9561pub fn _mm512_mask_fnmsub_round_ps<const ROUNDING: i32>(
9562    a: __m512,
9563    k: __mmask16,
9564    b: __m512,
9565    c: __m512,
9566) -> __m512 {
9567    unsafe {
9568        static_assert_rounding!(ROUNDING);
9569        let r = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
9570        simd_select_bitmask(k, r, a)
9571    }
9572}
9573
9574/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9575///
9576/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9577/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9578/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9579/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9580/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9581/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9582///
9583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_round_ps&expand=2782)
9584#[inline]
9585#[target_feature(enable = "avx512f")]
9586#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9587#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9588#[rustc_legacy_const_generics(4)]
9589pub fn _mm512_maskz_fnmsub_round_ps<const ROUNDING: i32>(
9590    k: __mmask16,
9591    a: __m512,
9592    b: __m512,
9593    c: __m512,
9594) -> __m512 {
9595    unsafe {
9596        static_assert_rounding!(ROUNDING);
9597        let r = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
9598        simd_select_bitmask(k, r, _mm512_setzero_ps())
9599    }
9600}
9601
9602/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9603///
9604/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9605/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9606/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9607/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9608/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9609/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9610///
9611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_round_ps&expand=2781)
9612#[inline]
9613#[target_feature(enable = "avx512f")]
9614#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9615#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9616#[rustc_legacy_const_generics(4)]
9617pub fn _mm512_mask3_fnmsub_round_ps<const ROUNDING: i32>(
9618    a: __m512,
9619    b: __m512,
9620    c: __m512,
9621    k: __mmask16,
9622) -> __m512 {
9623    unsafe {
9624        static_assert_rounding!(ROUNDING);
9625        let r = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
9626        simd_select_bitmask(k, r, c)
9627    }
9628}
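
// Note: `fnmsub` negates both the product and the addend, so each lane computes
// -(a * b) - c. With 3.0, 2.0 and 10.0 broadcast to every lane, every lane of the
// result holds -16.0 (a sketch, AVX-512F assumed enabled):
//
//     const R: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
//     let r = _mm512_fnmsub_round_ps::<R>(
//         _mm512_set1_ps(3.0),
//         _mm512_set1_ps(2.0),
//         _mm512_set1_ps(10.0),
//     );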
9629
9630/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\
9631///
9632/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9633/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9634/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9635/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9636/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9637/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9638///
9639/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_round_pd&expand=2775)
9640#[inline]
9641#[target_feature(enable = "avx512f")]
9642#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9643#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9644#[rustc_legacy_const_generics(3)]
9645pub fn _mm512_fnmsub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
9646    unsafe {
9647        static_assert_rounding!(ROUNDING);
9648        vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING)
9649    }
9650}
9651
9652/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9653///
9654/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9655/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9656/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9657/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9658/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9659/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9660///
9661/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_round_pd&expand=2776)
9662#[inline]
9663#[target_feature(enable = "avx512f")]
9664#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9665#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9666#[rustc_legacy_const_generics(4)]
9667pub fn _mm512_mask_fnmsub_round_pd<const ROUNDING: i32>(
9668    a: __m512d,
9669    k: __mmask8,
9670    b: __m512d,
9671    c: __m512d,
9672) -> __m512d {
9673    unsafe {
9674        static_assert_rounding!(ROUNDING);
9675        let r = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
9676        simd_select_bitmask(k, r, a)
9677    }
9678}
9679
9680/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9681///
9682/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9683/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9684/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9685/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9686/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9687/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9688///
9689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_round_pd&expand=2778)
9690#[inline]
9691#[target_feature(enable = "avx512f")]
9692#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9693#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9694#[rustc_legacy_const_generics(4)]
9695pub fn _mm512_maskz_fnmsub_round_pd<const ROUNDING: i32>(
9696    k: __mmask8,
9697    a: __m512d,
9698    b: __m512d,
9699    c: __m512d,
9700) -> __m512d {
9701    unsafe {
9702        static_assert_rounding!(ROUNDING);
9703        let r = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
9704        simd_select_bitmask(k, r, _mm512_setzero_pd())
9705    }
9706}
9707
9708/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9709///
9710/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9711/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9712/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9713/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9714/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9715/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9716///
9717/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_round_pd&expand=2777)
9718#[inline]
9719#[target_feature(enable = "avx512f")]
9720#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9721#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9722#[rustc_legacy_const_generics(4)]
9723pub fn _mm512_mask3_fnmsub_round_pd<const ROUNDING: i32>(
9724    a: __m512d,
9725    b: __m512d,
9726    c: __m512d,
9727    k: __mmask8,
9728) -> __m512d {
9729    unsafe {
9730        static_assert_rounding!(ROUNDING);
9731        let r = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
9732        simd_select_bitmask(k, r, c)
9733    }
9734}
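
// Note: as the bodies above show, the fmsub, fnmadd and fnmsub families are all
// expressed through the plain fused multiply-add primitives
// (`vfmadd132psround`/`vfmadd132pdround`), and fmsubadd through the fmaddsub
// primitives, with `simd_neg` applied to `a` and/or `c` to obtain the required
// sign combination; the `assert_instr` annotations still name the dedicated
// vfmsub/vfnmadd/vfnmsub/vfmsubadd forms the backend is expected to select.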
9735
9736/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.\
9737/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9738///
9739/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_round_ps&expand=3662)
9740#[inline]
9741#[target_feature(enable = "avx512f")]
9742#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9743#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
9744#[rustc_legacy_const_generics(2)]
9745pub fn _mm512_max_round_ps<const SAE: i32>(a: __m512, b: __m512) -> __m512 {
9746    unsafe {
9747        static_assert_sae!(SAE);
9748        let a = a.as_f32x16();
9749        let b = b.as_f32x16();
9750        let r = vmaxps(a, b, SAE);
9751        transmute(r)
9752    }
9753}
9754
9755/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9756/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9757///
9758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_round_ps&expand=3660)
9759#[inline]
9760#[target_feature(enable = "avx512f")]
9761#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9762#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
9763#[rustc_legacy_const_generics(4)]
9764pub fn _mm512_mask_max_round_ps<const SAE: i32>(
9765    src: __m512,
9766    k: __mmask16,
9767    a: __m512,
9768    b: __m512,
9769) -> __m512 {
9770    unsafe {
9771        static_assert_sae!(SAE);
9772        let a = a.as_f32x16();
9773        let b = b.as_f32x16();
9774        let r = vmaxps(a, b, SAE);
9775        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
9776    }
9777}
9778
9779/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9780/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9781///
9782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_round_ps&expand=3661)
9783#[inline]
9784#[target_feature(enable = "avx512f")]
9785#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9786#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
9787#[rustc_legacy_const_generics(3)]
9788pub fn _mm512_maskz_max_round_ps<const SAE: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
9789    unsafe {
9790        static_assert_sae!(SAE);
9791        let a = a.as_f32x16();
9792        let b = b.as_f32x16();
9793        let r = vmaxps(a, b, SAE);
9794        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
9795    }
9796}
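
// Note: unlike the `_round_` arithmetic intrinsics above, `max`/`min` take only an
// exception-suppression parameter; `static_assert_sae!` limits it to
// `_MM_FROUND_CUR_DIRECTION` or `_MM_FROUND_NO_EXC`. A sketch with placeholder
// `__m512` values (AVX-512F assumed enabled for the call site):
//
//     // elementwise maximum without raising floating-point exceptions
//     let r = _mm512_max_round_ps::<_MM_FROUND_NO_EXC>(a, b);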
9797
9798/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.\
9799/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9800///
9801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_round_pd&expand=3659)
9802#[inline]
9803#[target_feature(enable = "avx512f")]
9804#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9805#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
9806#[rustc_legacy_const_generics(2)]
9807pub fn _mm512_max_round_pd<const SAE: i32>(a: __m512d, b: __m512d) -> __m512d {
9808    unsafe {
9809        static_assert_sae!(SAE);
9810        let a = a.as_f64x8();
9811        let b = b.as_f64x8();
9812        let r = vmaxpd(a, b, SAE);
9813        transmute(r)
9814    }
9815}
9816
9817/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9818/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9819///
9820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_round_pd&expand=3657)
9821#[inline]
9822#[target_feature(enable = "avx512f")]
9823#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9824#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
9825#[rustc_legacy_const_generics(4)]
9826pub fn _mm512_mask_max_round_pd<const SAE: i32>(
9827    src: __m512d,
9828    k: __mmask8,
9829    a: __m512d,
9830    b: __m512d,
9831) -> __m512d {
9832    unsafe {
9833        static_assert_sae!(SAE);
9834        let a = a.as_f64x8();
9835        let b = b.as_f64x8();
9836        let r = vmaxpd(a, b, SAE);
9837        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
9838    }
9839}
9840
9841/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9842/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9843///
9844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_round_pd&expand=3658)
9845#[inline]
9846#[target_feature(enable = "avx512f")]
9847#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9848#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
9849#[rustc_legacy_const_generics(3)]
9850pub fn _mm512_maskz_max_round_pd<const SAE: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
9851    unsafe {
9852        static_assert_sae!(SAE);
9853        let a = a.as_f64x8();
9854        let b = b.as_f64x8();
9855        let r = vmaxpd(a, b, SAE);
9856        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
9857    }
9858}
9859
9860/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.\
9861/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9862///
9863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_round_ps&expand=3776)
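/// A minimal usage sketch (illustrative values; assumes nightly `stdarch_x86_avx512` and
/// an `avx512f`-capable CPU):
///
/// ```ignore
/// let a = _mm512_set1_ps(3.0);
/// let b = _mm512_set1_ps(-1.0);
/// // Per-element minimum with exception suppression.
/// let r = _mm512_min_round_ps::<_MM_FROUND_NO_EXC>(a, b);
/// assert_eq!(_mm512_cvtss_f32(r), -1.0);
/// ```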
9864#[inline]
9865#[target_feature(enable = "avx512f")]
9866#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9867#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
9868#[rustc_legacy_const_generics(2)]
9869pub fn _mm512_min_round_ps<const SAE: i32>(a: __m512, b: __m512) -> __m512 {
9870    unsafe {
9871        static_assert_sae!(SAE);
9872        let a = a.as_f32x16();
9873        let b = b.as_f32x16();
9874        let r = vminps(a, b, SAE);
9875        transmute(r)
9876    }
9877}
9878
9879/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9880/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9881///
9882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_round_ps&expand=3774)
9883#[inline]
9884#[target_feature(enable = "avx512f")]
9885#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9886#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
9887#[rustc_legacy_const_generics(4)]
9888pub fn _mm512_mask_min_round_ps<const SAE: i32>(
9889    src: __m512,
9890    k: __mmask16,
9891    a: __m512,
9892    b: __m512,
9893) -> __m512 {
9894    unsafe {
9895        static_assert_sae!(SAE);
9896        let a = a.as_f32x16();
9897        let b = b.as_f32x16();
9898        let r = vminps(a, b, SAE);
9899        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
9900    }
9901}
9902
9903/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9904/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9905///
9906/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_round_ps&expand=3775)
9907#[inline]
9908#[target_feature(enable = "avx512f")]
9909#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9910#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
9911#[rustc_legacy_const_generics(3)]
9912pub fn _mm512_maskz_min_round_ps<const SAE: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
9913    unsafe {
9914        static_assert_sae!(SAE);
9915        let a = a.as_f32x16();
9916        let b = b.as_f32x16();
9917        let r = vminps(a, b, SAE);
9918        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
9919    }
9920}
9921
9922/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.\
9923/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9924///
9925/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_round_pd&expand=3773)
9926#[inline]
9927#[target_feature(enable = "avx512f")]
9928#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9929#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
9930#[rustc_legacy_const_generics(2)]
9931pub fn _mm512_min_round_pd<const SAE: i32>(a: __m512d, b: __m512d) -> __m512d {
9932    unsafe {
9933        static_assert_sae!(SAE);
9934        let a = a.as_f64x8();
9935        let b = b.as_f64x8();
9936        let r = vminpd(a, b, SAE);
9937        transmute(r)
9938    }
9939}
9940
9941/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9942/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9943///
9944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_round_pd&expand=3771)
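/// A minimal sketch of the writemask behaviour (illustrative values; assumes nightly
/// `stdarch_x86_avx512` and an `avx512f`-capable CPU):
///
/// ```ignore
/// let src = _mm512_set1_pd(9.0);
/// let a = _mm512_set1_pd(3.0);
/// let b = _mm512_set1_pd(-1.0);
/// // Lanes 0, 2, 4 and 6 receive min(a, b) = -1.0; the other lanes keep 9.0 from `src`.
/// let r = _mm512_mask_min_round_pd::<_MM_FROUND_NO_EXC>(src, 0b01010101, a, b);
/// ```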
9945#[inline]
9946#[target_feature(enable = "avx512f")]
9947#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9948#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
9949#[rustc_legacy_const_generics(4)]
9950pub fn _mm512_mask_min_round_pd<const SAE: i32>(
9951    src: __m512d,
9952    k: __mmask8,
9953    a: __m512d,
9954    b: __m512d,
9955) -> __m512d {
9956    unsafe {
9957        static_assert_sae!(SAE);
9958        let a = a.as_f64x8();
9959        let b = b.as_f64x8();
9960        let r = vminpd(a, b, SAE);
9961        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
9962    }
9963}
9964
9965/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9966/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9967///
9968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_round_pd&expand=3772)
9969#[inline]
9970#[target_feature(enable = "avx512f")]
9971#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9972#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
9973#[rustc_legacy_const_generics(3)]
9974pub fn _mm512_maskz_min_round_pd<const SAE: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
9975    unsafe {
9976        static_assert_sae!(SAE);
9977        let a = a.as_f64x8();
9978        let b = b.as_f64x8();
9979        let r = vminpd(a, b, SAE);
9980        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
9981    }
9982}
9983
9984/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.\
9985/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9986///
9987/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_round_ps&expand=2850)
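/// A minimal usage sketch (illustrative values; assumes nightly `stdarch_x86_avx512` and
/// an `avx512f`-capable CPU):
///
/// ```ignore
/// let a = _mm512_set1_ps(8.0);
/// // getexp(8.0) = floor(log2(8.0)) = 3.0
/// let r = _mm512_getexp_round_ps::<_MM_FROUND_NO_EXC>(a);
/// assert_eq!(_mm512_cvtss_f32(r), 3.0);
/// ```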
9988#[inline]
9989#[target_feature(enable = "avx512f")]
9990#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9991#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
9992#[rustc_legacy_const_generics(1)]
9993pub fn _mm512_getexp_round_ps<const SAE: i32>(a: __m512) -> __m512 {
9994    unsafe {
9995        static_assert_sae!(SAE);
9996        let a = a.as_f32x16();
9997        let r = vgetexpps(a, f32x16::ZERO, 0b11111111_11111111, SAE);
9998        transmute(r)
9999    }
10000}
10001
10002/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10003/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10004///
10005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_round_ps&expand=2851)
10006#[inline]
10007#[target_feature(enable = "avx512f")]
10008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10009#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
10010#[rustc_legacy_const_generics(3)]
10011pub fn _mm512_mask_getexp_round_ps<const SAE: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
10012    unsafe {
10013        static_assert_sae!(SAE);
10014        let a = a.as_f32x16();
10015        let src = src.as_f32x16();
10016        let r = vgetexpps(a, src, k, SAE);
10017        transmute(r)
10018    }
10019}
10020
10021/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10022/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10023///
10024/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_round_ps&expand=2852)
10025#[inline]
10026#[target_feature(enable = "avx512f")]
10027#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10028#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
10029#[rustc_legacy_const_generics(2)]
10030pub fn _mm512_maskz_getexp_round_ps<const SAE: i32>(k: __mmask16, a: __m512) -> __m512 {
10031    unsafe {
10032        static_assert_sae!(SAE);
10033        let a = a.as_f32x16();
10034        let r = vgetexpps(a, f32x16::ZERO, k, SAE);
10035        transmute(r)
10036    }
10037}
10038
10039/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.\
10040/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10041///
10042/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_round_pd&expand=2847)
10043#[inline]
10044#[target_feature(enable = "avx512f")]
10045#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10046#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
10047#[rustc_legacy_const_generics(1)]
10048pub fn _mm512_getexp_round_pd<const SAE: i32>(a: __m512d) -> __m512d {
10049    unsafe {
10050        static_assert_sae!(SAE);
10051        let a = a.as_f64x8();
10052        let r = vgetexppd(a, f64x8::ZERO, 0b11111111, SAE);
10053        transmute(r)
10054    }
10055}
10056
10057/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10058/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10059///
10060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_round_pd&expand=2848)
10061#[inline]
10062#[target_feature(enable = "avx512f")]
10063#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10064#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
10065#[rustc_legacy_const_generics(3)]
10066pub fn _mm512_mask_getexp_round_pd<const SAE: i32>(
10067    src: __m512d,
10068    k: __mmask8,
10069    a: __m512d,
10070) -> __m512d {
10071    unsafe {
10072        static_assert_sae!(SAE);
10073        let a = a.as_f64x8();
10074        let src = src.as_f64x8();
10075        let r = vgetexppd(a, src, k, SAE);
10076        transmute(r)
10077    }
10078}
10079
10080/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10081/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10082///
10083/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_round_pd&expand=2849)
10084#[inline]
10085#[target_feature(enable = "avx512f")]
10086#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10087#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
10088#[rustc_legacy_const_generics(2)]
10089pub fn _mm512_maskz_getexp_round_pd<const SAE: i32>(k: __mmask8, a: __m512d) -> __m512d {
10090    unsafe {
10091        static_assert_sae!(SAE);
10092        let a = a.as_f64x8();
10093        let r = vgetexppd(a, f64x8::ZERO, k, SAE);
10094        transmute(r)
10095    }
10096}
10097
10098/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
10099/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10100/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10101/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10102/// * [`_MM_FROUND_TO_POS_INF`] : round up
10103/// * [`_MM_FROUND_TO_ZERO`] : truncate
10104/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10105///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_round_ps&expand=4790)
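/// A minimal usage sketch (illustrative values; assumes nightly `stdarch_x86_avx512` and
/// an `avx512f`-capable CPU):
///
/// ```ignore
/// let a = _mm512_set1_ps(2.7);
/// // IMM8 = 0 keeps zero fraction bits and rounds to nearest, so 2.7 becomes 3.0.
/// let r = _mm512_roundscale_round_ps::<0, _MM_FROUND_NO_EXC>(a);
/// assert_eq!(_mm512_cvtss_f32(r), 3.0);
/// ```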
10108#[inline]
10109#[target_feature(enable = "avx512f")]
10110#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10111#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
10112#[rustc_legacy_const_generics(1, 2)]
10113pub fn _mm512_roundscale_round_ps<const IMM8: i32, const SAE: i32>(a: __m512) -> __m512 {
10114    unsafe {
10115        static_assert_uimm_bits!(IMM8, 8);
10116        static_assert_mantissas_sae!(SAE);
10117        let a = a.as_f32x16();
10118        let r = vrndscaleps(a, IMM8, f32x16::ZERO, 0b11111111_11111111, SAE);
10119        transmute(r)
10120    }
10121}
10122
10123/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10124/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10125/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10126/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10127/// * [`_MM_FROUND_TO_POS_INF`] : round up
10128/// * [`_MM_FROUND_TO_ZERO`] : truncate
10129/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10130///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_round_ps&expand=4788)
10133#[inline]
10134#[target_feature(enable = "avx512f")]
10135#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10136#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
10137#[rustc_legacy_const_generics(3, 4)]
10138pub fn _mm512_mask_roundscale_round_ps<const IMM8: i32, const SAE: i32>(
10139    src: __m512,
10140    k: __mmask16,
10141    a: __m512,
10142) -> __m512 {
10143    unsafe {
10144        static_assert_uimm_bits!(IMM8, 8);
10145        static_assert_mantissas_sae!(SAE);
10146        let a = a.as_f32x16();
10147        let src = src.as_f32x16();
10148        let r = vrndscaleps(a, IMM8, src, k, SAE);
10149        transmute(r)
10150    }
10151}
10152
10153/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10154/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10155/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10156/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10157/// * [`_MM_FROUND_TO_POS_INF`] : round up
10158/// * [`_MM_FROUND_TO_ZERO`] : truncate
10159/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10160///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_round_ps&expand=4789)
10163#[inline]
10164#[target_feature(enable = "avx512f")]
10165#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10166#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
10167#[rustc_legacy_const_generics(2, 3)]
10168pub fn _mm512_maskz_roundscale_round_ps<const IMM8: i32, const SAE: i32>(
10169    k: __mmask16,
10170    a: __m512,
10171) -> __m512 {
10172    unsafe {
10173        static_assert_uimm_bits!(IMM8, 8);
10174        static_assert_mantissas_sae!(SAE);
10175        let a = a.as_f32x16();
10176        let r = vrndscaleps(a, IMM8, f32x16::ZERO, k, SAE);
10177        transmute(r)
10178    }
10179}
10180
10181/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
10182/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10183/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10184/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10185/// * [`_MM_FROUND_TO_POS_INF`] : round up
10186/// * [`_MM_FROUND_TO_ZERO`] : truncate
10187/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10188///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_round_pd&expand=4787)
10191#[inline]
10192#[target_feature(enable = "avx512f")]
10193#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10194#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
10195#[rustc_legacy_const_generics(1, 2)]
10196pub fn _mm512_roundscale_round_pd<const IMM8: i32, const SAE: i32>(a: __m512d) -> __m512d {
10197    unsafe {
10198        static_assert_uimm_bits!(IMM8, 8);
10199        static_assert_mantissas_sae!(SAE);
10200        let a = a.as_f64x8();
10201        let r = vrndscalepd(a, IMM8, f64x8::ZERO, 0b11111111, SAE);
10202        transmute(r)
10203    }
10204}
10205
10206/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10207/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10208/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10209/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10210/// * [`_MM_FROUND_TO_POS_INF`] : round up
10211/// * [`_MM_FROUND_TO_ZERO`] : truncate
10212/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10213///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_round_pd&expand=4785)
10216#[inline]
10217#[target_feature(enable = "avx512f")]
10218#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10219#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
10220#[rustc_legacy_const_generics(3, 4)]
10221pub fn _mm512_mask_roundscale_round_pd<const IMM8: i32, const SAE: i32>(
10222    src: __m512d,
10223    k: __mmask8,
10224    a: __m512d,
10225) -> __m512d {
10226    unsafe {
10227        static_assert_uimm_bits!(IMM8, 8);
10228        static_assert_mantissas_sae!(SAE);
10229        let a = a.as_f64x8();
10230        let src = src.as_f64x8();
10231        let r = vrndscalepd(a, IMM8, src, k, SAE);
10232        transmute(r)
10233    }
10234}
10235
10236/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10237/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10238/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10239/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10240/// * [`_MM_FROUND_TO_POS_INF`] : round up
10241/// * [`_MM_FROUND_TO_ZERO`] : truncate
10242/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10243///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_round_pd&expand=4786)
10246#[inline]
10247#[target_feature(enable = "avx512f")]
10248#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10249#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
10250#[rustc_legacy_const_generics(2, 3)]
10251pub fn _mm512_maskz_roundscale_round_pd<const IMM8: i32, const SAE: i32>(
10252    k: __mmask8,
10253    a: __m512d,
10254) -> __m512d {
10255    unsafe {
10256        static_assert_uimm_bits!(IMM8, 8);
10257        static_assert_mantissas_sae!(SAE);
10258        let a = a.as_f64x8();
10259        let r = vrndscalepd(a, IMM8, f64x8::ZERO, k, SAE);
10260        transmute(r)
10261    }
10262}
10263
10264/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.\
10265///
10266/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10267/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10268/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10269/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10270/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10271/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10272///
10273/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_round_ps&expand=4889)
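/// A minimal usage sketch (illustrative values; assumes nightly `stdarch_x86_avx512` and
/// an `avx512f`-capable CPU):
///
/// ```ignore
/// let a = _mm512_set1_ps(1.5);
/// let b = _mm512_set1_ps(3.0);
/// // scalef computes a * 2^floor(b), so 1.5 * 8.0 = 12.0.
/// let r = _mm512_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
/// assert_eq!(_mm512_cvtss_f32(r), 12.0);
/// ```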
10274#[inline]
10275#[target_feature(enable = "avx512f")]
10276#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10277#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
10278#[rustc_legacy_const_generics(2)]
10279pub fn _mm512_scalef_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
10280    unsafe {
10281        static_assert_rounding!(ROUNDING);
10282        let a = a.as_f32x16();
10283        let b = b.as_f32x16();
10284        let r = vscalefps(a, b, f32x16::ZERO, 0b11111111_11111111, ROUNDING);
10285        transmute(r)
10286    }
10287}
10288
10289/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10290///
10291/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10292/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10293/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10294/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10295/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10296/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10297///
10298/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_round_ps&expand=4887)
10299#[inline]
10300#[target_feature(enable = "avx512f")]
10301#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10302#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
10303#[rustc_legacy_const_generics(4)]
10304pub fn _mm512_mask_scalef_round_ps<const ROUNDING: i32>(
10305    src: __m512,
10306    k: __mmask16,
10307    a: __m512,
10308    b: __m512,
10309) -> __m512 {
10310    unsafe {
10311        static_assert_rounding!(ROUNDING);
10312        let a = a.as_f32x16();
10313        let b = b.as_f32x16();
10314        let src = src.as_f32x16();
10315        let r = vscalefps(a, b, src, k, ROUNDING);
10316        transmute(r)
10317    }
10318}
10319
10320/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10321///
10322/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10323/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10324/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10325/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10326/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10327/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10328///
10329/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_round_ps&expand=4888)
10330#[inline]
10331#[target_feature(enable = "avx512f")]
10332#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10333#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
10334#[rustc_legacy_const_generics(3)]
10335pub fn _mm512_maskz_scalef_round_ps<const ROUNDING: i32>(
10336    k: __mmask16,
10337    a: __m512,
10338    b: __m512,
10339) -> __m512 {
10340    unsafe {
10341        static_assert_rounding!(ROUNDING);
10342        let a = a.as_f32x16();
10343        let b = b.as_f32x16();
10344        let r = vscalefps(a, b, f32x16::ZERO, k, ROUNDING);
10345        transmute(r)
10346    }
10347}
10348
10349/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.\
10350///
10351/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10352/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10353/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10354/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10355/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10356/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10357///
10358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_round_pd&expand=4886)
10359#[inline]
10360#[target_feature(enable = "avx512f")]
10361#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10362#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
10363#[rustc_legacy_const_generics(2)]
10364pub fn _mm512_scalef_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
10365    unsafe {
10366        static_assert_rounding!(ROUNDING);
10367        let a = a.as_f64x8();
10368        let b = b.as_f64x8();
10369        let r = vscalefpd(a, b, f64x8::ZERO, 0b11111111, ROUNDING);
10370        transmute(r)
10371    }
10372}
10373
10374/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10375///
10376/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10377/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10378/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10379/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10380/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10381/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10382///
10383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_round_pd&expand=4884)
10384#[inline]
10385#[target_feature(enable = "avx512f")]
10386#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10387#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
10388#[rustc_legacy_const_generics(4)]
10389pub fn _mm512_mask_scalef_round_pd<const ROUNDING: i32>(
10390    src: __m512d,
10391    k: __mmask8,
10392    a: __m512d,
10393    b: __m512d,
10394) -> __m512d {
10395    unsafe {
10396        static_assert_rounding!(ROUNDING);
10397        let a = a.as_f64x8();
10398        let b = b.as_f64x8();
10399        let src = src.as_f64x8();
10400        let r = vscalefpd(a, b, src, k, ROUNDING);
10401        transmute(r)
10402    }
10403}
10404
10405/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10406///
10407/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10408/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10409/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10410/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10411/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10412/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10413///
10414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_round_pd&expand=4885)
10415#[inline]
10416#[target_feature(enable = "avx512f")]
10417#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10418#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
10419#[rustc_legacy_const_generics(3)]
10420pub fn _mm512_maskz_scalef_round_pd<const ROUNDING: i32>(
10421    k: __mmask8,
10422    a: __m512d,
10423    b: __m512d,
10424) -> __m512d {
10425    unsafe {
10426        static_assert_rounding!(ROUNDING);
10427        let a = a.as_f64x8();
10428        let b = b.as_f64x8();
10429        let r = vscalefpd(a, b, f64x8::ZERO, k, ROUNDING);
10430        transmute(r)
10431    }
10432}
10433
10434/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.\
10435///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_round_ps&expand=2505)
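/// A minimal sketch of the calling convention (illustrative values; assumes nightly
/// `stdarch_x86_avx512` and an `avx512f`-capable CPU). With an all-zero fix-up table
/// every lookup yields token 0, which leaves the corresponding element of `a` unchanged:
///
/// ```ignore
/// let a = _mm512_set1_ps(2.0);
/// let b = _mm512_set1_ps(-0.0);
/// let c = _mm512_setzero_si512(); // all tokens are 0: no fix-up is applied
/// let r = _mm512_fixupimm_round_ps::<0, _MM_FROUND_NO_EXC>(a, b, c);
/// ```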
10438#[inline]
10439#[target_feature(enable = "avx512f")]
10440#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10441#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
10442#[rustc_legacy_const_generics(3, 4)]
10443pub fn _mm512_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
10444    a: __m512,
10445    b: __m512,
10446    c: __m512i,
10447) -> __m512 {
10448    unsafe {
10449        static_assert_uimm_bits!(IMM8, 8);
10450        static_assert_mantissas_sae!(SAE);
10451        let a = a.as_f32x16();
10452        let b = b.as_f32x16();
10453        let c = c.as_i32x16();
10454        let r = vfixupimmps(a, b, c, IMM8, 0b11111111_11111111, SAE);
10455        transmute(r)
10456    }
10457}
10458
10459/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
10460///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_round_ps&expand=2506)
10463#[inline]
10464#[target_feature(enable = "avx512f")]
10465#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10466#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
10467#[rustc_legacy_const_generics(4, 5)]
10468pub fn _mm512_mask_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
10469    a: __m512,
10470    k: __mmask16,
10471    b: __m512,
10472    c: __m512i,
10473) -> __m512 {
10474    unsafe {
10475        static_assert_uimm_bits!(IMM8, 8);
10476        static_assert_mantissas_sae!(SAE);
10477        let a = a.as_f32x16();
10478        let b = b.as_f32x16();
10479        let c = c.as_i32x16();
10480        let r = vfixupimmps(a, b, c, IMM8, k, SAE);
10481        transmute(r)
10482    }
10483}
10484
10485/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
10486///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_round_ps&expand=2507)
10489#[inline]
10490#[target_feature(enable = "avx512f")]
10491#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10492#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
10493#[rustc_legacy_const_generics(4, 5)]
10494pub fn _mm512_maskz_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
10495    k: __mmask16,
10496    a: __m512,
10497    b: __m512,
10498    c: __m512i,
10499) -> __m512 {
10500    unsafe {
10501        static_assert_uimm_bits!(IMM8, 8);
10502        static_assert_mantissas_sae!(SAE);
10503        let a = a.as_f32x16();
10504        let b = b.as_f32x16();
10505        let c = c.as_i32x16();
10506        let r = vfixupimmpsz(a, b, c, IMM8, k, SAE);
10507        transmute(r)
10508    }
10509}
10510
10511/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.\
10512///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_round_pd&expand=2502)
10515#[inline]
10516#[target_feature(enable = "avx512f")]
10517#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10518#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
10519#[rustc_legacy_const_generics(3, 4)]
10520pub fn _mm512_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
10521    a: __m512d,
10522    b: __m512d,
10523    c: __m512i,
10524) -> __m512d {
10525    unsafe {
10526        static_assert_uimm_bits!(IMM8, 8);
10527        static_assert_mantissas_sae!(SAE);
10528        let a = a.as_f64x8();
10529        let b = b.as_f64x8();
10530        let c = c.as_i64x8();
10531        let r = vfixupimmpd(a, b, c, IMM8, 0b11111111, SAE);
10532        transmute(r)
10533    }
10534}
10535
10536/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
10537///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_round_pd&expand=2503)
10540#[inline]
10541#[target_feature(enable = "avx512f")]
10542#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10543#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
10544#[rustc_legacy_const_generics(4, 5)]
10545pub fn _mm512_mask_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
10546    a: __m512d,
10547    k: __mmask8,
10548    b: __m512d,
10549    c: __m512i,
10550) -> __m512d {
10551    unsafe {
10552        static_assert_uimm_bits!(IMM8, 8);
10553        static_assert_mantissas_sae!(SAE);
10554        let a = a.as_f64x8();
10555        let b = b.as_f64x8();
10556        let c = c.as_i64x8();
10557        let r = vfixupimmpd(a, b, c, IMM8, k, SAE);
10558        transmute(r)
10559    }
10560}
10561
10562/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
10563///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_round_pd&expand=2504)
10566#[inline]
10567#[target_feature(enable = "avx512f")]
10568#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10569#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
10570#[rustc_legacy_const_generics(4, 5)]
10571pub fn _mm512_maskz_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
10572    k: __mmask8,
10573    a: __m512d,
10574    b: __m512d,
10575    c: __m512i,
10576) -> __m512d {
10577    unsafe {
10578        static_assert_uimm_bits!(IMM8, 8);
10579        static_assert_mantissas_sae!(SAE);
10580        let a = a.as_f64x8();
10581        let b = b.as_f64x8();
10582        let c = c.as_i64x8();
10583        let r = vfixupimmpdz(a, b, c, IMM8, k, SAE);
10584        transmute(r)
10585    }
10586}
10587
10588/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10589/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10590///    _MM_MANT_NORM_1_2     // interval [1, 2)\
10591///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
10592///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
10593///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10594/// The sign is determined by sc which can take the following values:\
10595///    _MM_MANT_SIGN_src     // sign = sign(src)\
10596///    _MM_MANT_SIGN_zero    // sign = 0\
10597///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
10598/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10599///
10600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_round_ps&expand=2886)
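/// A minimal usage sketch (illustrative values; assumes nightly `stdarch_x86_avx512` and
/// an `avx512f`-capable CPU):
///
/// ```ignore
/// let a = _mm512_set1_ps(10.0);
/// // Normalize the mantissa to [1, 2) and keep the source sign: 10.0 = 1.25 * 2^3, so 1.25.
/// let r = _mm512_getmant_round_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_CUR_DIRECTION>(a);
/// assert_eq!(_mm512_cvtss_f32(r), 1.25);
/// ```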
10601#[inline]
10602#[target_feature(enable = "avx512f")]
10603#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10604#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
10605#[rustc_legacy_const_generics(1, 2, 3)]
10606pub fn _mm512_getmant_round_ps<
10607    const NORM: _MM_MANTISSA_NORM_ENUM,
10608    const SIGN: _MM_MANTISSA_SIGN_ENUM,
10609    const SAE: i32,
10610>(
10611    a: __m512,
10612) -> __m512 {
10613    unsafe {
10614        static_assert_uimm_bits!(NORM, 4);
10615        static_assert_uimm_bits!(SIGN, 2);
10616        static_assert_mantissas_sae!(SAE);
10617        let a = a.as_f32x16();
10618        let r = vgetmantps(a, SIGN << 2 | NORM, f32x16::ZERO, 0b11111111_11111111, SAE);
10619        transmute(r)
10620    }
10621}
10622
10623/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10624/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10625///    _MM_MANT_NORM_1_2     // interval [1, 2)\
10626///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
10627///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
10628///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10629/// The sign is determined by sc which can take the following values:\
10630///    _MM_MANT_SIGN_src     // sign = sign(src)\
10631///    _MM_MANT_SIGN_zero    // sign = 0\
10632///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
10633/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10634///
10635/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_round_ps&expand=2887)
10636#[inline]
10637#[target_feature(enable = "avx512f")]
10638#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10639#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
10640#[rustc_legacy_const_generics(3, 4, 5)]
10641pub fn _mm512_mask_getmant_round_ps<
10642    const NORM: _MM_MANTISSA_NORM_ENUM,
10643    const SIGN: _MM_MANTISSA_SIGN_ENUM,
10644    const SAE: i32,
10645>(
10646    src: __m512,
10647    k: __mmask16,
10648    a: __m512,
10649) -> __m512 {
10650    unsafe {
10651        static_assert_uimm_bits!(NORM, 4);
10652        static_assert_uimm_bits!(SIGN, 2);
10653        static_assert_mantissas_sae!(SAE);
10654        let a = a.as_f32x16();
10655        let src = src.as_f32x16();
10656        let r = vgetmantps(a, SIGN << 2 | NORM, src, k, SAE);
10657        transmute(r)
10658    }
10659}
10660
10661/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10662/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10663///    _MM_MANT_NORM_1_2     // interval [1, 2)\
10664///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
10665///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
10666///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10667/// The sign is determined by sc which can take the following values:\
10668///    _MM_MANT_SIGN_src     // sign = sign(src)\
10669///    _MM_MANT_SIGN_zero    // sign = 0\
10670///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
10671/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10672///
10673/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_round_ps&expand=2888)
10674#[inline]
10675#[target_feature(enable = "avx512f")]
10676#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10677#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
10678#[rustc_legacy_const_generics(2, 3, 4)]
10679pub fn _mm512_maskz_getmant_round_ps<
10680    const NORM: _MM_MANTISSA_NORM_ENUM,
10681    const SIGN: _MM_MANTISSA_SIGN_ENUM,
10682    const SAE: i32,
10683>(
10684    k: __mmask16,
10685    a: __m512,
10686) -> __m512 {
10687    unsafe {
10688        static_assert_uimm_bits!(NORM, 4);
10689        static_assert_uimm_bits!(SIGN, 2);
10690        static_assert_mantissas_sae!(SAE);
10691        let a = a.as_f32x16();
10692        let r = vgetmantps(a, SIGN << 2 | NORM, f32x16::ZERO, k, SAE);
10693        transmute(r)
10694    }
10695}
10696
10697/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10698/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10699///    _MM_MANT_NORM_1_2     // interval [1, 2)\
10700///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
10701///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
10702///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10703/// The sign is determined by sc which can take the following values:\
10704///    _MM_MANT_SIGN_src     // sign = sign(src)\
10705///    _MM_MANT_SIGN_zero    // sign = 0\
10706///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
10707/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10708///
10709/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_round_pd&expand=2883)
10710#[inline]
10711#[target_feature(enable = "avx512f")]
10712#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10713#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
10714#[rustc_legacy_const_generics(1, 2, 3)]
10715pub fn _mm512_getmant_round_pd<
10716    const NORM: _MM_MANTISSA_NORM_ENUM,
10717    const SIGN: _MM_MANTISSA_SIGN_ENUM,
10718    const SAE: i32,
10719>(
10720    a: __m512d,
10721) -> __m512d {
10722    unsafe {
10723        static_assert_uimm_bits!(NORM, 4);
10724        static_assert_uimm_bits!(SIGN, 2);
10725        static_assert_mantissas_sae!(SAE);
10726        let a = a.as_f64x8();
10727        let r = vgetmantpd(a, SIGN << 2 | NORM, f64x8::ZERO, 0b11111111, SAE);
10728        transmute(r)
10729    }
10730}
10731
10732/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10733/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10734///    _MM_MANT_NORM_1_2     // interval [1, 2)\
10735///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
10736///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
10737///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10738/// The sign is determined by sc which can take the following values:\
10739///    _MM_MANT_SIGN_src     // sign = sign(src)\
10740///    _MM_MANT_SIGN_zero    // sign = 0\
10741///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
10742/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10743///
10744/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_round_pd&expand=2884)
10745#[inline]
10746#[target_feature(enable = "avx512f")]
10747#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10748#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
10749#[rustc_legacy_const_generics(3, 4, 5)]
10750pub fn _mm512_mask_getmant_round_pd<
10751    const NORM: _MM_MANTISSA_NORM_ENUM,
10752    const SIGN: _MM_MANTISSA_SIGN_ENUM,
10753    const SAE: i32,
10754>(
10755    src: __m512d,
10756    k: __mmask8,
10757    a: __m512d,
10758) -> __m512d {
10759    unsafe {
10760        static_assert_uimm_bits!(NORM, 4);
10761        static_assert_uimm_bits!(SIGN, 2);
10762        static_assert_mantissas_sae!(SAE);
10763        let a = a.as_f64x8();
10764        let src = src.as_f64x8();
10765        let r = vgetmantpd(a, SIGN << 2 | NORM, src, k, SAE);
10766        transmute(r)
10767    }
10768}
10769
10770/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10771/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10772///    _MM_MANT_NORM_1_2     // interval [1, 2)\
10773///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
10774///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
10775///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10776/// The sign is determined by sc which can take the following values:\
10777///    _MM_MANT_SIGN_src     // sign = sign(src)\
10778///    _MM_MANT_SIGN_zero    // sign = 0\
10779///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
10780/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10781///
10782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_round_pd&expand=2885)
10783#[inline]
10784#[target_feature(enable = "avx512f")]
10785#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10786#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
10787#[rustc_legacy_const_generics(2, 3, 4)]
10788pub fn _mm512_maskz_getmant_round_pd<
10789    const NORM: _MM_MANTISSA_NORM_ENUM,
10790    const SIGN: _MM_MANTISSA_SIGN_ENUM,
10791    const SAE: i32,
10792>(
10793    k: __mmask8,
10794    a: __m512d,
10795) -> __m512d {
10796    unsafe {
10797        static_assert_uimm_bits!(NORM, 4);
10798        static_assert_uimm_bits!(SIGN, 2);
10799        static_assert_mantissas_sae!(SAE);
10800        let a = a.as_f64x8();
10801        let r = vgetmantpd(a, SIGN << 2 | NORM, f64x8::ZERO, k, SAE);
10802        transmute(r)
10803    }
10804}
10805
10806/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
10807///
10808/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epi32&expand=1737)
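/// A minimal usage sketch (illustrative values; assumes nightly `stdarch_x86_avx512` and
/// an `avx512f`-capable CPU):
///
/// ```ignore
/// let a = _mm512_set1_ps(1.5);
/// // Uses the current MXCSR rounding mode (round-to-nearest-even by default): 1.5 -> 2.
/// let r = _mm512_cvtps_epi32(a);
/// assert_eq!(_mm512_cvtsi512_si32(r), 2);
/// ```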
10809#[inline]
10810#[target_feature(enable = "avx512f")]
10811#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10812#[cfg_attr(test, assert_instr(vcvtps2dq))]
10813pub fn _mm512_cvtps_epi32(a: __m512) -> __m512i {
10814    unsafe {
10815        transmute(vcvtps2dq(
10816            a.as_f32x16(),
10817            i32x16::ZERO,
10818            0b11111111_11111111,
10819            _MM_FROUND_CUR_DIRECTION,
10820        ))
10821    }
10822}
10823
10824/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10825///
10826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epi32&expand=1738)
10827#[inline]
10828#[target_feature(enable = "avx512f")]
10829#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10830#[cfg_attr(test, assert_instr(vcvtps2dq))]
10831pub fn _mm512_mask_cvtps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
10832    unsafe {
10833        transmute(vcvtps2dq(
10834            a.as_f32x16(),
10835            src.as_i32x16(),
10836            k,
10837            _MM_FROUND_CUR_DIRECTION,
10838        ))
10839    }
10840}
10841
10842/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10843///
10844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epi32&expand=1739)
10845#[inline]
10846#[target_feature(enable = "avx512f")]
10847#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10848#[cfg_attr(test, assert_instr(vcvtps2dq))]
10849pub fn _mm512_maskz_cvtps_epi32(k: __mmask16, a: __m512) -> __m512i {
10850    unsafe {
10851        transmute(vcvtps2dq(
10852            a.as_f32x16(),
10853            i32x16::ZERO,
10854            k,
10855            _MM_FROUND_CUR_DIRECTION,
10856        ))
10857    }
10858}
10859
10860/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10861///
10862/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epi32&expand=1735)
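/// A minimal sketch of the writemask behaviour (illustrative values; assumes nightly
/// `stdarch_x86_avx512` and a CPU with both `avx512f` and `avx512vl`):
///
/// ```ignore
/// let src = _mm256_set1_epi32(-1);
/// let a = _mm256_set1_ps(4.2);
/// // The low four lanes are converted (4.2 -> 4); the high four lanes keep -1 from `src`.
/// let r = _mm256_mask_cvtps_epi32(src, 0b00001111, a);
/// ```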
10863#[inline]
10864#[target_feature(enable = "avx512f,avx512vl")]
10865#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10866#[cfg_attr(test, assert_instr(vcvtps2dq))]
10867pub fn _mm256_mask_cvtps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
10868    unsafe {
10869        let convert = _mm256_cvtps_epi32(a);
10870        transmute(simd_select_bitmask(k, convert.as_i32x8(), src.as_i32x8()))
10871    }
10872}
10873
10874/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10875///
10876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epi32&expand=1736)
10877#[inline]
10878#[target_feature(enable = "avx512f,avx512vl")]
10879#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10880#[cfg_attr(test, assert_instr(vcvtps2dq))]
10881pub fn _mm256_maskz_cvtps_epi32(k: __mmask8, a: __m256) -> __m256i {
10882    unsafe {
10883        let convert = _mm256_cvtps_epi32(a);
10884        transmute(simd_select_bitmask(k, convert.as_i32x8(), i32x8::ZERO))
10885    }
10886}
10887
10888/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10889///
10890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epi32&expand=1732)
10891#[inline]
10892#[target_feature(enable = "avx512f,avx512vl")]
10893#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10894#[cfg_attr(test, assert_instr(vcvtps2dq))]
10895pub fn _mm_mask_cvtps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
10896    unsafe {
10897        let convert = _mm_cvtps_epi32(a);
10898        transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
10899    }
10900}
10901
10902/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10903///
10904/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epi32&expand=1733)
10905#[inline]
10906#[target_feature(enable = "avx512f,avx512vl")]
10907#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10908#[cfg_attr(test, assert_instr(vcvtps2dq))]
10909pub fn _mm_maskz_cvtps_epi32(k: __mmask8, a: __m128) -> __m128i {
10910    unsafe {
10911        let convert = _mm_cvtps_epi32(a);
10912        transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO))
10913    }
10914}
10915
10916/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
10917///
10918/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epu32&expand=1755)
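/// A minimal usage sketch (illustrative values; assumes nightly `stdarch_x86_avx512` and
/// an `avx512f`-capable CPU):
///
/// ```ignore
/// let a = _mm512_set1_ps(3.6);
/// // Unsigned conversion using the current rounding mode: 3.6 -> 4.
/// let r = _mm512_cvtps_epu32(a);
/// assert_eq!(_mm512_cvtsi512_si32(r), 4);
/// ```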
10919#[inline]
10920#[target_feature(enable = "avx512f")]
10921#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10922#[cfg_attr(test, assert_instr(vcvtps2udq))]
10923pub fn _mm512_cvtps_epu32(a: __m512) -> __m512i {
10924    unsafe {
10925        transmute(vcvtps2udq(
10926            a.as_f32x16(),
10927            u32x16::ZERO,
10928            0b11111111_11111111,
10929            _MM_FROUND_CUR_DIRECTION,
10930        ))
10931    }
10932}
10933
10934/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10935///
10936/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epu32&expand=1756)
10937#[inline]
10938#[target_feature(enable = "avx512f")]
10939#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10940#[cfg_attr(test, assert_instr(vcvtps2udq))]
10941pub fn _mm512_mask_cvtps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
10942    unsafe {
10943        transmute(vcvtps2udq(
10944            a.as_f32x16(),
10945            src.as_u32x16(),
10946            k,
10947            _MM_FROUND_CUR_DIRECTION,
10948        ))
10949    }
10950}
10951
10952/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10953///
10954/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epu32&expand=1343)
10955#[inline]
10956#[target_feature(enable = "avx512f")]
10957#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10958#[cfg_attr(test, assert_instr(vcvtps2udq))]
10959pub fn _mm512_maskz_cvtps_epu32(k: __mmask16, a: __m512) -> __m512i {
10960    unsafe {
10961        transmute(vcvtps2udq(
10962            a.as_f32x16(),
10963            u32x16::ZERO,
10964            k,
10965            _MM_FROUND_CUR_DIRECTION,
10966        ))
10967    }
10968}
10969
10970/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
10971///
10972/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epu32&expand=1752)
10973#[inline]
10974#[target_feature(enable = "avx512f,avx512vl")]
10975#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10976#[cfg_attr(test, assert_instr(vcvtps2udq))]
10977pub fn _mm256_cvtps_epu32(a: __m256) -> __m256i {
10978    unsafe { transmute(vcvtps2udq256(a.as_f32x8(), u32x8::ZERO, 0b11111111)) }
10979}
10980
10981/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10982///
10983/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epu32&expand=1753)
10984#[inline]
10985#[target_feature(enable = "avx512f,avx512vl")]
10986#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10987#[cfg_attr(test, assert_instr(vcvtps2udq))]
10988pub fn _mm256_mask_cvtps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
10989    unsafe { transmute(vcvtps2udq256(a.as_f32x8(), src.as_u32x8(), k)) }
10990}
10991
10992/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10993///
10994/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epu32&expand=1754)
10995#[inline]
10996#[target_feature(enable = "avx512f,avx512vl")]
10997#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10998#[cfg_attr(test, assert_instr(vcvtps2udq))]
10999pub fn _mm256_maskz_cvtps_epu32(k: __mmask8, a: __m256) -> __m256i {
11000    unsafe { transmute(vcvtps2udq256(a.as_f32x8(), u32x8::ZERO, k)) }
11001}
11002
11003/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11004///
11005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epu32&expand=1749)
11006#[inline]
11007#[target_feature(enable = "avx512f,avx512vl")]
11008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11009#[cfg_attr(test, assert_instr(vcvtps2udq))]
11010pub fn _mm_cvtps_epu32(a: __m128) -> __m128i {
11011    unsafe { transmute(vcvtps2udq128(a.as_f32x4(), u32x4::ZERO, 0b11111111)) }
11012}
11013
11014/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11015///
11016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epu32&expand=1750)
11017#[inline]
11018#[target_feature(enable = "avx512f,avx512vl")]
11019#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11020#[cfg_attr(test, assert_instr(vcvtps2udq))]
11021pub fn _mm_mask_cvtps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
11022    unsafe { transmute(vcvtps2udq128(a.as_f32x4(), src.as_u32x4(), k)) }
11023}
11024
11025/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11026///
11027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epu32&expand=1751)
11028#[inline]
11029#[target_feature(enable = "avx512f,avx512vl")]
11030#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11031#[cfg_attr(test, assert_instr(vcvtps2udq))]
11032pub fn _mm_maskz_cvtps_epu32(k: __mmask8, a: __m128) -> __m128i {
11033    unsafe { transmute(vcvtps2udq128(a.as_f32x4(), u32x4::ZERO, k)) }
11034}
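
// Illustrative sketch, not part of the upstream implementation: the unsigned
// conversions above follow the same mask pattern as the signed ones. With a
// full mask every lane is converted; with an all-zero mask the maskz form
// returns an all-zero vector. Assumes AVX-512F; the name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn cvtps_epu32_demo(a: __m512) -> (__m512i, __m512i) {
    (
        _mm512_maskz_cvtps_epu32(0xffff, a), // all sixteen lanes converted
        _mm512_maskz_cvtps_epu32(0, a),      // all lanes zeroed
    )
}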
11035
11036/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
11037///
11038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_pd&expand=1769)
11039#[inline]
11040#[target_feature(enable = "avx512f")]
11041#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11042#[cfg_attr(test, assert_instr(vcvtps2pd))]
11043pub fn _mm512_cvtps_pd(a: __m256) -> __m512d {
11044    unsafe {
11045        transmute(vcvtps2pd(
11046            a.as_f32x8(),
11047            f64x8::ZERO,
11048            0b11111111,
11049            _MM_FROUND_CUR_DIRECTION,
11050        ))
11051    }
11052}
11053
11054/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11055///
11056/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_pd&expand=1770)
11057#[inline]
11058#[target_feature(enable = "avx512f")]
11059#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11060#[cfg_attr(test, assert_instr(vcvtps2pd))]
11061pub fn _mm512_mask_cvtps_pd(src: __m512d, k: __mmask8, a: __m256) -> __m512d {
11062    unsafe {
11063        transmute(vcvtps2pd(
11064            a.as_f32x8(),
11065            src.as_f64x8(),
11066            k,
11067            _MM_FROUND_CUR_DIRECTION,
11068        ))
11069    }
11070}
11071
11072/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11073///
11074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_pd&expand=1771)
11075#[inline]
11076#[target_feature(enable = "avx512f")]
11077#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11078#[cfg_attr(test, assert_instr(vcvtps2pd))]
11079pub fn _mm512_maskz_cvtps_pd(k: __mmask8, a: __m256) -> __m512d {
11080    unsafe {
11081        transmute(vcvtps2pd(
11082            a.as_f32x8(),
11083            f64x8::ZERO,
11084            k,
11085            _MM_FROUND_CUR_DIRECTION,
11086        ))
11087    }
11088}
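
// Illustrative sketch, not part of the upstream implementation: widening eight
// f32 lanes to eight f64 lanes with a writemask. Lanes whose bit in `k` is
// clear keep the corresponding f64 from `src`. Assumes AVX-512F; the name is
// hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn cvtps_pd_mask_demo(src: __m512d, a: __m256) -> __m512d {
    let k: __mmask8 = 0b1010_1010; // convert the odd lanes only
    _mm512_mask_cvtps_pd(src, k, a)
}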
11089
11090/// Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
11091///
11092/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpslo_pd&expand=1784)
11093#[inline]
11094#[target_feature(enable = "avx512f")]
11095#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11096#[cfg_attr(test, assert_instr(vcvtps2pd))]
11097pub fn _mm512_cvtpslo_pd(v2: __m512) -> __m512d {
11098    unsafe {
11099        transmute(vcvtps2pd(
11100            _mm512_castps512_ps256(v2).as_f32x8(),
11101            f64x8::ZERO,
11102            0b11111111,
11103            _MM_FROUND_CUR_DIRECTION,
11104        ))
11105    }
11106}
11107
11108/// Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11109///
11110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpslo_pd&expand=1785)
11111#[inline]
11112#[target_feature(enable = "avx512f")]
11113#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11114#[cfg_attr(test, assert_instr(vcvtps2pd))]
11115pub fn _mm512_mask_cvtpslo_pd(src: __m512d, k: __mmask8, v2: __m512) -> __m512d {
11116    unsafe {
11117        transmute(vcvtps2pd(
11118            _mm512_castps512_ps256(v2).as_f32x8(),
11119            src.as_f64x8(),
11120            k,
11121            _MM_FROUND_CUR_DIRECTION,
11122        ))
11123    }
11124}
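
// Illustrative sketch, not part of the upstream implementation: `_mm512_cvtpslo_pd`
// only reads the lower eight f32 lanes of its 512-bit input, so it computes the
// same result as casting down to __m256 and widening with `_mm512_cvtps_pd`.
// Assumes AVX-512F; the name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn cvtpslo_pd_equivalence_demo(v2: __m512) -> (__m512d, __m512d) {
    (
        _mm512_cvtpslo_pd(v2),
        _mm512_cvtps_pd(_mm512_castps512_ps256(v2)),
    )
}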
11125
11126/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
11127///
11128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_ps&expand=1712)
11129#[inline]
11130#[target_feature(enable = "avx512f")]
11131#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11132#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11133pub fn _mm512_cvtpd_ps(a: __m512d) -> __m256 {
11134    unsafe {
11135        transmute(vcvtpd2ps(
11136            a.as_f64x8(),
11137            f32x8::ZERO,
11138            0b11111111,
11139            _MM_FROUND_CUR_DIRECTION,
11140        ))
11141    }
11142}
11143
11144/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11145///
11146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_ps&expand=1713)
11147#[inline]
11148#[target_feature(enable = "avx512f")]
11149#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11150#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11151pub fn _mm512_mask_cvtpd_ps(src: __m256, k: __mmask8, a: __m512d) -> __m256 {
11152    unsafe {
11153        transmute(vcvtpd2ps(
11154            a.as_f64x8(),
11155            src.as_f32x8(),
11156            k,
11157            _MM_FROUND_CUR_DIRECTION,
11158        ))
11159    }
11160}
11161
11162/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11163///
11164/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_ps&expand=1714)
11165#[inline]
11166#[target_feature(enable = "avx512f")]
11167#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11168#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11169pub fn _mm512_maskz_cvtpd_ps(k: __mmask8, a: __m512d) -> __m256 {
11170    unsafe {
11171        transmute(vcvtpd2ps(
11172            a.as_f64x8(),
11173            f32x8::ZERO,
11174            k,
11175            _MM_FROUND_CUR_DIRECTION,
11176        ))
11177    }
11178}
11179
11180/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11181///
11182/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_ps&expand=1710)
11183#[inline]
11184#[target_feature(enable = "avx512f,avx512vl")]
11185#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11186#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11187pub fn _mm256_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m256d) -> __m128 {
11188    unsafe {
11189        let convert = _mm256_cvtpd_ps(a);
11190        transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4()))
11191    }
11192}
11193
11194/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11195///
11196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_ps&expand=1711)
11197#[inline]
11198#[target_feature(enable = "avx512f,avx512vl")]
11199#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11200#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11201pub fn _mm256_maskz_cvtpd_ps(k: __mmask8, a: __m256d) -> __m128 {
11202    unsafe {
11203        let convert = _mm256_cvtpd_ps(a);
11204        transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO))
11205    }
11206}
11207
11208/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11209///
11210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_ps&expand=1707)
11211#[inline]
11212#[target_feature(enable = "avx512f,avx512vl")]
11213#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11214#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11215pub fn _mm_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m128d) -> __m128 {
11216    unsafe {
11217        let convert = _mm_cvtpd_ps(a);
11218        transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4()))
11219    }
11220}
11221
11222/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11223///
11224/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_ps&expand=1708)
11225#[inline]
11226#[target_feature(enable = "avx512f,avx512vl")]
11227#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11228#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11229pub fn _mm_maskz_cvtpd_ps(k: __mmask8, a: __m128d) -> __m128 {
11230    unsafe {
11231        let convert = _mm_cvtpd_ps(a);
11232        transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO))
11233    }
11234}
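
// Illustrative sketch, not part of the upstream implementation: narrowing eight
// f64 lanes to eight f32 lanes, keeping `src` where the mask bit is clear and
// zeroing those lanes in the maskz form. Assumes AVX-512F; the name is
// hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn cvtpd_ps_mask_demo(src: __m256, a: __m512d) -> (__m256, __m256) {
    let k: __mmask8 = 0b0011_1100;
    (
        _mm512_mask_cvtpd_ps(src, k, a),
        _mm512_maskz_cvtpd_ps(k, a),
    )
}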
11235
11236/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
11237///
11238/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epi32&expand=1675)
11239#[inline]
11240#[target_feature(enable = "avx512f")]
11241#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11242#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11243pub fn _mm512_cvtpd_epi32(a: __m512d) -> __m256i {
11244    unsafe {
11245        transmute(vcvtpd2dq(
11246            a.as_f64x8(),
11247            i32x8::ZERO,
11248            0b11111111,
11249            _MM_FROUND_CUR_DIRECTION,
11250        ))
11251    }
11252}
11253
11254/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11255///
11256/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epi32&expand=1676)
11257#[inline]
11258#[target_feature(enable = "avx512f")]
11259#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11260#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11261pub fn _mm512_mask_cvtpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
11262    unsafe {
11263        transmute(vcvtpd2dq(
11264            a.as_f64x8(),
11265            src.as_i32x8(),
11266            k,
11267            _MM_FROUND_CUR_DIRECTION,
11268        ))
11269    }
11270}
11271
11272/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11273///
11274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epi32&expand=1677)
11275#[inline]
11276#[target_feature(enable = "avx512f")]
11277#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11278#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11279pub fn _mm512_maskz_cvtpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
11280    unsafe {
11281        transmute(vcvtpd2dq(
11282            a.as_f64x8(),
11283            i32x8::ZERO,
11284            k,
11285            _MM_FROUND_CUR_DIRECTION,
11286        ))
11287    }
11288}
11289
11290/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11291///
11292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epi32&expand=1673)
11293#[inline]
11294#[target_feature(enable = "avx512f,avx512vl")]
11295#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11296#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11297pub fn _mm256_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
11298    unsafe {
11299        let convert = _mm256_cvtpd_epi32(a);
11300        transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
11301    }
11302}
11303
11304/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11305///
11306/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epi32&expand=1674)
11307#[inline]
11308#[target_feature(enable = "avx512f,avx512vl")]
11309#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11310#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11311pub fn _mm256_maskz_cvtpd_epi32(k: __mmask8, a: __m256d) -> __m128i {
11312    unsafe {
11313        let convert = _mm256_cvtpd_epi32(a);
11314        transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO))
11315    }
11316}
11317
11318/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11319///
11320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epi32&expand=1670)
11321#[inline]
11322#[target_feature(enable = "avx512f,avx512vl")]
11323#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11324#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11325pub fn _mm_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
11326    unsafe {
11327        let convert = _mm_cvtpd_epi32(a);
11328        transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
11329    }
11330}
11331
11332/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11333///
11334/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epi32&expand=1671)
11335#[inline]
11336#[target_feature(enable = "avx512f,avx512vl")]
11337#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11338#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11339pub fn _mm_maskz_cvtpd_epi32(k: __mmask8, a: __m128d) -> __m128i {
11340    unsafe {
11341        let convert = _mm_cvtpd_epi32(a);
11342        transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO))
11343    }
11344}
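
// Illustrative sketch, not part of the upstream implementation: the f64-to-i32
// conversion halves the element count, so a 512-bit input yields a 256-bit
// result and a 256-bit input yields a 128-bit result. Assumes AVX-512F and
// AVX-512VL; the name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn cvtpd_epi32_widths_demo(src: __m256i, a512: __m512d, a256: __m256d) -> (__m256i, __m128i) {
    let k: __mmask8 = 0b1111_1111; // convert every lane
    (
        _mm512_mask_cvtpd_epi32(src, k, a512),
        _mm256_maskz_cvtpd_epi32(k, a256),
    )
}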
11345
11346/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11347///
11348/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epu32&expand=1693)
11349#[inline]
11350#[target_feature(enable = "avx512f")]
11351#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11352#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11353pub fn _mm512_cvtpd_epu32(a: __m512d) -> __m256i {
11354    unsafe {
11355        transmute(vcvtpd2udq(
11356            a.as_f64x8(),
11357            u32x8::ZERO,
11358            0b11111111,
11359            _MM_FROUND_CUR_DIRECTION,
11360        ))
11361    }
11362}
11363
11364/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11365///
11366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epu32&expand=1694)
11367#[inline]
11368#[target_feature(enable = "avx512f")]
11369#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11370#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11371pub fn _mm512_mask_cvtpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
11372    unsafe {
11373        transmute(vcvtpd2udq(
11374            a.as_f64x8(),
11375            src.as_u32x8(),
11376            k,
11377            _MM_FROUND_CUR_DIRECTION,
11378        ))
11379    }
11380}
11381
11382/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11383///
11384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epu32&expand=1695)
11385#[inline]
11386#[target_feature(enable = "avx512f")]
11387#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11388#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11389pub fn _mm512_maskz_cvtpd_epu32(k: __mmask8, a: __m512d) -> __m256i {
11390    unsafe {
11391        transmute(vcvtpd2udq(
11392            a.as_f64x8(),
11393            u32x8::ZERO,
11394            k,
11395            _MM_FROUND_CUR_DIRECTION,
11396        ))
11397    }
11398}
11399
11400/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11401///
11402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epu32&expand=1690)
11403#[inline]
11404#[target_feature(enable = "avx512f,avx512vl")]
11405#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11406#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11407pub fn _mm256_cvtpd_epu32(a: __m256d) -> __m128i {
11408    unsafe { transmute(vcvtpd2udq256(a.as_f64x4(), u32x4::ZERO, 0b11111111)) }
11409}
11410
11411/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11412///
11413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epu32&expand=1691)
11414#[inline]
11415#[target_feature(enable = "avx512f,avx512vl")]
11416#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11417#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11418pub fn _mm256_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
11419    unsafe { transmute(vcvtpd2udq256(a.as_f64x4(), src.as_u32x4(), k)) }
11420}
11421
11422/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11423///
11424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epu32&expand=1692)
11425#[inline]
11426#[target_feature(enable = "avx512f,avx512vl")]
11427#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11428#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11429pub fn _mm256_maskz_cvtpd_epu32(k: __mmask8, a: __m256d) -> __m128i {
11430    unsafe { transmute(vcvtpd2udq256(a.as_f64x4(), u32x4::ZERO, k)) }
11431}
11432
11433/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11434///
11435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epu32&expand=1687)
11436#[inline]
11437#[target_feature(enable = "avx512f,avx512vl")]
11438#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11439#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11440pub fn _mm_cvtpd_epu32(a: __m128d) -> __m128i {
11441    unsafe { transmute(vcvtpd2udq128(a.as_f64x2(), u32x4::ZERO, 0b11111111)) }
11442}
11443
11444/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11445///
11446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epu32&expand=1688)
11447#[inline]
11448#[target_feature(enable = "avx512f,avx512vl")]
11449#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11450#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11451pub fn _mm_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
11452    unsafe { transmute(vcvtpd2udq128(a.as_f64x2(), src.as_u32x4(), k)) }
11453}
11454
11455/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11456///
11457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epu32&expand=1689)
11458#[inline]
11459#[target_feature(enable = "avx512f,avx512vl")]
11460#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11461#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11462pub fn _mm_maskz_cvtpd_epu32(k: __mmask8, a: __m128d) -> __m128i {
11463    unsafe { transmute(vcvtpd2udq128(a.as_f64x2(), u32x4::ZERO, k)) }
11464}
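
// Illustrative sketch, not part of the upstream implementation: unsigned
// variant of the narrowing conversion above. The mask selects which of the
// four 32-bit results survive; the rest copy from `src` or are zeroed.
// Assumes AVX-512F and AVX-512VL; the name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn cvtpd_epu32_mask_demo(src: __m128i, a: __m256d) -> (__m128i, __m128i) {
    let k: __mmask8 = 0b0000_0011; // keep only the low two results
    (
        _mm256_mask_cvtpd_epu32(src, k, a),
        _mm256_maskz_cvtpd_epu32(k, a),
    )
}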
11465
11466/// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst. The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0.
11467///
11468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_pslo&expand=1715)
11469#[inline]
11470#[target_feature(enable = "avx512f")]
11471#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11472#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11473pub fn _mm512_cvtpd_pslo(v2: __m512d) -> __m512 {
11474    unsafe {
11475        let r: f32x8 = vcvtpd2ps(
11476            v2.as_f64x8(),
11477            f32x8::ZERO,
11478            0b11111111,
11479            _MM_FROUND_CUR_DIRECTION,
11480        );
11481        simd_shuffle!(
11482            r,
11483            f32x8::ZERO,
11484            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
11485        )
11486    }
11487}
11488
11489/// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0.
11490///
11491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_pslo&expand=1716)
11492#[inline]
11493#[target_feature(enable = "avx512f")]
11494#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11495#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11496pub fn _mm512_mask_cvtpd_pslo(src: __m512, k: __mmask8, v2: __m512d) -> __m512 {
11497    unsafe {
11498        let r: f32x8 = vcvtpd2ps(
11499            v2.as_f64x8(),
11500            _mm512_castps512_ps256(src).as_f32x8(),
11501            k,
11502            _MM_FROUND_CUR_DIRECTION,
11503        );
11504        simd_shuffle!(
11505            r,
11506            f32x8::ZERO,
11507            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
11508        )
11509    }
11510}
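
// Illustrative sketch, not part of the upstream implementation: `_mm512_cvtpd_pslo`
// narrows eight f64 lanes into the lower eight f32 lanes of a 512-bit vector and
// zeroes the upper half, so it composes naturally with `_mm512_cvtpslo_pd` above
// (the round trip loses f64 precision). Assumes AVX-512F; the name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn cvtpd_pslo_roundtrip_demo(v2: __m512d) -> __m512d {
    // Narrow to f32 (upper 256 bits become zero), then widen the low half back.
    _mm512_cvtpslo_pd(_mm512_cvtpd_pslo(v2))
}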
11511
11512/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst.
11513///
11514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi32&expand=1535)
11515#[inline]
11516#[target_feature(enable = "avx512f")]
11517#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11518#[cfg_attr(test, assert_instr(vpmovsxbd))]
11519pub fn _mm512_cvtepi8_epi32(a: __m128i) -> __m512i {
11520    unsafe {
11521        let a = a.as_i8x16();
11522        transmute::<i32x16, _>(simd_cast(a))
11523    }
11524}
11525
11526/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11527///
11528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi32&expand=1536)
11529#[inline]
11530#[target_feature(enable = "avx512f")]
11531#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11532#[cfg_attr(test, assert_instr(vpmovsxbd))]
11533pub fn _mm512_mask_cvtepi8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
11534    unsafe {
11535        let convert = _mm512_cvtepi8_epi32(a).as_i32x16();
11536        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
11537    }
11538}
11539
11540/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11541///
11542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi32&expand=1537)
11543#[inline]
11544#[target_feature(enable = "avx512f")]
11545#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11546#[cfg_attr(test, assert_instr(vpmovsxbd))]
11547pub fn _mm512_maskz_cvtepi8_epi32(k: __mmask16, a: __m128i) -> __m512i {
11548    unsafe {
11549        let convert = _mm512_cvtepi8_epi32(a).as_i32x16();
11550        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
11551    }
11552}
11553
11554/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11555///
11556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi32&expand=1533)
11557#[inline]
11558#[target_feature(enable = "avx512f,avx512vl")]
11559#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11560#[cfg_attr(test, assert_instr(vpmovsxbd))]
11561pub fn _mm256_mask_cvtepi8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11562    unsafe {
11563        let convert = _mm256_cvtepi8_epi32(a).as_i32x8();
11564        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
11565    }
11566}
11567
11568/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11569///
11570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi32&expand=1534)
11571#[inline]
11572#[target_feature(enable = "avx512f,avx512vl")]
11573#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11574#[cfg_attr(test, assert_instr(vpmovsxbd))]
11575pub fn _mm256_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m256i {
11576    unsafe {
11577        let convert = _mm256_cvtepi8_epi32(a).as_i32x8();
11578        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
11579    }
11580}
11581
11582/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11583///
11584/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi32&expand=1530)
11585#[inline]
11586#[target_feature(enable = "avx512f,avx512vl")]
11587#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11588#[cfg_attr(test, assert_instr(vpmovsxbd))]
11589pub fn _mm_mask_cvtepi8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11590    unsafe {
11591        let convert = _mm_cvtepi8_epi32(a).as_i32x4();
11592        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
11593    }
11594}
11595
11596/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11597///
11598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi32&expand=1531)
11599#[inline]
11600#[target_feature(enable = "avx512f,avx512vl")]
11601#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11602#[cfg_attr(test, assert_instr(vpmovsxbd))]
11603pub fn _mm_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m128i {
11604    unsafe {
11605        let convert = _mm_cvtepi8_epi32(a).as_i32x4();
11606        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
11607    }
11608}
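
// Illustrative sketch, not part of the upstream implementation: sign-extending
// sixteen i8 lanes to sixteen i32 lanes, with the zeromask form clearing the
// lanes whose mask bit is unset. Assumes AVX-512F; the name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn cvtepi8_epi32_demo(a: __m128i) -> __m512i {
    let k: __mmask16 = 0b0000_0000_1111_1111; // keep only the low eight lanes
    _mm512_maskz_cvtepi8_epi32(k, a)
}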
11609
11610/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst.
11611///
11612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi64&expand=1544)
11613#[inline]
11614#[target_feature(enable = "avx512f")]
11615#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11616#[cfg_attr(test, assert_instr(vpmovsxbq))]
11617pub fn _mm512_cvtepi8_epi64(a: __m128i) -> __m512i {
11618    unsafe {
11619        let a = a.as_i8x16();
11620        let v64: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
11621        transmute::<i64x8, _>(simd_cast(v64))
11622    }
11623}
11624
11625/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11626///
11627/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi64&expand=1545)
11628#[inline]
11629#[target_feature(enable = "avx512f")]
11630#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11631#[cfg_attr(test, assert_instr(vpmovsxbq))]
11632pub fn _mm512_mask_cvtepi8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
11633    unsafe {
11634        let convert = _mm512_cvtepi8_epi64(a).as_i64x8();
11635        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
11636    }
11637}
11638
11639/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11640///
11641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi64&expand=1546)
11642#[inline]
11643#[target_feature(enable = "avx512f")]
11644#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11645#[cfg_attr(test, assert_instr(vpmovsxbq))]
11646pub fn _mm512_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m512i {
11647    unsafe {
11648        let convert = _mm512_cvtepi8_epi64(a).as_i64x8();
11649        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
11650    }
11651}
11652
11653/// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11654///
11655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi64&expand=1542)
11656#[inline]
11657#[target_feature(enable = "avx512f,avx512vl")]
11658#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11659#[cfg_attr(test, assert_instr(vpmovsxbq))]
11660pub fn _mm256_mask_cvtepi8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11661    unsafe {
11662        let convert = _mm256_cvtepi8_epi64(a).as_i64x4();
11663        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
11664    }
11665}
11666
11667/// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11668///
11669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi64&expand=1543)
11670#[inline]
11671#[target_feature(enable = "avx512f,avx512vl")]
11672#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11673#[cfg_attr(test, assert_instr(vpmovsxbq))]
11674pub fn _mm256_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m256i {
11675    unsafe {
11676        let convert = _mm256_cvtepi8_epi64(a).as_i64x4();
11677        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
11678    }
11679}
11680
11681/// Sign extend packed 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11682///
11683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi64&expand=1539)
11684#[inline]
11685#[target_feature(enable = "avx512f,avx512vl")]
11686#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11687#[cfg_attr(test, assert_instr(vpmovsxbq))]
11688pub fn _mm_mask_cvtepi8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11689    unsafe {
11690        let convert = _mm_cvtepi8_epi64(a).as_i64x2();
11691        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
11692    }
11693}
11694
11695/// Sign extend packed 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11696///
11697/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi64&expand=1540)
11698#[inline]
11699#[target_feature(enable = "avx512f,avx512vl")]
11700#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11701#[cfg_attr(test, assert_instr(vpmovsxbq))]
11702pub fn _mm_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m128i {
11703    unsafe {
11704        let convert = _mm_cvtepi8_epi64(a).as_i64x2();
11705        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
11706    }
11707}
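
// Illustrative sketch, not part of the upstream implementation: only the low
// eight bytes of the 128-bit source feed the eight sign-extended i64 lanes, as
// the doc comments above note. Assumes AVX-512F; the name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn cvtepi8_epi64_demo(src: __m512i, a: __m128i) -> __m512i {
    let k: __mmask8 = 0b0101_0101; // even lanes converted, odd lanes copied from src
    _mm512_mask_cvtepi8_epi64(src, k, a)
}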
11708
11709/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst.
11710///
11711/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi32&expand=1621)
11712#[inline]
11713#[target_feature(enable = "avx512f")]
11714#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11715#[cfg_attr(test, assert_instr(vpmovzxbd))]
11716pub fn _mm512_cvtepu8_epi32(a: __m128i) -> __m512i {
11717    unsafe {
11718        let a = a.as_u8x16();
11719        transmute::<i32x16, _>(simd_cast(a))
11720    }
11721}
11722
11723/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11724///
11725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi32&expand=1622)
11726#[inline]
11727#[target_feature(enable = "avx512f")]
11728#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11729#[cfg_attr(test, assert_instr(vpmovzxbd))]
11730pub fn _mm512_mask_cvtepu8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
11731    unsafe {
11732        let convert = _mm512_cvtepu8_epi32(a).as_i32x16();
11733        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
11734    }
11735}
11736
11737/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11738///
11739/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi32&expand=1623)
11740#[inline]
11741#[target_feature(enable = "avx512f")]
11742#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11743#[cfg_attr(test, assert_instr(vpmovzxbd))]
11744pub fn _mm512_maskz_cvtepu8_epi32(k: __mmask16, a: __m128i) -> __m512i {
11745    unsafe {
11746        let convert = _mm512_cvtepu8_epi32(a).as_i32x16();
11747        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
11748    }
11749}
11750
11751/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11752///
11753/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi32&expand=1619)
11754#[inline]
11755#[target_feature(enable = "avx512f,avx512vl")]
11756#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11757#[cfg_attr(test, assert_instr(vpmovzxbd))]
11758pub fn _mm256_mask_cvtepu8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11759    unsafe {
11760        let convert = _mm256_cvtepu8_epi32(a).as_i32x8();
11761        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
11762    }
11763}
11764
11765/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11766///
11767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu8_epi32&expand=1620)
11768#[inline]
11769#[target_feature(enable = "avx512f,avx512vl")]
11770#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11771#[cfg_attr(test, assert_instr(vpmovzxbd))]
11772pub fn _mm256_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m256i {
11773    unsafe {
11774        let convert = _mm256_cvtepu8_epi32(a).as_i32x8();
11775        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
11776    }
11777}
11778
11779/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11780///
11781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi32&expand=1616)
11782#[inline]
11783#[target_feature(enable = "avx512f,avx512vl")]
11784#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11785#[cfg_attr(test, assert_instr(vpmovzxbd))]
11786pub fn _mm_mask_cvtepu8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11787    unsafe {
11788        let convert = _mm_cvtepu8_epi32(a).as_i32x4();
11789        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
11790    }
11791}
11792
11793/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11794///
11795/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu8_epi32&expand=1617)
11796#[inline]
11797#[target_feature(enable = "avx512f,avx512vl")]
11798#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11799#[cfg_attr(test, assert_instr(vpmovzxbd))]
11800pub fn _mm_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m128i {
11801    unsafe {
11802        let convert = _mm_cvtepu8_epi32(a).as_i32x4();
11803        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
11804    }
11805}
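
// Illustrative sketch, not part of the upstream implementation: zero extension
// preserves the numeric value of each unsigned byte, so converted lanes are
// simply the byte values widened to i32. Assumes AVX-512F and AVX-512VL; the
// name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn cvtepu8_epi32_demo(src: __m256i, a: __m128i) -> (__m256i, __m256i) {
    let k: __mmask8 = 0b1111_0000; // widen the high four of the low eight bytes
    (
        _mm256_mask_cvtepu8_epi32(src, k, a),
        _mm256_maskz_cvtepu8_epi32(k, a),
    )
}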
11806
11807/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst.
11808///
11809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi64&expand=1630)
11810#[inline]
11811#[target_feature(enable = "avx512f")]
11812#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11813#[cfg_attr(test, assert_instr(vpmovzxbq))]
11814pub fn _mm512_cvtepu8_epi64(a: __m128i) -> __m512i {
11815    unsafe {
11816        let a = a.as_u8x16();
11817        let v64: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
11818        transmute::<i64x8, _>(simd_cast(v64))
11819    }
11820}
11821
11822/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11823///
11824/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi64&expand=1631)
11825#[inline]
11826#[target_feature(enable = "avx512f")]
11827#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11828#[cfg_attr(test, assert_instr(vpmovzxbq))]
11829pub fn _mm512_mask_cvtepu8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
11830    unsafe {
11831        let convert = _mm512_cvtepu8_epi64(a).as_i64x8();
11832        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
11833    }
11834}
11835
11836/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11837///
11838/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi64&expand=1632)
11839#[inline]
11840#[target_feature(enable = "avx512f")]
11841#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11842#[cfg_attr(test, assert_instr(vpmovzxbq))]
11843pub fn _mm512_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m512i {
11844    unsafe {
11845        let convert = _mm512_cvtepu8_epi64(a).as_i64x8();
11846        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
11847    }
11848}
11849
11850/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11851///
11852/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi64&expand=1628)
11853#[inline]
11854#[target_feature(enable = "avx512f,avx512vl")]
11855#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11856#[cfg_attr(test, assert_instr(vpmovzxbq))]
11857pub fn _mm256_mask_cvtepu8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11858    unsafe {
11859        let convert = _mm256_cvtepu8_epi64(a).as_i64x4();
11860        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
11861    }
11862}
11863
11864/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11865///
11866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu8_epi64&expand=1629)
11867#[inline]
11868#[target_feature(enable = "avx512f,avx512vl")]
11869#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11870#[cfg_attr(test, assert_instr(vpmovzxbq))]
11871pub fn _mm256_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m256i {
11872    unsafe {
11873        let convert = _mm256_cvtepu8_epi64(a).as_i64x4();
11874        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
11875    }
11876}
11877
11878/// Zero extend packed unsigned 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11879///
11880/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi64&expand=1625)
11881#[inline]
11882#[target_feature(enable = "avx512f,avx512vl")]
11883#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11884#[cfg_attr(test, assert_instr(vpmovzxbq))]
11885pub fn _mm_mask_cvtepu8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11886    unsafe {
11887        let convert = _mm_cvtepu8_epi64(a).as_i64x2();
11888        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
11889    }
11890}
11891
11892/// Zero extend packed unsigned 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11893///
11894/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu8_epi64&expand=1626)
11895#[inline]
11896#[target_feature(enable = "avx512f,avx512vl")]
11897#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11898#[cfg_attr(test, assert_instr(vpmovzxbq))]
11899pub fn _mm_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m128i {
11900    unsafe {
11901        let convert = _mm_cvtepu8_epi64(a).as_i64x2();
11902        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
11903    }
11904}
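
// Illustrative sketch, not part of the upstream implementation: the 128-bit
// form reads only the low two bytes of `a`, producing two zero-extended i64
// lanes that are then masked. Assumes AVX-512F and AVX-512VL; the name is
// hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn cvtepu8_epi64_demo(src: __m128i, a: __m128i) -> (__m128i, __m128i) {
    let k: __mmask8 = 0b0000_0001; // only lane 0 is converted
    (
        _mm_mask_cvtepu8_epi64(src, k, a),
        _mm_maskz_cvtepu8_epi64(k, a),
    )
}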
11905
11906/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst.
11907///
11908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi32&expand=1389)
11909#[inline]
11910#[target_feature(enable = "avx512f")]
11911#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11912#[cfg_attr(test, assert_instr(vpmovsxwd))]
11913pub fn _mm512_cvtepi16_epi32(a: __m256i) -> __m512i {
11914    unsafe {
11915        let a = a.as_i16x16();
11916        transmute::<i32x16, _>(simd_cast(a))
11917    }
11918}
11919
11920/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11921///
11922/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi32&expand=1390)
11923#[inline]
11924#[target_feature(enable = "avx512f")]
11925#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11926#[cfg_attr(test, assert_instr(vpmovsxwd))]
11927pub fn _mm512_mask_cvtepi16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
11928    unsafe {
11929        let convert = _mm512_cvtepi16_epi32(a).as_i32x16();
11930        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
11931    }
11932}
11933
11934/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11935///
11936/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi32&expand=1391)
11937#[inline]
11938#[target_feature(enable = "avx512f")]
11939#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11940#[cfg_attr(test, assert_instr(vpmovsxwd))]
11941pub fn _mm512_maskz_cvtepi16_epi32(k: __mmask16, a: __m256i) -> __m512i {
11942    unsafe {
11943        let convert = _mm512_cvtepi16_epi32(a).as_i32x16();
11944        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
11945    }
11946}
11947
11948/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11949///
11950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi32&expand=1387)
11951#[inline]
11952#[target_feature(enable = "avx512f,avx512vl")]
11953#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11954#[cfg_attr(test, assert_instr(vpmovsxwd))]
11955pub fn _mm256_mask_cvtepi16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11956    unsafe {
11957        let convert = _mm256_cvtepi16_epi32(a).as_i32x8();
11958        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
11959    }
11960}
11961
11962/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11963///
11964/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi32&expand=1388)
11965#[inline]
11966#[target_feature(enable = "avx512f,avx512vl")]
11967#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11968#[cfg_attr(test, assert_instr(vpmovsxwd))]
11969pub fn _mm256_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m256i {
11970    unsafe {
11971        let convert = _mm256_cvtepi16_epi32(a).as_i32x8();
11972        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
11973    }
11974}
11975
11976/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11977///
11978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi32&expand=1384)
11979#[inline]
11980#[target_feature(enable = "avx512f,avx512vl")]
11981#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11982#[cfg_attr(test, assert_instr(vpmovsxwd))]
11983pub fn _mm_mask_cvtepi16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11984    unsafe {
11985        let convert = _mm_cvtepi16_epi32(a).as_i32x4();
11986        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
11987    }
11988}
11989
11990/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11991///
11992/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi32&expand=1385)
11993#[inline]
11994#[target_feature(enable = "avx512f,avx512vl")]
11995#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11996#[cfg_attr(test, assert_instr(vpmovsxwd))]
11997pub fn _mm_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m128i {
11998    unsafe {
11999        let convert = _mm_cvtepi16_epi32(a).as_i32x4();
12000        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
12001    }
12002}
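
// Illustrative usage of the 16-bit -> 32-bit sign-extension family above. This is a
// hedged sketch rather than part of the crate's test suite: the helper name, mask and
// values are invented for demonstration, and it assumes an AVX-512F-capable target.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cvtepi16_epi32() {
    let a = _mm256_set1_epi16(-7);
    // Plain conversion: every 16-bit lane is sign-extended, so all 16 i32 lanes hold -7.
    let all = _mm512_cvtepi16_epi32(a);
    // Writemask: only lanes whose mask bit is set are converted; the rest copy `src`.
    let src = _mm512_set1_epi32(1);
    let merged = _mm512_mask_cvtepi16_epi32(src, 0x00FF, a); // low 8 lanes -7, high 8 lanes 1
    // Zeromask: deselected lanes become 0 instead of copying `src`.
    let zeroed = _mm512_maskz_cvtepi16_epi32(0x00FF, a); // low 8 lanes -7, high 8 lanes 0
    let _ = (all, merged, zeroed);
}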
12003
12004/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst.
12005///
12006/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi64&expand=1398)
12007#[inline]
12008#[target_feature(enable = "avx512f")]
12009#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12010#[cfg_attr(test, assert_instr(vpmovsxwq))]
12011pub fn _mm512_cvtepi16_epi64(a: __m128i) -> __m512i {
12012    unsafe {
12013        let a = a.as_i16x8();
12014        transmute::<i64x8, _>(simd_cast(a))
12015    }
12016}
12017
12018/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12019///
12020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi64&expand=1399)
12021#[inline]
12022#[target_feature(enable = "avx512f")]
12023#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12024#[cfg_attr(test, assert_instr(vpmovsxwq))]
12025pub fn _mm512_mask_cvtepi16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
12026    unsafe {
12027        let convert = _mm512_cvtepi16_epi64(a).as_i64x8();
12028        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
12029    }
12030}
12031
12032/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12033///
12034/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi64&expand=1400)
12035#[inline]
12036#[target_feature(enable = "avx512f")]
12037#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12038#[cfg_attr(test, assert_instr(vpmovsxwq))]
12039pub fn _mm512_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m512i {
12040    unsafe {
12041        let convert = _mm512_cvtepi16_epi64(a).as_i64x8();
12042        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
12043    }
12044}
12045
12046/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12047///
12048/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi64&expand=1396)
12049#[inline]
12050#[target_feature(enable = "avx512f,avx512vl")]
12051#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12052#[cfg_attr(test, assert_instr(vpmovsxwq))]
12053pub fn _mm256_mask_cvtepi16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12054    unsafe {
12055        let convert = _mm256_cvtepi16_epi64(a).as_i64x4();
12056        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
12057    }
12058}
12059
12060/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12061///
12062/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi64&expand=1397)
12063#[inline]
12064#[target_feature(enable = "avx512f,avx512vl")]
12065#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12066#[cfg_attr(test, assert_instr(vpmovsxwq))]
12067pub fn _mm256_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m256i {
12068    unsafe {
12069        let convert = _mm256_cvtepi16_epi64(a).as_i64x4();
12070        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
12071    }
12072}
12073
12074/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12075///
12076/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi64&expand=1393)
12077#[inline]
12078#[target_feature(enable = "avx512f,avx512vl")]
12079#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12080#[cfg_attr(test, assert_instr(vpmovsxwq))]
12081pub fn _mm_mask_cvtepi16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12082    unsafe {
12083        let convert = _mm_cvtepi16_epi64(a).as_i64x2();
12084        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
12085    }
12086}
12087
12088/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12089///
12090/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi64&expand=1394)
12091#[inline]
12092#[target_feature(enable = "avx512f,avx512vl")]
12093#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12094#[cfg_attr(test, assert_instr(vpmovsxwq))]
12095pub fn _mm_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m128i {
12096    unsafe {
12097        let convert = _mm_cvtepi16_epi64(a).as_i64x2();
12098        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
12099    }
12100}
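
// A short sketch of the 16-bit -> 64-bit sign-extension variants; hypothetical helper
// and values, assuming an AVX-512F target.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cvtepi16_epi64() {
    // All 8 words of the 128-bit source are widened into the eight 64-bit result lanes.
    let a = _mm_set1_epi16(-1);
    let all = _mm512_cvtepi16_epi64(a); // eight lanes of -1_i64
    // Mask bit 0 selects lane 0; the remaining lanes keep `src` (writemask) or 0 (zeromask).
    let src = _mm512_set1_epi64(5);
    let merged = _mm512_mask_cvtepi16_epi64(src, 0b0000_0001, a);
    let zeroed = _mm512_maskz_cvtepi16_epi64(0b0000_0001, a);
    let _ = (all, merged, zeroed);
}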
12101
12102/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst.
12103///
12104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu16_epi32&expand=1553)
12105#[inline]
12106#[target_feature(enable = "avx512f")]
12107#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12108#[cfg_attr(test, assert_instr(vpmovzxwd))]
12109pub fn _mm512_cvtepu16_epi32(a: __m256i) -> __m512i {
12110    unsafe {
12111        let a = a.as_u16x16();
12112        transmute::<i32x16, _>(simd_cast(a))
12113    }
12114}
12115
12116/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12117///
12118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu16_epi32&expand=1554)
12119#[inline]
12120#[target_feature(enable = "avx512f")]
12121#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12122#[cfg_attr(test, assert_instr(vpmovzxwd))]
12123pub fn _mm512_mask_cvtepu16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
12124    unsafe {
12125        let convert = _mm512_cvtepu16_epi32(a).as_i32x16();
12126        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
12127    }
12128}
12129
12130/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12131///
12132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu16_epi32&expand=1555)
12133#[inline]
12134#[target_feature(enable = "avx512f")]
12135#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12136#[cfg_attr(test, assert_instr(vpmovzxwd))]
12137pub fn _mm512_maskz_cvtepu16_epi32(k: __mmask16, a: __m256i) -> __m512i {
12138    unsafe {
12139        let convert = _mm512_cvtepu16_epi32(a).as_i32x16();
12140        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
12141    }
12142}
12143
12144/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12145///
12146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu16_epi32&expand=1551)
12147#[inline]
12148#[target_feature(enable = "avx512f,avx512vl")]
12149#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12150#[cfg_attr(test, assert_instr(vpmovzxwd))]
12151pub fn _mm256_mask_cvtepu16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12152    unsafe {
12153        let convert = _mm256_cvtepu16_epi32(a).as_i32x8();
12154        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
12155    }
12156}
12157
12158/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12159///
12160/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu16_epi32&expand=1552)
12161#[inline]
12162#[target_feature(enable = "avx512f,avx512vl")]
12163#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12164#[cfg_attr(test, assert_instr(vpmovzxwd))]
12165pub fn _mm256_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m256i {
12166    unsafe {
12167        let convert = _mm256_cvtepu16_epi32(a).as_i32x8();
12168        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
12169    }
12170}
12171
12172/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12173///
12174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu16_epi32&expand=1548)
12175#[inline]
12176#[target_feature(enable = "avx512f,avx512vl")]
12177#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12178#[cfg_attr(test, assert_instr(vpmovzxwd))]
12179pub fn _mm_mask_cvtepu16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12180    unsafe {
12181        let convert = _mm_cvtepu16_epi32(a).as_i32x4();
12182        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
12183    }
12184}
12185
12186/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12187///
12188/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu16_epi32&expand=1549)
12189#[inline]
12190#[target_feature(enable = "avx512f,avx512vl")]
12191#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12192#[cfg_attr(test, assert_instr(vpmovzxwd))]
12193pub fn _mm_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m128i {
12194    unsafe {
12195        let convert = _mm_cvtepu16_epi32(a).as_i32x4();
12196        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
12197    }
12198}
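
// Sketch contrasting zero extension with the sign-extending `_mm512_cvtepi16_epi32`
// above: the bit pattern 0xFFFF widens to 65535, not -1. Hypothetical example only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cvtepu16_epi32() {
    let a = _mm256_set1_epi16(-1); // every 16-bit lane holds 0xFFFF
    let unsigned = _mm512_cvtepu16_epi32(a); // 65535 in every i32 lane
    let signed = _mm512_cvtepi16_epi32(a); // -1 in every i32 lane
    // The masked forms merge with `src` or zero the deselected lanes, as elsewhere.
    let merged = _mm512_mask_cvtepu16_epi32(_mm512_set1_epi32(0), 0xFF00, a);
    let _ = (unsigned, signed, merged);
}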
12199
12200/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst.
12201///
12202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu16_epi64&expand=1562)
12203#[inline]
12204#[target_feature(enable = "avx512f")]
12205#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12206#[cfg_attr(test, assert_instr(vpmovzxwq))]
12207pub fn _mm512_cvtepu16_epi64(a: __m128i) -> __m512i {
12208    unsafe {
12209        let a = a.as_u16x8();
12210        transmute::<i64x8, _>(simd_cast(a))
12211    }
12212}
12213
12214/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12215///
12216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu16_epi64&expand=1563)
12217#[inline]
12218#[target_feature(enable = "avx512f")]
12219#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12220#[cfg_attr(test, assert_instr(vpmovzxwq))]
12221pub fn _mm512_mask_cvtepu16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
12222    unsafe {
12223        let convert = _mm512_cvtepu16_epi64(a).as_i64x8();
12224        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
12225    }
12226}
12227
12228/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12229///
12230/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu16_epi64&expand=1564)
12231#[inline]
12232#[target_feature(enable = "avx512f")]
12233#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12234#[cfg_attr(test, assert_instr(vpmovzxwq))]
12235pub fn _mm512_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m512i {
12236    unsafe {
12237        let convert = _mm512_cvtepu16_epi64(a).as_i64x8();
12238        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
12239    }
12240}
12241
12242/// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12243///
12244/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu16_epi64&expand=1560)
12245#[inline]
12246#[target_feature(enable = "avx512f,avx512vl")]
12247#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12248#[cfg_attr(test, assert_instr(vpmovzxwq))]
12249pub fn _mm256_mask_cvtepu16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12250    unsafe {
12251        let convert = _mm256_cvtepu16_epi64(a).as_i64x4();
12252        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
12253    }
12254}
12255
12256/// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12257///
12258/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu16_epi64&expand=1561)
12259#[inline]
12260#[target_feature(enable = "avx512f,avx512vl")]
12261#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12262#[cfg_attr(test, assert_instr(vpmovzxwq))]
12263pub fn _mm256_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m256i {
12264    unsafe {
12265        let convert = _mm256_cvtepu16_epi64(a).as_i64x4();
12266        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
12267    }
12268}
12269
12270/// Zero extend packed unsigned 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12271///
12272/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu16_epi64&expand=1557)
12273#[inline]
12274#[target_feature(enable = "avx512f,avx512vl")]
12275#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12276#[cfg_attr(test, assert_instr(vpmovzxwq))]
12277pub fn _mm_mask_cvtepu16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12278    unsafe {
12279        let convert = _mm_cvtepu16_epi64(a).as_i64x2();
12280        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
12281    }
12282}
12283
12284/// Zero extend packed unsigned 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12285///
12286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu16_epi64&expand=1558)
12287#[inline]
12288#[target_feature(enable = "avx512f,avx512vl")]
12289#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12290#[cfg_attr(test, assert_instr(vpmovzxwq))]
12291pub fn _mm_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m128i {
12292    unsafe {
12293        let convert = _mm_cvtepu16_epi64(a).as_i64x2();
12294        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
12295    }
12296}
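
// Sketch for the unsigned 16-bit -> 64-bit widening family. The narrower destinations
// only read the low 8 bytes (256-bit result) or low 4 bytes (128-bit result) of `a`;
// the remaining source words are ignored. Hypothetical example, AVX-512F+VL assumed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn example_cvtepu16_epi64() {
    let a = _mm_set1_epi16(-1); // 0xFFFF in every word
    let wide = _mm512_cvtepu16_epi64(a); // 65535 in all eight i64 lanes
    // 128-bit destination: only the first two words of `a` participate.
    let lo2 = _mm_maskz_cvtepu16_epi64(0b0000_0011, a); // both lanes 65535
    let _ = (wide, lo2);
}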
12297
12298/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst.
12299///
12300/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi64&expand=1428)
12301#[inline]
12302#[target_feature(enable = "avx512f")]
12303#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12304#[cfg_attr(test, assert_instr(vpmovsxdq))]
12305pub fn _mm512_cvtepi32_epi64(a: __m256i) -> __m512i {
12306    unsafe {
12307        let a = a.as_i32x8();
12308        transmute::<i64x8, _>(simd_cast(a))
12309    }
12310}
12311
12312/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12313///
12314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi64&expand=1429)
12315#[inline]
12316#[target_feature(enable = "avx512f")]
12317#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12318#[cfg_attr(test, assert_instr(vpmovsxdq))]
12319pub fn _mm512_mask_cvtepi32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
12320    unsafe {
12321        let convert = _mm512_cvtepi32_epi64(a).as_i64x8();
12322        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
12323    }
12324}
12325
12326/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12327///
12328/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi64&expand=1430)
12329#[inline]
12330#[target_feature(enable = "avx512f")]
12331#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12332#[cfg_attr(test, assert_instr(vpmovsxdq))]
12333pub fn _mm512_maskz_cvtepi32_epi64(k: __mmask8, a: __m256i) -> __m512i {
12334    unsafe {
12335        let convert = _mm512_cvtepi32_epi64(a).as_i64x8();
12336        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
12337    }
12338}
12339
12340/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12341///
12342/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi64&expand=1426)
12343#[inline]
12344#[target_feature(enable = "avx512f,avx512vl")]
12345#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12346#[cfg_attr(test, assert_instr(vpmovsxdq))]
12347pub fn _mm256_mask_cvtepi32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12348    unsafe {
12349        let convert = _mm256_cvtepi32_epi64(a).as_i64x4();
12350        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
12351    }
12352}
12353
12354/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12355///
12356/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi64&expand=1427)
12357#[inline]
12358#[target_feature(enable = "avx512f,avx512vl")]
12359#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12360#[cfg_attr(test, assert_instr(vpmovsxdq))]
12361pub fn _mm256_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m256i {
12362    unsafe {
12363        let convert = _mm256_cvtepi32_epi64(a).as_i64x4();
12364        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
12365    }
12366}
12367
12368/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12369///
12370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi64&expand=1423)
12371#[inline]
12372#[target_feature(enable = "avx512f,avx512vl")]
12373#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12374#[cfg_attr(test, assert_instr(vpmovsxdq))]
12375pub fn _mm_mask_cvtepi32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12376    unsafe {
12377        let convert = _mm_cvtepi32_epi64(a).as_i64x2();
12378        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
12379    }
12380}
12381
12382/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12383///
12384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi64&expand=1424)
12385#[inline]
12386#[target_feature(enable = "avx512f,avx512vl")]
12387#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12388#[cfg_attr(test, assert_instr(vpmovsxdq))]
12389pub fn _mm_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m128i {
12390    unsafe {
12391        let convert = _mm_cvtepi32_epi64(a).as_i64x2();
12392        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
12393    }
12394}
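
// Sketch for 32-bit -> 64-bit sign extension: negative values keep their sign after
// widening. Hypothetical helper, AVX-512F assumed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cvtepi32_epi64() {
    let a = _mm256_set1_epi32(-123);
    let all = _mm512_cvtepi32_epi64(a); // -123_i64 in every lane
    // Keep lanes 0..4 from the conversion and lanes 4..8 from `src`.
    let src = _mm512_set1_epi64(0);
    let merged = _mm512_mask_cvtepi32_epi64(src, 0b0000_1111, a);
    let _ = (all, merged);
}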
12395
12396/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst.
12397///
12398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_epi64&expand=1571)
12399#[inline]
12400#[target_feature(enable = "avx512f")]
12401#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12402#[cfg_attr(test, assert_instr(vpmovzxdq))]
12403pub fn _mm512_cvtepu32_epi64(a: __m256i) -> __m512i {
12404    unsafe {
12405        let a = a.as_u32x8();
12406        transmute::<i64x8, _>(simd_cast(a))
12407    }
12408}
12409
12410/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12411///
12412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_epi64&expand=1572)
12413#[inline]
12414#[target_feature(enable = "avx512f")]
12415#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12416#[cfg_attr(test, assert_instr(vpmovzxdq))]
12417pub fn _mm512_mask_cvtepu32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
12418    unsafe {
12419        let convert = _mm512_cvtepu32_epi64(a).as_i64x8();
12420        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
12421    }
12422}
12423
12424/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12425///
12426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_epi64&expand=1573)
12427#[inline]
12428#[target_feature(enable = "avx512f")]
12429#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12430#[cfg_attr(test, assert_instr(vpmovzxdq))]
12431pub fn _mm512_maskz_cvtepu32_epi64(k: __mmask8, a: __m256i) -> __m512i {
12432    unsafe {
12433        let convert = _mm512_cvtepu32_epi64(a).as_i64x8();
12434        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
12435    }
12436}
12437
12438/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12439///
12440/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu32_epi64&expand=1569)
12441#[inline]
12442#[target_feature(enable = "avx512f,avx512vl")]
12443#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12444#[cfg_attr(test, assert_instr(vpmovzxdq))]
12445pub fn _mm256_mask_cvtepu32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12446    unsafe {
12447        let convert = _mm256_cvtepu32_epi64(a).as_i64x4();
12448        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
12449    }
12450}
12451
12452/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12453///
12454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu32_epi64&expand=1570)
12455#[inline]
12456#[target_feature(enable = "avx512f,avx512vl")]
12457#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12458#[cfg_attr(test, assert_instr(vpmovzxdq))]
12459pub fn _mm256_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m256i {
12460    unsafe {
12461        let convert = _mm256_cvtepu32_epi64(a).as_i64x4();
12462        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
12463    }
12464}
12465
12466/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12467///
12468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu32_epi64&expand=1566)
12469#[inline]
12470#[target_feature(enable = "avx512f,avx512vl")]
12471#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12472#[cfg_attr(test, assert_instr(vpmovzxdq))]
12473pub fn _mm_mask_cvtepu32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12474    unsafe {
12475        let convert = _mm_cvtepu32_epi64(a).as_i64x2();
12476        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
12477    }
12478}
12479
12480/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12481///
12482/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu32_epi64&expand=1567)
12483#[inline]
12484#[target_feature(enable = "avx512f,avx512vl")]
12485#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12486#[cfg_attr(test, assert_instr(vpmovzxdq))]
12487pub fn _mm_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m128i {
12488    unsafe {
12489        let convert = _mm_cvtepu32_epi64(a).as_i64x2();
12490        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
12491    }
12492}
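
// Sketch for unsigned 32-bit -> 64-bit widening: the bit pattern 0xFFFF_FFFF
// zero-extends to 4294967295 rather than sign-extending to -1. Hypothetical example.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cvtepu32_epi64() {
    let a = _mm256_set1_epi32(-1); // 0xFFFF_FFFF in every lane
    let unsigned = _mm512_cvtepu32_epi64(a); // 4294967295 in every i64 lane
    let signed = _mm512_cvtepi32_epi64(a); // -1 in every i64 lane
    let _ = (unsigned, signed);
}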
12493
12494/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
12495///
12496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_ps&expand=1455)
12497#[inline]
12498#[target_feature(enable = "avx512f")]
12499#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12500#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12501pub fn _mm512_cvtepi32_ps(a: __m512i) -> __m512 {
12502    unsafe {
12503        let a = a.as_i32x16();
12504        transmute::<f32x16, _>(simd_cast(a))
12505    }
12506}
12507
12508/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12509///
12510/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_ps&expand=1456)
12511#[inline]
12512#[target_feature(enable = "avx512f")]
12513#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12514#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12515pub fn _mm512_mask_cvtepi32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
12516    unsafe {
12517        let convert = _mm512_cvtepi32_ps(a).as_f32x16();
12518        transmute(simd_select_bitmask(k, convert, src.as_f32x16()))
12519    }
12520}
12521
12522/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12523///
12524/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_ps&expand=1457)
12525#[inline]
12526#[target_feature(enable = "avx512f")]
12527#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12528#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12529pub fn _mm512_maskz_cvtepi32_ps(k: __mmask16, a: __m512i) -> __m512 {
12530    unsafe {
12531        let convert = _mm512_cvtepi32_ps(a).as_f32x16();
12532        transmute(simd_select_bitmask(k, convert, f32x16::ZERO))
12533    }
12534}
12535
12536/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12537///
12538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_ps&expand=1453)
12539#[inline]
12540#[target_feature(enable = "avx512f,avx512vl")]
12541#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12542#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12543pub fn _mm256_mask_cvtepi32_ps(src: __m256, k: __mmask8, a: __m256i) -> __m256 {
12544    unsafe {
12545        let convert = _mm256_cvtepi32_ps(a).as_f32x8();
12546        transmute(simd_select_bitmask(k, convert, src.as_f32x8()))
12547    }
12548}
12549
12550/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12551///
12552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_ps&expand=1454)
12553#[inline]
12554#[target_feature(enable = "avx512f,avx512vl")]
12555#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12556#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12557pub fn _mm256_maskz_cvtepi32_ps(k: __mmask8, a: __m256i) -> __m256 {
12558    unsafe {
12559        let convert = _mm256_cvtepi32_ps(a).as_f32x8();
12560        transmute(simd_select_bitmask(k, convert, f32x8::ZERO))
12561    }
12562}
12563
12564/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12565///
12566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_ps&expand=1450)
12567#[inline]
12568#[target_feature(enable = "avx512f,avx512vl")]
12569#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12570#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12571pub fn _mm_mask_cvtepi32_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
12572    unsafe {
12573        let convert = _mm_cvtepi32_ps(a).as_f32x4();
12574        transmute(simd_select_bitmask(k, convert, src.as_f32x4()))
12575    }
12576}
12577
12578/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12579///
12580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_ps&expand=1451)
12581#[inline]
12582#[target_feature(enable = "avx512f,avx512vl")]
12583#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12584#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12585pub fn _mm_maskz_cvtepi32_ps(k: __mmask8, a: __m128i) -> __m128 {
12586    unsafe {
12587        let convert = _mm_cvtepi32_ps(a).as_f32x4();
12588        transmute(simd_select_bitmask(k, convert, f32x4::ZERO))
12589    }
12590}
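
// Sketch for signed 32-bit integer -> f32 conversion. Magnitudes above 2^24 may round,
// since f32 has a 24-bit significand. Hypothetical helper and values, AVX-512F assumed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cvtepi32_ps() {
    let a = _mm512_set1_epi32(-42);
    let f = _mm512_cvtepi32_ps(a); // -42.0_f32 in every lane
    // Masked forms follow the usual pattern: deselected lanes copy `src` or become 0.0.
    let merged = _mm512_mask_cvtepi32_ps(_mm512_set1_ps(1.0), 0x0F0F, a);
    let zeroed = _mm512_maskz_cvtepi32_ps(0x0F0F, a);
    let _ = (f, merged, zeroed);
}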
12591
12592/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
12593///
12594/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_pd&expand=1446)
12595#[inline]
12596#[target_feature(enable = "avx512f")]
12597#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12598#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12599pub fn _mm512_cvtepi32_pd(a: __m256i) -> __m512d {
12600    unsafe {
12601        let a = a.as_i32x8();
12602        transmute::<f64x8, _>(simd_cast(a))
12603    }
12604}
12605
12606/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12607///
12608/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_pd&expand=1447)
12609#[inline]
12610#[target_feature(enable = "avx512f")]
12611#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12612#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12613pub fn _mm512_mask_cvtepi32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
12614    unsafe {
12615        let convert = _mm512_cvtepi32_pd(a).as_f64x8();
12616        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
12617    }
12618}
12619
12620/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12621///
12622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_pd&expand=1448)
12623#[inline]
12624#[target_feature(enable = "avx512f")]
12625#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12626#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12627pub fn _mm512_maskz_cvtepi32_pd(k: __mmask8, a: __m256i) -> __m512d {
12628    unsafe {
12629        let convert = _mm512_cvtepi32_pd(a).as_f64x8();
12630        transmute(simd_select_bitmask(k, convert, f64x8::ZERO))
12631    }
12632}
12633
12634/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12635///
12636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_pd&expand=1444)
12637#[inline]
12638#[target_feature(enable = "avx512f,avx512vl")]
12639#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12640#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12641pub fn _mm256_mask_cvtepi32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d {
12642    unsafe {
12643        let convert = _mm256_cvtepi32_pd(a).as_f64x4();
12644        transmute(simd_select_bitmask(k, convert, src.as_f64x4()))
12645    }
12646}
12647
12648/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12649///
12650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_pd&expand=1445)
12651#[inline]
12652#[target_feature(enable = "avx512f,avx512vl")]
12653#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12654#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12655pub fn _mm256_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m256d {
12656    unsafe {
12657        let convert = _mm256_cvtepi32_pd(a).as_f64x4();
12658        transmute(simd_select_bitmask(k, convert, f64x4::ZERO))
12659    }
12660}
12661
12662/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12663///
12664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_pd&expand=1441)
12665#[inline]
12666#[target_feature(enable = "avx512f,avx512vl")]
12667#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12668#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12669pub fn _mm_mask_cvtepi32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
12670    unsafe {
12671        let convert = _mm_cvtepi32_pd(a).as_f64x2();
12672        transmute(simd_select_bitmask(k, convert, src.as_f64x2()))
12673    }
12674}
12675
12676/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12677///
12678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_pd&expand=1442)
12679#[inline]
12680#[target_feature(enable = "avx512f,avx512vl")]
12681#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12682#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12683pub fn _mm_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m128d {
12684    unsafe {
12685        let convert = _mm_cvtepi32_pd(a).as_f64x2();
12686        transmute(simd_select_bitmask(k, convert, f64x2::ZERO))
12687    }
12688}
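
// Sketch for signed 32-bit integer -> f64 conversion. Every i32 is exactly representable
// as an f64, so the conversion is lossless. Hypothetical example, AVX-512F assumed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cvtepi32_pd() {
    // A 256-bit integer source fills a full 512-bit double-precision vector.
    let a = _mm256_set1_epi32(i32::MIN);
    let d = _mm512_cvtepi32_pd(a); // -2147483648.0 in all eight lanes
    let merged = _mm512_mask_cvtepi32_pd(_mm512_set1_pd(0.0), 0b1111_0000, a);
    let _ = (d, merged);
}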
12689
12690/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
12691///
12692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_ps&expand=1583)
12693#[inline]
12694#[target_feature(enable = "avx512f")]
12695#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12696#[cfg_attr(test, assert_instr(vcvtudq2ps))]
12697pub fn _mm512_cvtepu32_ps(a: __m512i) -> __m512 {
12698    unsafe {
12699        let a = a.as_u32x16();
12700        transmute::<f32x16, _>(simd_cast(a))
12701    }
12702}
12703
12704/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12705///
12706/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_ps&expand=1584)
12707#[inline]
12708#[target_feature(enable = "avx512f")]
12709#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12710#[cfg_attr(test, assert_instr(vcvtudq2ps))]
12711pub fn _mm512_mask_cvtepu32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
12712    unsafe {
12713        let convert = _mm512_cvtepu32_ps(a).as_f32x16();
12714        transmute(simd_select_bitmask(k, convert, src.as_f32x16()))
12715    }
12716}
12717
12718/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12719///
12720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_ps&expand=1585)
12721#[inline]
12722#[target_feature(enable = "avx512f")]
12723#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12724#[cfg_attr(test, assert_instr(vcvtudq2ps))]
12725pub fn _mm512_maskz_cvtepu32_ps(k: __mmask16, a: __m512i) -> __m512 {
12726    unsafe {
12727        let convert = _mm512_cvtepu32_ps(a).as_f32x16();
12728        transmute(simd_select_bitmask(k, convert, f32x16::ZERO))
12729    }
12730}
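
// Sketch showing why the unsigned conversion matters: the same bit pattern 0xFFFF_FFFF
// becomes roughly 4.3e9 rather than -1.0. Hypothetical example, AVX-512F assumed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cvtepu32_ps() {
    let a = _mm512_set1_epi32(-1);
    let unsigned = _mm512_cvtepu32_ps(a); // 0xFFFF_FFFF rounded to f32 in every lane
    let signed = _mm512_cvtepi32_ps(a); // -1.0_f32 in every lane
    let _ = (unsigned, signed);
}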
12731
12732/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
12733///
12734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_pd&expand=1580)
12735#[inline]
12736#[target_feature(enable = "avx512f")]
12737#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12738#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12739pub fn _mm512_cvtepu32_pd(a: __m256i) -> __m512d {
12740    unsafe {
12741        let a = a.as_u32x8();
12742        transmute::<f64x8, _>(simd_cast(a))
12743    }
12744}
12745
12746/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12747///
12748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_pd&expand=1581)
12749#[inline]
12750#[target_feature(enable = "avx512f")]
12751#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12752#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12753pub fn _mm512_mask_cvtepu32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
12754    unsafe {
12755        let convert = _mm512_cvtepu32_pd(a).as_f64x8();
12756        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
12757    }
12758}
12759
12760/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12761///
12762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_pd&expand=1582)
12763#[inline]
12764#[target_feature(enable = "avx512f")]
12765#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12766#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12767pub fn _mm512_maskz_cvtepu32_pd(k: __mmask8, a: __m256i) -> __m512d {
12768    unsafe {
12769        let convert = _mm512_cvtepu32_pd(a).as_f64x8();
12770        transmute(simd_select_bitmask(k, convert, f64x8::ZERO))
12771    }
12772}
12773
12774/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
12775///
12776/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu32_pd&expand=1577)
12777#[inline]
12778#[target_feature(enable = "avx512f,avx512vl")]
12779#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12780#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12781pub fn _mm256_cvtepu32_pd(a: __m128i) -> __m256d {
12782    unsafe {
12783        let a = a.as_u32x4();
12784        transmute::<f64x4, _>(simd_cast(a))
12785    }
12786}
12787
12788/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12789///
12790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu32_pd&expand=1578)
12791#[inline]
12792#[target_feature(enable = "avx512f,avx512vl")]
12793#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12794#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12795pub fn _mm256_mask_cvtepu32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d {
12796    unsafe {
12797        let convert = _mm256_cvtepu32_pd(a).as_f64x4();
12798        transmute(simd_select_bitmask(k, convert, src.as_f64x4()))
12799    }
12800}
12801
12802/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12803///
12804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu32_pd&expand=1579)
12805#[inline]
12806#[target_feature(enable = "avx512f,avx512vl")]
12807#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12808#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12809pub fn _mm256_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m256d {
12810    unsafe {
12811        let convert = _mm256_cvtepu32_pd(a).as_f64x4();
12812        transmute(simd_select_bitmask(k, convert, f64x4::ZERO))
12813    }
12814}
12815
12816/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
12817///
12818/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu32_pd&expand=1574)
12819#[inline]
12820#[target_feature(enable = "avx512f,avx512vl")]
12821#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12822#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12823pub fn _mm_cvtepu32_pd(a: __m128i) -> __m128d {
12824    unsafe {
12825        let a = a.as_u32x4();
12826        let lo: u32x2 = simd_shuffle!(a, a, [0, 1]); // keep only the two low dwords of `a`
12827        transmute::<f64x2, _>(simd_cast(lo))
12828    }
12829}
12830
12831/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12832///
12833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu32_pd&expand=1575)
12834#[inline]
12835#[target_feature(enable = "avx512f,avx512vl")]
12836#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12837#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12838pub fn _mm_mask_cvtepu32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
12839    unsafe {
12840        let convert = _mm_cvtepu32_pd(a).as_f64x2();
12841        transmute(simd_select_bitmask(k, convert, src.as_f64x2()))
12842    }
12843}
12844
12845/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12846///
12847/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu32_pd&expand=1576)
12848#[inline]
12849#[target_feature(enable = "avx512f,avx512vl")]
12850#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12851#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12852pub fn _mm_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m128d {
12853    unsafe {
12854        let convert = _mm_cvtepu32_pd(a).as_f64x2();
12855        transmute(simd_select_bitmask(k, convert, f64x2::ZERO))
12856    }
12857}
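
// Sketch for the 128-bit unsigned 32-bit -> f64 conversion: only the two low dwords of
// `a` are consumed, matching the shuffle in `_mm_cvtepu32_pd` above. Hypothetical example,
// AVX-512F+VL assumed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn example_mm_cvtepu32_pd() {
    let a = _mm_setr_epi32(1, -1, 7, 7); // lanes 2 and 3 are ignored
    let d = _mm_cvtepu32_pd(a); // [1.0, 4294967295.0]
    let masked = _mm_maskz_cvtepu32_pd(0b01, a); // [1.0, 0.0]
    let _ = (d, masked);
}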
12858
12859/// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
12860///
12861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32lo_pd&expand=1464)
12862#[inline]
12863#[target_feature(enable = "avx512f")]
12864#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12865#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12866pub fn _mm512_cvtepi32lo_pd(v2: __m512i) -> __m512d {
12867    unsafe {
12868        let v2 = v2.as_i32x16();
12869        let v256: i32x8 = simd_shuffle!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
12870        transmute::<f64x8, _>(simd_cast(v256))
12871    }
12872}
12873
12874/// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12875///
12876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32lo_pd&expand=1465)
12877#[inline]
12878#[target_feature(enable = "avx512f")]
12879#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12880#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12881pub fn _mm512_mask_cvtepi32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
12882    unsafe {
12883        let convert = _mm512_cvtepi32lo_pd(v2).as_f64x8();
12884        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
12885    }
12886}
12887
12888/// Performs element-by-element conversion of the lower half of packed 32-bit unsigned integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
12889///
12890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32lo_pd&expand=1586)
12891#[inline]
12892#[target_feature(enable = "avx512f")]
12893#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12894#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12895pub fn _mm512_cvtepu32lo_pd(v2: __m512i) -> __m512d {
12896    unsafe {
12897        let v2 = v2.as_u32x16();
12898        let v256: u32x8 = simd_shuffle!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
12899        transmute::<f64x8, _>(simd_cast(v256))
12900    }
12901}
12902
12903/// Performs element-by-element conversion of the lower half of packed 32-bit unsigned integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12904///
12905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32lo_pd&expand=1587)
12906#[inline]
12907#[target_feature(enable = "avx512f")]
12908#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12909#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12910pub fn _mm512_mask_cvtepu32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
12911    unsafe {
12912        let convert = _mm512_cvtepu32lo_pd(v2).as_f64x8();
12913        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
12914    }
12915}
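
// Sketch for the `lo` variants above: the 512-bit source holds sixteen dwords, but only
// the lower eight are converted to doubles. Hypothetical example, AVX-512F assumed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cvtepi32lo_pd() {
    let v2 = _mm512_set1_epi32(-3);
    let signed = _mm512_cvtepi32lo_pd(v2); // eight lanes of -3.0
    let unsigned = _mm512_cvtepu32lo_pd(v2); // eight lanes of 4294967293.0
    let _ = (signed, unsigned);
}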
12916
12917/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
12918///
12919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi16&expand=1419)
12920#[inline]
12921#[target_feature(enable = "avx512f")]
12922#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12923#[cfg_attr(test, assert_instr(vpmovdw))]
12924pub fn _mm512_cvtepi32_epi16(a: __m512i) -> __m256i {
12925    unsafe {
12926        let a = a.as_i32x16();
12927        transmute::<i16x16, _>(simd_cast(a))
12928    }
12929}
12930
12931/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12932///
12933/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi16&expand=1420)
12934#[inline]
12935#[target_feature(enable = "avx512f")]
12936#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12937#[cfg_attr(test, assert_instr(vpmovdw))]
12938pub fn _mm512_mask_cvtepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
12939    unsafe {
12940        let convert = _mm512_cvtepi32_epi16(a).as_i16x16();
12941        transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
12942    }
12943}
12944
12945/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12946///
12947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi16&expand=1421)
12948#[inline]
12949#[target_feature(enable = "avx512f")]
12950#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12951#[cfg_attr(test, assert_instr(vpmovdw))]
12952pub fn _mm512_maskz_cvtepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
12953    unsafe {
12954        let convert = _mm512_cvtepi32_epi16(a).as_i16x16();
12955        transmute(simd_select_bitmask(k, convert, i16x16::ZERO))
12956    }
12957}
12958
12959/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
12960///
12961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi16&expand=1416)
12962#[inline]
12963#[target_feature(enable = "avx512f,avx512vl")]
12964#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12965#[cfg_attr(test, assert_instr(vpmovdw))]
12966pub fn _mm256_cvtepi32_epi16(a: __m256i) -> __m128i {
12967    unsafe {
12968        let a = a.as_i32x8();
12969        transmute::<i16x8, _>(simd_cast(a))
12970    }
12971}
12972
12973/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12974///
12975/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi16&expand=1417)
12976#[inline]
12977#[target_feature(enable = "avx512f,avx512vl")]
12978#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12979#[cfg_attr(test, assert_instr(vpmovdw))]
12980pub fn _mm256_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
12981    unsafe {
12982        let convert = _mm256_cvtepi32_epi16(a).as_i16x8();
12983        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
12984    }
12985}
12986
12987/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12988///
12989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi16&expand=1418)
12990#[inline]
12991#[target_feature(enable = "avx512f,avx512vl")]
12992#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12993#[cfg_attr(test, assert_instr(vpmovdw))]
12994pub fn _mm256_maskz_cvtepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
12995    unsafe {
12996        let convert = _mm256_cvtepi32_epi16(a).as_i16x8();
12997        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
12998    }
12999}
13000
13001/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13002///
13003/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_epi16&expand=1413)
13004#[inline]
13005#[target_feature(enable = "avx512f,avx512vl")]
13006#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13007#[cfg_attr(test, assert_instr(vpmovdw))]
13008pub fn _mm_cvtepi32_epi16(a: __m128i) -> __m128i {
13009    unsafe { transmute(vpmovdw128(a.as_i32x4(), i16x8::ZERO, 0b11111111)) }
13010}
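
// Illustrative sketch, not part of the upstream source (the helper name is
// hypothetical): the 128-bit form narrows four 32-bit lanes into the low
// 64 bits of the result and zeroes the upper 64 bits.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn example_mm_cvtepi32_epi16() {
    let a = _mm_setr_epi32(1, 2, 3, 4);
    let r = _mm_cvtepi32_epi16(a);
    let lanes: [i16; 8] = unsafe { transmute(r) };
    // Converted values land in lanes 0..4; lanes 4..8 are zeroed.
    assert_eq!(lanes, [1, 2, 3, 4, 0, 0, 0, 0]);
}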
13011
13012/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13013///
13014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi16&expand=1414)
13015#[inline]
13016#[target_feature(enable = "avx512f,avx512vl")]
13017#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13018#[cfg_attr(test, assert_instr(vpmovdw))]
13019pub fn _mm_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13020    unsafe { transmute(vpmovdw128(a.as_i32x4(), src.as_i16x8(), k)) }
13021}
13022
13023/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13024///
13025/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi16&expand=1415)
13026#[inline]
13027#[target_feature(enable = "avx512f,avx512vl")]
13028#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13029#[cfg_attr(test, assert_instr(vpmovdw))]
13030pub fn _mm_maskz_cvtepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
13031    unsafe { transmute(vpmovdw128(a.as_i32x4(), i16x8::ZERO, k)) }
13032}
13033
13034/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13035///
13036/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi8&expand=1437)
13037#[inline]
13038#[target_feature(enable = "avx512f")]
13039#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13040#[cfg_attr(test, assert_instr(vpmovdb))]
13041pub fn _mm512_cvtepi32_epi8(a: __m512i) -> __m128i {
13042    unsafe {
13043        let a = a.as_i32x16();
13044        transmute::<i8x16, _>(simd_cast(a))
13045    }
13046}
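
// Illustrative sketch, not part of the upstream source (the helper name is
// hypothetical): truncation keeps only the low 8 bits of each 32-bit lane,
// so 300 (0x12C) becomes 0x2C = 44.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_mm512_cvtepi32_epi8() {
    let a = _mm512_set1_epi32(300);
    let r = _mm512_cvtepi32_epi8(a);
    let lanes: [i8; 16] = unsafe { transmute(r) };
    assert_eq!(lanes, [44; 16]);
}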
13047
13048/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13049///
13050/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi8&expand=1438)
13051#[inline]
13052#[target_feature(enable = "avx512f")]
13053#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13054#[cfg_attr(test, assert_instr(vpmovdb))]
13055pub fn _mm512_mask_cvtepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
13056    unsafe {
13057        let convert = _mm512_cvtepi32_epi8(a).as_i8x16();
13058        transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
13059    }
13060}
13061
13062/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13063///
13064/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi8&expand=1439)
13065#[inline]
13066#[target_feature(enable = "avx512f")]
13067#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13068#[cfg_attr(test, assert_instr(vpmovdb))]
13069pub fn _mm512_maskz_cvtepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
13070    unsafe {
13071        let convert = _mm512_cvtepi32_epi8(a).as_i8x16();
13072        transmute(simd_select_bitmask(k, convert, i8x16::ZERO))
13073    }
13074}
13075
13076/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13077///
13078/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi8&expand=1434)
13079#[inline]
13080#[target_feature(enable = "avx512f,avx512vl")]
13081#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13082#[cfg_attr(test, assert_instr(vpmovdb))]
13083pub fn _mm256_cvtepi32_epi8(a: __m256i) -> __m128i {
13084    unsafe { transmute(vpmovdb256(a.as_i32x8(), i8x16::ZERO, 0b11111111)) }
13085}
13086
13087/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13088///
13089/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi8&expand=1435)
13090#[inline]
13091#[target_feature(enable = "avx512f,avx512vl")]
13092#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13093#[cfg_attr(test, assert_instr(vpmovdb))]
13094pub fn _mm256_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13095    unsafe { transmute(vpmovdb256(a.as_i32x8(), src.as_i8x16(), k)) }
13096}
13097
13098/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13099///
13100/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi8&expand=1436)
13101#[inline]
13102#[target_feature(enable = "avx512f,avx512vl")]
13103#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13104#[cfg_attr(test, assert_instr(vpmovdb))]
13105pub fn _mm256_maskz_cvtepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
13106    unsafe { transmute(vpmovdb256(a.as_i32x8(), i8x16::ZERO, k)) }
13107}
13108
13109/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13110///
13111/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_epi8&expand=1431)
13112#[inline]
13113#[target_feature(enable = "avx512f,avx512vl")]
13114#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13115#[cfg_attr(test, assert_instr(vpmovdb))]
13116pub fn _mm_cvtepi32_epi8(a: __m128i) -> __m128i {
13117    unsafe { transmute(vpmovdb128(a.as_i32x4(), i8x16::ZERO, 0b11111111)) }
13118}
13119
13120/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13121///
13122/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi8&expand=1432)
13123#[inline]
13124#[target_feature(enable = "avx512f,avx512vl")]
13125#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13126#[cfg_attr(test, assert_instr(vpmovdb))]
13127pub fn _mm_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13128    unsafe { transmute(vpmovdb128(a.as_i32x4(), src.as_i8x16(), k)) }
13129}
13130
13131/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13132///
13133/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi8&expand=1433)
13134#[inline]
13135#[target_feature(enable = "avx512f,avx512vl")]
13136#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13137#[cfg_attr(test, assert_instr(vpmovdb))]
13138pub fn _mm_maskz_cvtepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
13139    unsafe { transmute(vpmovdb128(a.as_i32x4(), i8x16::ZERO, k)) }
13140}
13141
13142/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
13143///
13144/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi32&expand=1481)
13145#[inline]
13146#[target_feature(enable = "avx512f")]
13147#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13148#[cfg_attr(test, assert_instr(vpmovqd))]
13149pub fn _mm512_cvtepi64_epi32(a: __m512i) -> __m256i {
13150    unsafe {
13151        let a = a.as_i64x8();
13152        transmute::<i32x8, _>(simd_cast(a))
13153    }
13154}
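
// Illustrative sketch, not part of the upstream source (the helper name is
// hypothetical): narrowing 64 -> 32 bits keeps the low 32 bits of each
// lane, so 0x1_0000_0005 truncates to 5.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_mm512_cvtepi64_epi32() {
    let a = _mm512_set1_epi64(0x1_0000_0005);
    let r = _mm512_cvtepi64_epi32(a);
    let lanes: [i32; 8] = unsafe { transmute(r) };
    assert_eq!(lanes, [5; 8]);
}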
13155
13156/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13157///
13158/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi32&expand=1482)
13159#[inline]
13160#[target_feature(enable = "avx512f")]
13161#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13162#[cfg_attr(test, assert_instr(vpmovqd))]
13163pub fn _mm512_mask_cvtepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
13164    unsafe {
13165        let convert = _mm512_cvtepi64_epi32(a).as_i32x8();
13166        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
13167    }
13168}
13169
13170/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13171///
13172/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi32&expand=1483)
13173#[inline]
13174#[target_feature(enable = "avx512f")]
13175#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13176#[cfg_attr(test, assert_instr(vpmovqd))]
13177pub fn _mm512_maskz_cvtepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
13178    unsafe {
13179        let convert = _mm512_cvtepi64_epi32(a).as_i32x8();
13180        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
13181    }
13182}
13183
13184/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
13185///
13186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi32&expand=1478)
13187#[inline]
13188#[target_feature(enable = "avx512f,avx512vl")]
13189#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13190#[cfg_attr(test, assert_instr(vpmovqd))]
13191pub fn _mm256_cvtepi64_epi32(a: __m256i) -> __m128i {
13192    unsafe {
13193        let a = a.as_i64x4();
13194        transmute::<i32x4, _>(simd_cast(a))
13195    }
13196}
13197
13198/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13199///
13200/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi32&expand=1479)
13201#[inline]
13202#[target_feature(enable = "avx512f,avx512vl")]
13203#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13204#[cfg_attr(test, assert_instr(vpmovqd))]
13205pub fn _mm256_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13206    unsafe {
13207        let convert = _mm256_cvtepi64_epi32(a).as_i32x4();
13208        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
13209    }
13210}
13211
13212/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13213///
13214/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi32&expand=1480)
13215#[inline]
13216#[target_feature(enable = "avx512f,avx512vl")]
13217#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13218#[cfg_attr(test, assert_instr(vpmovqd))]
13219pub fn _mm256_maskz_cvtepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
13220    unsafe {
13221        let convert = _mm256_cvtepi64_epi32(a).as_i32x4();
13222        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
13223    }
13224}
13225
13226/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
13227///
13228/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi32&expand=1475)
13229#[inline]
13230#[target_feature(enable = "avx512f,avx512vl")]
13231#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13232#[cfg_attr(test, assert_instr(vpmovqd))]
13233pub fn _mm_cvtepi64_epi32(a: __m128i) -> __m128i {
13234    unsafe { transmute(vpmovqd128(a.as_i64x2(), i32x4::ZERO, 0b11111111)) }
13235}
13236
13237/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13238///
13239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi32&expand=1476)
13240#[inline]
13241#[target_feature(enable = "avx512f,avx512vl")]
13242#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13243#[cfg_attr(test, assert_instr(vpmovqd))]
13244pub fn _mm_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13245    unsafe { transmute(vpmovqd128(a.as_i64x2(), src.as_i32x4(), k)) }
13246}
13247
13248/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13249///
13250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi32&expand=1477)
13251#[inline]
13252#[target_feature(enable = "avx512f,avx512vl")]
13253#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13254#[cfg_attr(test, assert_instr(vpmovqd))]
13255pub fn _mm_maskz_cvtepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
13256    unsafe { transmute(vpmovqd128(a.as_i64x2(), i32x4::ZERO, k)) }
13257}
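
// Illustrative sketch, not part of the upstream source (the helper name is
// hypothetical): the zeromask variant writes zero wherever the mask bit is
// clear, instead of copying from a `src` vector.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn example_mm_maskz_cvtepi64_epi32() {
    let a = _mm_set1_epi64x(9);
    // Only mask bit 0 is set: lane 0 gets the truncated value, lane 1 is
    // zeroed; lanes 2 and 3 of the 128-bit result are always zero here.
    let r = _mm_maskz_cvtepi64_epi32(0b01, a);
    let lanes: [i32; 4] = unsafe { transmute(r) };
    assert_eq!(lanes, [9, 0, 0, 0]);
}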
13258
13259/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13260///
13261/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi16&expand=1472)
13262#[inline]
13263#[target_feature(enable = "avx512f")]
13264#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13265#[cfg_attr(test, assert_instr(vpmovqw))]
13266pub fn _mm512_cvtepi64_epi16(a: __m512i) -> __m128i {
13267    unsafe {
13268        let a = a.as_i64x8();
13269        transmute::<i16x8, _>(simd_cast(a))
13270    }
13271}
13272
13273/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13274///
13275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi16&expand=1473)
13276#[inline]
13277#[target_feature(enable = "avx512f")]
13278#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13279#[cfg_attr(test, assert_instr(vpmovqw))]
13280pub fn _mm512_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
13281    unsafe {
13282        let convert = _mm512_cvtepi64_epi16(a).as_i16x8();
13283        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
13284    }
13285}
13286
13287/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13288///
13289/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi16&expand=1474)
13290#[inline]
13291#[target_feature(enable = "avx512f")]
13292#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13293#[cfg_attr(test, assert_instr(vpmovqw))]
13294pub fn _mm512_maskz_cvtepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
13295    unsafe {
13296        let convert = _mm512_cvtepi64_epi16(a).as_i16x8();
13297        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
13298    }
13299}
13300
13301/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13302///
13303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi16&expand=1469)
13304#[inline]
13305#[target_feature(enable = "avx512f,avx512vl")]
13306#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13307#[cfg_attr(test, assert_instr(vpmovqw))]
13308pub fn _mm256_cvtepi64_epi16(a: __m256i) -> __m128i {
13309    unsafe { transmute(vpmovqw256(a.as_i64x4(), i16x8::ZERO, 0b11111111)) }
13310}
13311
13312/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13313///
13314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi16&expand=1470)
13315#[inline]
13316#[target_feature(enable = "avx512f,avx512vl")]
13317#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13318#[cfg_attr(test, assert_instr(vpmovqw))]
13319pub fn _mm256_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13320    unsafe { transmute(vpmovqw256(a.as_i64x4(), src.as_i16x8(), k)) }
13321}
13322
13323/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13324///
13325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi16&expand=1471)
13326#[inline]
13327#[target_feature(enable = "avx512f,avx512vl")]
13328#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13329#[cfg_attr(test, assert_instr(vpmovqw))]
13330pub fn _mm256_maskz_cvtepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
13331    unsafe { transmute(vpmovqw256(a.as_i64x4(), i16x8::ZERO, k)) }
13332}
13333
13334/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13335///
13336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi16&expand=1466)
13337#[inline]
13338#[target_feature(enable = "avx512f,avx512vl")]
13339#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13340#[cfg_attr(test, assert_instr(vpmovqw))]
13341pub fn _mm_cvtepi64_epi16(a: __m128i) -> __m128i {
13342    unsafe { transmute(vpmovqw128(a.as_i64x2(), i16x8::ZERO, 0b11111111)) }
13343}
13344
13345/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13346///
13347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi16&expand=1467)
13348#[inline]
13349#[target_feature(enable = "avx512f,avx512vl")]
13350#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13351#[cfg_attr(test, assert_instr(vpmovqw))]
13352pub fn _mm_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13353    unsafe { transmute(vpmovqw128(a.as_i64x2(), src.as_i16x8(), k)) }
13354}
13355
13356/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13357///
13358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi16&expand=1468)
13359#[inline]
13360#[target_feature(enable = "avx512f,avx512vl")]
13361#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13362#[cfg_attr(test, assert_instr(vpmovqw))]
13363pub fn _mm_maskz_cvtepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
13364    unsafe { transmute(vpmovqw128(a.as_i64x2(), i16x8::ZERO, k)) }
13365}
13366
13367/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13368///
13369/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi8&expand=1490)
13370#[inline]
13371#[target_feature(enable = "avx512f")]
13372#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13373#[cfg_attr(test, assert_instr(vpmovqb))]
13374pub fn _mm512_cvtepi64_epi8(a: __m512i) -> __m128i {
13375    unsafe { transmute(vpmovqb(a.as_i64x8(), i8x16::ZERO, 0b11111111)) }
13376}
13377
13378/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13379///
13380/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi8&expand=1491)
13381#[inline]
13382#[target_feature(enable = "avx512f")]
13383#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13384#[cfg_attr(test, assert_instr(vpmovqb))]
13385pub fn _mm512_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
13386    unsafe { transmute(vpmovqb(a.as_i64x8(), src.as_i8x16(), k)) }
13387}
13388
13389/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13390///
13391/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi8&expand=1492)
13392#[inline]
13393#[target_feature(enable = "avx512f")]
13394#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13395#[cfg_attr(test, assert_instr(vpmovqb))]
13396pub fn _mm512_maskz_cvtepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
13397    unsafe { transmute(vpmovqb(a.as_i64x8(), i8x16::ZERO, k)) }
13398}
13399
13400/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13401///
13402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi8&expand=1487)
13403#[inline]
13404#[target_feature(enable = "avx512f,avx512vl")]
13405#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13406#[cfg_attr(test, assert_instr(vpmovqb))]
13407pub fn _mm256_cvtepi64_epi8(a: __m256i) -> __m128i {
13408    unsafe { transmute(vpmovqb256(a.as_i64x4(), i8x16::ZERO, 0b11111111)) }
13409}
13410
13411/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13412///
13413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi8&expand=1488)
13414#[inline]
13415#[target_feature(enable = "avx512f,avx512vl")]
13416#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13417#[cfg_attr(test, assert_instr(vpmovqb))]
13418pub fn _mm256_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13419    unsafe { transmute(vpmovqb256(a.as_i64x4(), src.as_i8x16(), k)) }
13420}
13421
13422/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13423///
13424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi8&expand=1489)
13425#[inline]
13426#[target_feature(enable = "avx512f,avx512vl")]
13427#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13428#[cfg_attr(test, assert_instr(vpmovqb))]
13429pub fn _mm256_maskz_cvtepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
13430    unsafe { transmute(vpmovqb256(a.as_i64x4(), i8x16::ZERO, k)) }
13431}
13432
13433/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13434///
13435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi8&expand=1484)
13436#[inline]
13437#[target_feature(enable = "avx512f,avx512vl")]
13438#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13439#[cfg_attr(test, assert_instr(vpmovqb))]
13440pub fn _mm_cvtepi64_epi8(a: __m128i) -> __m128i {
13441    unsafe { transmute(vpmovqb128(a.as_i64x2(), i8x16::ZERO, 0b11111111)) }
13442}
13443
13444/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13445///
13446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi8&expand=1485)
13447#[inline]
13448#[target_feature(enable = "avx512f,avx512vl")]
13449#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13450#[cfg_attr(test, assert_instr(vpmovqb))]
13451pub fn _mm_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13452    unsafe { transmute(vpmovqb128(a.as_i64x2(), src.as_i8x16(), k)) }
13453}
13454
13455/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13456///
13457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi8&expand=1486)
13458#[inline]
13459#[target_feature(enable = "avx512f,avx512vl")]
13460#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13461#[cfg_attr(test, assert_instr(vpmovqb))]
13462pub fn _mm_maskz_cvtepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
13463    unsafe { transmute(vpmovqb128(a.as_i64x2(), i8x16::ZERO, k)) }
13464}
13465
13466/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13467///
13468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi32_epi16&expand=1819)
13469#[inline]
13470#[target_feature(enable = "avx512f")]
13471#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13472#[cfg_attr(test, assert_instr(vpmovsdw))]
13473pub fn _mm512_cvtsepi32_epi16(a: __m512i) -> __m256i {
13474    unsafe { transmute(vpmovsdw(a.as_i32x16(), i16x16::ZERO, 0b11111111_11111111)) }
13475}
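
// Illustrative sketch, not part of the upstream source (the helper name is
// hypothetical): contrasts plain truncation with the signed-saturating form
// above on a value that does not fit in 16 bits. 100_000 truncates to
// 100_000 mod 2^16 = 34464, which reinterprets as -31072 in i16, but
// saturates to i16::MAX.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_truncate_vs_saturate_epi32_epi16() {
    let a = _mm512_set1_epi32(100_000);
    let truncated: [i16; 16] = unsafe { transmute(_mm512_cvtepi32_epi16(a)) };
    let saturated: [i16; 16] = unsafe { transmute(_mm512_cvtsepi32_epi16(a)) };
    assert_eq!(truncated[0], -31072);
    assert_eq!(saturated[0], i16::MAX);
}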
13476
13477/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13478///
13479/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_epi16&expand=1820)
13480#[inline]
13481#[target_feature(enable = "avx512f")]
13482#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13483#[cfg_attr(test, assert_instr(vpmovsdw))]
13484pub fn _mm512_mask_cvtsepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
13485    unsafe { transmute(vpmovsdw(a.as_i32x16(), src.as_i16x16(), k)) }
13486}
13487
13488/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13489///
13490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi32_epi16&expand=1821)
13491#[inline]
13492#[target_feature(enable = "avx512f")]
13493#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13494#[cfg_attr(test, assert_instr(vpmovsdw))]
13495pub fn _mm512_maskz_cvtsepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
13496    unsafe { transmute(vpmovsdw(a.as_i32x16(), i16x16::ZERO, k)) }
13497}
13498
13499/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13500///
13501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi32_epi16&expand=1816)
13502#[inline]
13503#[target_feature(enable = "avx512f,avx512vl")]
13504#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13505#[cfg_attr(test, assert_instr(vpmovsdw))]
13506pub fn _mm256_cvtsepi32_epi16(a: __m256i) -> __m128i {
13507    unsafe { transmute(vpmovsdw256(a.as_i32x8(), i16x8::ZERO, 0b11111111)) }
13508}
13509
13510/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13511///
13512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_epi16&expand=1817)
13513#[inline]
13514#[target_feature(enable = "avx512f,avx512vl")]
13515#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13516#[cfg_attr(test, assert_instr(vpmovsdw))]
13517pub fn _mm256_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13518    unsafe { transmute(vpmovsdw256(a.as_i32x8(), src.as_i16x8(), k)) }
13519}
13520
13521/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13522///
13523/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi32_epi16&expand=1818)
13524#[inline]
13525#[target_feature(enable = "avx512f,avx512vl")]
13526#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13527#[cfg_attr(test, assert_instr(vpmovsdw))]
13528pub fn _mm256_maskz_cvtsepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
13529    unsafe { transmute(vpmovsdw256(a.as_i32x8(), i16x8::ZERO, k)) }
13530}
13531
13532/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13533///
13534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi32_epi16&expand=1813)
13535#[inline]
13536#[target_feature(enable = "avx512f,avx512vl")]
13537#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13538#[cfg_attr(test, assert_instr(vpmovsdw))]
13539pub fn _mm_cvtsepi32_epi16(a: __m128i) -> __m128i {
13540    unsafe { transmute(vpmovsdw128(a.as_i32x4(), i16x8::ZERO, 0b11111111)) }
13541}
13542
13543/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13544///
13545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_epi16&expand=1814)
13546#[inline]
13547#[target_feature(enable = "avx512f,avx512vl")]
13548#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13549#[cfg_attr(test, assert_instr(vpmovsdw))]
13550pub fn _mm_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13551    unsafe { transmute(vpmovsdw128(a.as_i32x4(), src.as_i16x8(), k)) }
13552}
13553
13554/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13555///
13556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi32_epi16&expand=1815)
13557#[inline]
13558#[target_feature(enable = "avx512f,avx512vl")]
13559#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13560#[cfg_attr(test, assert_instr(vpmovsdw))]
13561pub fn _mm_maskz_cvtsepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
13562    unsafe { transmute(vpmovsdw128(a.as_i32x4(), i16x8::ZERO, k)) }
13563}
13564
13565/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13566///
13567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi32_epi8&expand=1828)
13568#[inline]
13569#[target_feature(enable = "avx512f")]
13570#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13571#[cfg_attr(test, assert_instr(vpmovsdb))]
13572pub fn _mm512_cvtsepi32_epi8(a: __m512i) -> __m128i {
13573    unsafe { transmute(vpmovsdb(a.as_i32x16(), i8x16::ZERO, 0b11111111_11111111)) }
13574}
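
// Illustrative sketch, not part of the upstream source (the helper name is
// hypothetical): signed saturation clamps out-of-range lanes to i8::MIN or
// i8::MAX rather than wrapping.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_mm512_cvtsepi32_epi8() {
    let a = _mm512_set1_epi32(-1000);
    let r = _mm512_cvtsepi32_epi8(a);
    let lanes: [i8; 16] = unsafe { transmute(r) };
    assert_eq!(lanes, [i8::MIN; 16]);
}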
13575
13576/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13577///
13578/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_epi8&expand=1829)
13579#[inline]
13580#[target_feature(enable = "avx512f")]
13581#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13582#[cfg_attr(test, assert_instr(vpmovsdb))]
13583pub fn _mm512_mask_cvtsepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
13584    unsafe { transmute(vpmovsdb(a.as_i32x16(), src.as_i8x16(), k)) }
13585}
13586
13587/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13588///
13589/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi32_epi8&expand=1830)
13590#[inline]
13591#[target_feature(enable = "avx512f")]
13592#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13593#[cfg_attr(test, assert_instr(vpmovsdb))]
13594pub fn _mm512_maskz_cvtsepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
13595    unsafe { transmute(vpmovsdb(a.as_i32x16(), i8x16::ZERO, k)) }
13596}
13597
13598/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13599///
13600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi32_epi8&expand=1825)
13601#[inline]
13602#[target_feature(enable = "avx512f,avx512vl")]
13603#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13604#[cfg_attr(test, assert_instr(vpmovsdb))]
13605pub fn _mm256_cvtsepi32_epi8(a: __m256i) -> __m128i {
13606    unsafe { transmute(vpmovsdb256(a.as_i32x8(), i8x16::ZERO, 0b11111111)) }
13607}
13608
13609/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13610///
13611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_epi8&expand=1826)
13612#[inline]
13613#[target_feature(enable = "avx512f,avx512vl")]
13614#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13615#[cfg_attr(test, assert_instr(vpmovsdb))]
13616pub fn _mm256_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13617    unsafe { transmute(vpmovsdb256(a.as_i32x8(), src.as_i8x16(), k)) }
13618}
13619
13620/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13621///
13622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi32_epi8&expand=1827)
13623#[inline]
13624#[target_feature(enable = "avx512f,avx512vl")]
13625#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13626#[cfg_attr(test, assert_instr(vpmovsdb))]
13627pub fn _mm256_maskz_cvtsepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
13628    unsafe { transmute(vpmovsdb256(a.as_i32x8(), i8x16::ZERO, k)) }
13629}
13630
13631/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13632///
13633/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi32_epi8&expand=1822)
13634#[inline]
13635#[target_feature(enable = "avx512f,avx512vl")]
13636#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13637#[cfg_attr(test, assert_instr(vpmovsdb))]
13638pub fn _mm_cvtsepi32_epi8(a: __m128i) -> __m128i {
13639    unsafe { transmute(vpmovsdb128(a.as_i32x4(), i8x16::ZERO, 0b11111111)) }
13640}
13641
13642/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13643///
13644/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_epi8&expand=1823)
13645#[inline]
13646#[target_feature(enable = "avx512f,avx512vl")]
13647#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13648#[cfg_attr(test, assert_instr(vpmovsdb))]
13649pub fn _mm_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13650    unsafe { transmute(vpmovsdb128(a.as_i32x4(), src.as_i8x16(), k)) }
13651}
13652
13653/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13654///
13655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi32_epi8&expand=1824)
13656#[inline]
13657#[target_feature(enable = "avx512f,avx512vl")]
13658#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13659#[cfg_attr(test, assert_instr(vpmovsdb))]
13660pub fn _mm_maskz_cvtsepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
13661    unsafe { transmute(vpmovsdb128(a.as_i32x4(), i8x16::ZERO, k)) }
13662}
13663
13664/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
13665///
13666/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi32&expand=1852)
13667#[inline]
13668#[target_feature(enable = "avx512f")]
13669#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13670#[cfg_attr(test, assert_instr(vpmovsqd))]
13671pub fn _mm512_cvtsepi64_epi32(a: __m512i) -> __m256i {
13672    unsafe { transmute(vpmovsqd(a.as_i64x8(), i32x8::ZERO, 0b11111111)) }
13673}
13674
13675/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13676///
13677/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi32&expand=1853)
13678#[inline]
13679#[target_feature(enable = "avx512f")]
13680#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13681#[cfg_attr(test, assert_instr(vpmovsqd))]
13682pub fn _mm512_mask_cvtsepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
13683    unsafe { transmute(vpmovsqd(a.as_i64x8(), src.as_i32x8(), k)) }
13684}
13685
13686/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13687///
13688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi32&expand=1854)
13689#[inline]
13690#[target_feature(enable = "avx512f")]
13691#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13692#[cfg_attr(test, assert_instr(vpmovsqd))]
13693pub fn _mm512_maskz_cvtsepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
13694    unsafe { transmute(vpmovsqd(a.as_i64x8(), i32x8::ZERO, k)) }
13695}
13696
13697/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
13698///
13699/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi32&expand=1849)
13700#[inline]
13701#[target_feature(enable = "avx512f,avx512vl")]
13702#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13703#[cfg_attr(test, assert_instr(vpmovsqd))]
13704pub fn _mm256_cvtsepi64_epi32(a: __m256i) -> __m128i {
13705    unsafe { transmute(vpmovsqd256(a.as_i64x4(), i32x4::ZERO, 0b11111111)) }
13706}
13707
13708/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13709///
13710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi32&expand=1850)
13711#[inline]
13712#[target_feature(enable = "avx512f,avx512vl")]
13713#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13714#[cfg_attr(test, assert_instr(vpmovsqd))]
13715pub fn _mm256_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13716    unsafe { transmute(vpmovsqd256(a.as_i64x4(), src.as_i32x4(), k)) }
13717}
13718
13719/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13720///
13721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi32&expand=1851)
13722#[inline]
13723#[target_feature(enable = "avx512f,avx512vl")]
13724#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13725#[cfg_attr(test, assert_instr(vpmovsqd))]
13726pub fn _mm256_maskz_cvtsepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
13727    unsafe { transmute(vpmovsqd256(a.as_i64x4(), i32x4::ZERO, k)) }
13728}
13729
13730/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
13731///
13732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi32&expand=1846)
13733#[inline]
13734#[target_feature(enable = "avx512f,avx512vl")]
13735#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13736#[cfg_attr(test, assert_instr(vpmovsqd))]
13737pub fn _mm_cvtsepi64_epi32(a: __m128i) -> __m128i {
13738    unsafe { transmute(vpmovsqd128(a.as_i64x2(), i32x4::ZERO, 0b11111111)) }
13739}
13740
13741/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13742///
13743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi32&expand=1847)
13744#[inline]
13745#[target_feature(enable = "avx512f,avx512vl")]
13746#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13747#[cfg_attr(test, assert_instr(vpmovsqd))]
13748pub fn _mm_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13749    unsafe { transmute(vpmovsqd128(a.as_i64x2(), src.as_i32x4(), k)) }
13750}
13751
13752/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13753///
13754/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi32&expand=1848)
13755#[inline]
13756#[target_feature(enable = "avx512f,avx512vl")]
13757#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13758#[cfg_attr(test, assert_instr(vpmovsqd))]
13759pub fn _mm_maskz_cvtsepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
13760    unsafe { transmute(vpmovsqd128(a.as_i64x2(), i32x4::ZERO, k)) }
13761}
13762
13763/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13764///
13765/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi16&expand=1843)
13766#[inline]
13767#[target_feature(enable = "avx512f")]
13768#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13769#[cfg_attr(test, assert_instr(vpmovsqw))]
13770pub fn _mm512_cvtsepi64_epi16(a: __m512i) -> __m128i {
13771    unsafe { transmute(vpmovsqw(a.as_i64x8(), i16x8::ZERO, 0b11111111)) }
13772}
13773
13774/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13775///
13776/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi16&expand=1844)
13777#[inline]
13778#[target_feature(enable = "avx512f")]
13779#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13780#[cfg_attr(test, assert_instr(vpmovsqw))]
13781pub fn _mm512_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
13782    unsafe { transmute(vpmovsqw(a.as_i64x8(), src.as_i16x8(), k)) }
13783}
13784
13785/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13786///
13787/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi16&expand=1845)
13788#[inline]
13789#[target_feature(enable = "avx512f")]
13790#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13791#[cfg_attr(test, assert_instr(vpmovsqw))]
13792pub fn _mm512_maskz_cvtsepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
13793    unsafe { transmute(vpmovsqw(a.as_i64x8(), i16x8::ZERO, k)) }
13794}
13795
13796/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13797///
13798/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi16&expand=1840)
13799#[inline]
13800#[target_feature(enable = "avx512f,avx512vl")]
13801#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13802#[cfg_attr(test, assert_instr(vpmovsqw))]
13803pub fn _mm256_cvtsepi64_epi16(a: __m256i) -> __m128i {
13804    unsafe { transmute(vpmovsqw256(a.as_i64x4(), i16x8::ZERO, 0b11111111)) }
13805}
13806
13807/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13808///
13809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi16&expand=1841)
13810#[inline]
13811#[target_feature(enable = "avx512f,avx512vl")]
13812#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13813#[cfg_attr(test, assert_instr(vpmovsqw))]
13814pub fn _mm256_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13815    unsafe { transmute(vpmovsqw256(a.as_i64x4(), src.as_i16x8(), k)) }
13816}
13817
13818/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13819///
13820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi16&expand=1842)
13821#[inline]
13822#[target_feature(enable = "avx512f,avx512vl")]
13823#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13824#[cfg_attr(test, assert_instr(vpmovsqw))]
13825pub fn _mm256_maskz_cvtsepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
13826    unsafe { transmute(vpmovsqw256(a.as_i64x4(), i16x8::ZERO, k)) }
13827}
13828
13829/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13830///
13831/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi16&expand=1837)
13832#[inline]
13833#[target_feature(enable = "avx512f,avx512vl")]
13834#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13835#[cfg_attr(test, assert_instr(vpmovsqw))]
13836pub fn _mm_cvtsepi64_epi16(a: __m128i) -> __m128i {
13837    unsafe { transmute(vpmovsqw128(a.as_i64x2(), i16x8::ZERO, 0b11111111)) }
13838}
13839
13840/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13841///
13842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi16&expand=1838)
13843#[inline]
13844#[target_feature(enable = "avx512f,avx512vl")]
13845#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13846#[cfg_attr(test, assert_instr(vpmovsqw))]
13847pub fn _mm_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13848    unsafe { transmute(vpmovsqw128(a.as_i64x2(), src.as_i16x8(), k)) }
13849}
13850
13851/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13852///
13853/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi16&expand=1839)
13854#[inline]
13855#[target_feature(enable = "avx512f,avx512vl")]
13856#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13857#[cfg_attr(test, assert_instr(vpmovsqw))]
13858pub fn _mm_maskz_cvtsepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
13859    unsafe { transmute(vpmovsqw128(a.as_i64x2(), i16x8::ZERO, k)) }
13860}
13861
13862/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13863///
13864/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi8&expand=1861)
13865#[inline]
13866#[target_feature(enable = "avx512f")]
13867#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13868#[cfg_attr(test, assert_instr(vpmovsqb))]
13869pub fn _mm512_cvtsepi64_epi8(a: __m512i) -> __m128i {
13870    unsafe { transmute(vpmovsqb(a.as_i64x8(), i8x16::ZERO, 0b11111111)) }
13871}
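
// Illustrative sketch, not part of the upstream source (the helper name is
// hypothetical): the eight 64-bit lanes saturate into the low 8 bytes of
// the 128-bit result, and the upper 8 bytes come out as zero.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_mm512_cvtsepi64_epi8() {
    let a = _mm512_set1_epi64(i64::MAX);
    let r = _mm512_cvtsepi64_epi8(a);
    let lanes: [i8; 16] = unsafe { transmute(r) };
    assert_eq!(lanes[0], i8::MAX);
    assert_eq!(lanes[7], i8::MAX);
    assert_eq!(lanes[8], 0);
}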
13872
13873/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13874///
13875/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi8&expand=1862)
13876#[inline]
13877#[target_feature(enable = "avx512f")]
13878#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13879#[cfg_attr(test, assert_instr(vpmovsqb))]
13880pub fn _mm512_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
13881    unsafe { transmute(vpmovsqb(a.as_i64x8(), src.as_i8x16(), k)) }
13882}
13883
13884/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13885///
13886/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi8&expand=1863)
13887#[inline]
13888#[target_feature(enable = "avx512f")]
13889#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13890#[cfg_attr(test, assert_instr(vpmovsqb))]
13891pub fn _mm512_maskz_cvtsepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
13892    unsafe { transmute(vpmovsqb(a.as_i64x8(), i8x16::ZERO, k)) }
13893}
13894
13895/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13896///
13897/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi8&expand=1858)
13898#[inline]
13899#[target_feature(enable = "avx512f,avx512vl")]
13900#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13901#[cfg_attr(test, assert_instr(vpmovsqb))]
13902pub fn _mm256_cvtsepi64_epi8(a: __m256i) -> __m128i {
13903    unsafe { transmute(vpmovsqb256(a.as_i64x4(), i8x16::ZERO, 0b11111111)) }
13904}
13905
13906/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13907///
13908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi8&expand=1859)
13909#[inline]
13910#[target_feature(enable = "avx512f,avx512vl")]
13911#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13912#[cfg_attr(test, assert_instr(vpmovsqb))]
13913pub fn _mm256_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13914    unsafe { transmute(vpmovsqb256(a.as_i64x4(), src.as_i8x16(), k)) }
13915}
13916
13917/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13918///
13919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi8&expand=1860)
13920#[inline]
13921#[target_feature(enable = "avx512f,avx512vl")]
13922#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13923#[cfg_attr(test, assert_instr(vpmovsqb))]
13924pub fn _mm256_maskz_cvtsepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
13925    unsafe { transmute(vpmovsqb256(a.as_i64x4(), i8x16::ZERO, k)) }
13926}
13927
13928/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13929///
13930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi8&expand=1855)
13931#[inline]
13932#[target_feature(enable = "avx512f,avx512vl")]
13933#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13934#[cfg_attr(test, assert_instr(vpmovsqb))]
13935pub fn _mm_cvtsepi64_epi8(a: __m128i) -> __m128i {
13936    unsafe { transmute(vpmovsqb128(a.as_i64x2(), i8x16::ZERO, 0b11111111)) }
13937}
13938
13939/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13940///
13941/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi8&expand=1856)
13942#[inline]
13943#[target_feature(enable = "avx512f,avx512vl")]
13944#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13945#[cfg_attr(test, assert_instr(vpmovsqb))]
13946pub fn _mm_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13947    unsafe { transmute(vpmovsqb128(a.as_i64x2(), src.as_i8x16(), k)) }
13948}
13949
13950/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13951///
13952/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi8&expand=1857)
13953#[inline]
13954#[target_feature(enable = "avx512f,avx512vl")]
13955#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13956#[cfg_attr(test, assert_instr(vpmovsqb))]
13957pub fn _mm_maskz_cvtsepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
13958    unsafe { transmute(vpmovsqb128(a.as_i64x2(), i8x16::ZERO, k)) }
13959}
13960
13961/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
13962///
13963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi32_epi16&expand=2054)
13964#[inline]
13965#[target_feature(enable = "avx512f")]
13966#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13967#[cfg_attr(test, assert_instr(vpmovusdw))]
13968pub fn _mm512_cvtusepi32_epi16(a: __m512i) -> __m256i {
13969    unsafe { transmute(vpmovusdw(a.as_u32x16(), u16x16::ZERO, 0b11111111_11111111)) }
13970}
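
// Illustrative sketch (added commentary, not upstream code; the helper name and values
// are hypothetical, and `_mm512_set1_epi32` is assumed from this module). Unsigned
// saturation clamps each unsigned 32-bit lane to at most u16::MAX before narrowing.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cvtusepi32_epi16() -> __m256i {
    // 70_000 does not fit in a u16, so every output lane should hold 65_535.
    let a = _mm512_set1_epi32(70_000);
    _mm512_cvtusepi32_epi16(a)
}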
13971
13972/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13973///
13974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_epi16&expand=2055)
13975#[inline]
13976#[target_feature(enable = "avx512f")]
13977#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13978#[cfg_attr(test, assert_instr(vpmovusdw))]
13979pub fn _mm512_mask_cvtusepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
13980    unsafe { transmute(vpmovusdw(a.as_u32x16(), src.as_u16x16(), k)) }
13981}
13982
13983/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13984///
13985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi32_epi16&expand=2056)
13986#[inline]
13987#[target_feature(enable = "avx512f")]
13988#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13989#[cfg_attr(test, assert_instr(vpmovusdw))]
13990pub fn _mm512_maskz_cvtusepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
13991    unsafe { transmute(vpmovusdw(a.as_u32x16(), u16x16::ZERO, k)) }
13992}
13993
13994/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
13995///
13996/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi32_epi16&expand=2051)
13997#[inline]
13998#[target_feature(enable = "avx512f,avx512vl")]
13999#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14000#[cfg_attr(test, assert_instr(vpmovusdw))]
14001pub fn _mm256_cvtusepi32_epi16(a: __m256i) -> __m128i {
14002    unsafe { transmute(vpmovusdw256(a.as_u32x8(), u16x8::ZERO, 0b11111111)) }
14003}
14004
14005/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14006///
14007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_epi16&expand=2052)
14008#[inline]
14009#[target_feature(enable = "avx512f,avx512vl")]
14010#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14011#[cfg_attr(test, assert_instr(vpmovusdw))]
14012pub fn _mm256_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14013    unsafe { transmute(vpmovusdw256(a.as_u32x8(), src.as_u16x8(), k)) }
14014}
14015
14016/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14017///
14018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi32_epi16&expand=2053)
14019#[inline]
14020#[target_feature(enable = "avx512f,avx512vl")]
14021#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14022#[cfg_attr(test, assert_instr(vpmovusdw))]
14023pub fn _mm256_maskz_cvtusepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
14024    unsafe { transmute(vpmovusdw256(a.as_u32x8(), u16x8::ZERO, k)) }
14025}
14026
14027/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14028///
14029/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi32_epi16&expand=2048)
14030#[inline]
14031#[target_feature(enable = "avx512f,avx512vl")]
14032#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14033#[cfg_attr(test, assert_instr(vpmovusdw))]
14034pub fn _mm_cvtusepi32_epi16(a: __m128i) -> __m128i {
14035    unsafe { transmute(vpmovusdw128(a.as_u32x4(), u16x8::ZERO, 0b11111111)) }
14036}
14037
14038/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14039///
14040/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_epi16&expand=2049)
14041#[inline]
14042#[target_feature(enable = "avx512f,avx512vl")]
14043#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14044#[cfg_attr(test, assert_instr(vpmovusdw))]
14045pub fn _mm_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14046    unsafe { transmute(vpmovusdw128(a.as_u32x4(), src.as_u16x8(), k)) }
14047}
14048
14049/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14050///
14051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi32_epi16&expand=2050)
14052#[inline]
14053#[target_feature(enable = "avx512f,avx512vl")]
14054#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14055#[cfg_attr(test, assert_instr(vpmovusdw))]
14056pub fn _mm_maskz_cvtusepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
14057    unsafe { transmute(vpmovusdw128(a.as_u32x4(), u16x8::ZERO, k)) }
14058}
14059
14060/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14061///
14062/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi32_epi8&expand=2063)
14063#[inline]
14064#[target_feature(enable = "avx512f")]
14065#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14066#[cfg_attr(test, assert_instr(vpmovusdb))]
14067pub fn _mm512_cvtusepi32_epi8(a: __m512i) -> __m128i {
14068    unsafe { transmute(vpmovusdb(a.as_u32x16(), u8x16::ZERO, 0b11111111_11111111)) }
14069}
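
// Illustrative sketch (added commentary, not upstream code; the helper name and values
// are hypothetical). The sixteen unsigned 32-bit lanes are each clamped to u8::MAX and
// narrowed, exactly filling the 16 bytes of the __m128i result.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cvtusepi32_epi8() -> __m128i {
    // 300 saturates to 255; an in-range value such as 42 would pass through unchanged.
    let a = _mm512_set1_epi32(300);
    _mm512_cvtusepi32_epi8(a)
}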
14070
14071/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14072///
14073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_epi8&expand=2064)
14074#[inline]
14075#[target_feature(enable = "avx512f")]
14076#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14077#[cfg_attr(test, assert_instr(vpmovusdb))]
14078pub fn _mm512_mask_cvtusepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
14079    unsafe { transmute(vpmovusdb(a.as_u32x16(), src.as_u8x16(), k)) }
14080}
14081
14082/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14083///
14084/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi32_epi8&expand=2065)
14085#[inline]
14086#[target_feature(enable = "avx512f")]
14087#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14088#[cfg_attr(test, assert_instr(vpmovusdb))]
14089pub fn _mm512_maskz_cvtusepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
14090    unsafe { transmute(vpmovusdb(a.as_u32x16(), u8x16::ZERO, k)) }
14091}
14092
14093/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14094///
14095/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi32_epi8&expand=2060)
14096#[inline]
14097#[target_feature(enable = "avx512f,avx512vl")]
14098#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14099#[cfg_attr(test, assert_instr(vpmovusdb))]
14100pub fn _mm256_cvtusepi32_epi8(a: __m256i) -> __m128i {
14101    unsafe { transmute(vpmovusdb256(a.as_u32x8(), u8x16::ZERO, 0b11111111)) }
14102}
14103
14104/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14105///
14106/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_epi8&expand=2061)
14107#[inline]
14108#[target_feature(enable = "avx512f,avx512vl")]
14109#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14110#[cfg_attr(test, assert_instr(vpmovusdb))]
14111pub fn _mm256_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14112    unsafe { transmute(vpmovusdb256(a.as_u32x8(), src.as_u8x16(), k)) }
14113}
14114
14115/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14116///
14117/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi32_epi8&expand=2062)
14118#[inline]
14119#[target_feature(enable = "avx512f,avx512vl")]
14120#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14121#[cfg_attr(test, assert_instr(vpmovusdb))]
14122pub fn _mm256_maskz_cvtusepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
14123    unsafe { transmute(vpmovusdb256(a.as_u32x8(), u8x16::ZERO, k)) }
14124}
14125
14126/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14127///
14128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi32_epi8&expand=2057)
14129#[inline]
14130#[target_feature(enable = "avx512f,avx512vl")]
14131#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14132#[cfg_attr(test, assert_instr(vpmovusdb))]
14133pub fn _mm_cvtusepi32_epi8(a: __m128i) -> __m128i {
14134    unsafe { transmute(vpmovusdb128(a.as_u32x4(), u8x16::ZERO, 0b11111111)) }
14135}
14136
14137/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14138///
14139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_epi8&expand=2058)
14140#[inline]
14141#[target_feature(enable = "avx512f,avx512vl")]
14142#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14143#[cfg_attr(test, assert_instr(vpmovusdb))]
14144pub fn _mm_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14145    unsafe { transmute(vpmovusdb128(a.as_u32x4(), src.as_u8x16(), k)) }
14146}
14147
14148/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14149///
14150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi32_epi8&expand=2059)
14151#[inline]
14152#[target_feature(enable = "avx512f,avx512vl")]
14153#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14154#[cfg_attr(test, assert_instr(vpmovusdb))]
14155pub fn _mm_maskz_cvtusepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
14156    unsafe { transmute(vpmovusdb128(a.as_u32x4(), u8x16::ZERO, k)) }
14157}
14158
14159/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
14160///
14161/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi32&expand=2087)
14162#[inline]
14163#[target_feature(enable = "avx512f")]
14164#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14165#[cfg_attr(test, assert_instr(vpmovusqd))]
14166pub fn _mm512_cvtusepi64_epi32(a: __m512i) -> __m256i {
14167    unsafe { transmute(vpmovusqd(a.as_u64x8(), u32x8::ZERO, 0b11111111)) }
14168}
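
// Illustrative sketch (added commentary, not upstream code; the helper name and values
// are hypothetical). Unsigned saturation caps each unsigned 64-bit lane at u32::MAX,
// so anything at or above 2^32 narrows to 0xFFFF_FFFF.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cvtusepi64_epi32() -> __m256i {
    // 1 << 32 is just out of u32 range and should saturate to u32::MAX.
    let a = _mm512_set1_epi64(1 << 32);
    _mm512_cvtusepi64_epi32(a)
}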
14169
14170/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14171///
14172/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi32&expand=2088)
14173#[inline]
14174#[target_feature(enable = "avx512f")]
14175#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14176#[cfg_attr(test, assert_instr(vpmovusqd))]
14177pub fn _mm512_mask_cvtusepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
14178    unsafe { transmute(vpmovusqd(a.as_u64x8(), src.as_u32x8(), k)) }
14179}
14180
14181/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14182///
14183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi32&expand=2089)
14184#[inline]
14185#[target_feature(enable = "avx512f")]
14186#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14187#[cfg_attr(test, assert_instr(vpmovusqd))]
14188pub fn _mm512_maskz_cvtusepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
14189    unsafe { transmute(vpmovusqd(a.as_u64x8(), u32x8::ZERO, k)) }
14190}
14191
14192/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
14193///
14194/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi32&expand=2084)
14195#[inline]
14196#[target_feature(enable = "avx512f,avx512vl")]
14197#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14198#[cfg_attr(test, assert_instr(vpmovusqd))]
14199pub fn _mm256_cvtusepi64_epi32(a: __m256i) -> __m128i {
14200    unsafe { transmute(vpmovusqd256(a.as_u64x4(), u32x4::ZERO, 0b11111111)) }
14201}
14202
14203/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14204///
14205/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi32&expand=2085)
14206#[inline]
14207#[target_feature(enable = "avx512f,avx512vl")]
14208#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14209#[cfg_attr(test, assert_instr(vpmovusqd))]
14210pub fn _mm256_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14211    unsafe { transmute(vpmovusqd256(a.as_u64x4(), src.as_u32x4(), k)) }
14212}
14213
14214/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14215///
14216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi32&expand=2086)
14217#[inline]
14218#[target_feature(enable = "avx512f,avx512vl")]
14219#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14220#[cfg_attr(test, assert_instr(vpmovusqd))]
14221pub fn _mm256_maskz_cvtusepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
14222    unsafe { transmute(vpmovusqd256(a.as_u64x4(), u32x4::ZERO, k)) }
14223}
14224
14225/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
14226///
14227/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi32&expand=2081)
14228#[inline]
14229#[target_feature(enable = "avx512f,avx512vl")]
14230#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14231#[cfg_attr(test, assert_instr(vpmovusqd))]
14232pub fn _mm_cvtusepi64_epi32(a: __m128i) -> __m128i {
14233    unsafe { transmute(vpmovusqd128(a.as_u64x2(), u32x4::ZERO, 0b11111111)) }
14234}
14235
14236/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14237///
14238/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi32&expand=2082)
14239#[inline]
14240#[target_feature(enable = "avx512f,avx512vl")]
14241#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14242#[cfg_attr(test, assert_instr(vpmovusqd))]
14243pub fn _mm_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14244    unsafe { transmute(vpmovusqd128(a.as_u64x2(), src.as_u32x4(), k)) }
14245}
14246
14247/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14248///
14249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi32&expand=2083)
14250#[inline]
14251#[target_feature(enable = "avx512f,avx512vl")]
14252#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14253#[cfg_attr(test, assert_instr(vpmovusqd))]
14254pub fn _mm_maskz_cvtusepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
14255    unsafe { transmute(vpmovusqd128(a.as_u64x2(), u32x4::ZERO, k)) }
14256}
14257
14258/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14259///
14260/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi16&expand=2078)
14261#[inline]
14262#[target_feature(enable = "avx512f")]
14263#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14264#[cfg_attr(test, assert_instr(vpmovusqw))]
14265pub fn _mm512_cvtusepi64_epi16(a: __m512i) -> __m128i {
14266    unsafe { transmute(vpmovusqw(a.as_u64x8(), u16x8::ZERO, 0b11111111)) }
14267}
14268
14269/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14270///
14271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi16&expand=2079)
14272#[inline]
14273#[target_feature(enable = "avx512f")]
14274#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14275#[cfg_attr(test, assert_instr(vpmovusqw))]
14276pub fn _mm512_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
14277    unsafe { transmute(vpmovusqw(a.as_u64x8(), src.as_u16x8(), k)) }
14278}
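
// Illustrative sketch (added commentary, not upstream code; the helper name and values
// are hypothetical, and `_mm_set1_epi16` / `_mm512_set1_epi64` are assumed from this
// crate). With a writemask, lanes whose bit is set receive the saturated conversion and
// the remaining lanes keep the corresponding 16-bit element of `src`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_mask_cvtusepi64_epi16() -> __m128i {
    let src = _mm_set1_epi16(7);
    // 70_000 saturates to 65_535 wherever the mask selects conversion.
    let a = _mm512_set1_epi64(70_000);
    // Even-numbered lanes take the saturated value 65_535; odd-numbered lanes keep 7.
    _mm512_mask_cvtusepi64_epi16(src, 0b0101_0101, a)
}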
14279
14280/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14281///
14282/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi16&expand=2080)
14283#[inline]
14284#[target_feature(enable = "avx512f")]
14285#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14286#[cfg_attr(test, assert_instr(vpmovusqw))]
14287pub fn _mm512_maskz_cvtusepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
14288    unsafe { transmute(vpmovusqw(a.as_u64x8(), u16x8::ZERO, k)) }
14289}
14290
14291/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14292///
14293/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi16&expand=2075)
14294#[inline]
14295#[target_feature(enable = "avx512f,avx512vl")]
14296#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14297#[cfg_attr(test, assert_instr(vpmovusqw))]
14298pub fn _mm256_cvtusepi64_epi16(a: __m256i) -> __m128i {
14299    unsafe { transmute(vpmovusqw256(a.as_u64x4(), u16x8::ZERO, 0b11111111)) }
14300}
14301
14302/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14303///
14304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi16&expand=2076)
14305#[inline]
14306#[target_feature(enable = "avx512f,avx512vl")]
14307#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14308#[cfg_attr(test, assert_instr(vpmovusqw))]
14309pub fn _mm256_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14310    unsafe { transmute(vpmovusqw256(a.as_u64x4(), src.as_u16x8(), k)) }
14311}
14312
14313/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14314///
14315/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi16&expand=2077)
14316#[inline]
14317#[target_feature(enable = "avx512f,avx512vl")]
14318#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14319#[cfg_attr(test, assert_instr(vpmovusqw))]
14320pub fn _mm256_maskz_cvtusepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
14321    unsafe { transmute(vpmovusqw256(a.as_u64x4(), u16x8::ZERO, k)) }
14322}
14323
14324/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14325///
14326/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi16&expand=2072)
14327#[inline]
14328#[target_feature(enable = "avx512f,avx512vl")]
14329#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14330#[cfg_attr(test, assert_instr(vpmovusqw))]
14331pub fn _mm_cvtusepi64_epi16(a: __m128i) -> __m128i {
14332    unsafe { transmute(vpmovusqw128(a.as_u64x2(), u16x8::ZERO, 0b11111111)) }
14333}
14334
14335/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14336///
14337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi16&expand=2073)
14338#[inline]
14339#[target_feature(enable = "avx512f,avx512vl")]
14340#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14341#[cfg_attr(test, assert_instr(vpmovusqw))]
14342pub fn _mm_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14343    unsafe { transmute(vpmovusqw128(a.as_u64x2(), src.as_u16x8(), k)) }
14344}
14345
14346/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14347///
14348/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi16&expand=2074)
14349#[inline]
14350#[target_feature(enable = "avx512f,avx512vl")]
14351#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14352#[cfg_attr(test, assert_instr(vpmovusqw))]
14353pub fn _mm_maskz_cvtusepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
14354    unsafe { transmute(vpmovusqw128(a.as_u64x2(), u16x8::ZERO, k)) }
14355}
14356
14357/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14358///
14359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi8&expand=2096)
14360#[inline]
14361#[target_feature(enable = "avx512f")]
14362#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14363#[cfg_attr(test, assert_instr(vpmovusqb))]
14364pub fn _mm512_cvtusepi64_epi8(a: __m512i) -> __m128i {
14365    unsafe { transmute(vpmovusqb(a.as_u64x8(), u8x16::ZERO, 0b11111111)) }
14366}
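
// Illustrative sketch (added commentary, not upstream code; the helper name and values
// are hypothetical). Only the low 8 bytes of the returned __m128i carry data (one byte
// per 64-bit source lane); the upper 8 bytes are zeroed by the operation.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cvtusepi64_epi8() -> __m128i {
    // 300 exceeds u8::MAX and should saturate to 255 in each converted byte.
    let a = _mm512_set1_epi64(300);
    _mm512_cvtusepi64_epi8(a)
}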
14367
14368/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14369///
14370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi8&expand=2097)
14371#[inline]
14372#[target_feature(enable = "avx512f")]
14373#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14374#[cfg_attr(test, assert_instr(vpmovusqb))]
14375pub fn _mm512_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
14376    unsafe { transmute(vpmovusqb(a.as_u64x8(), src.as_u8x16(), k)) }
14377}
14378
14379/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14380///
14381/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi8&expand=2098)
14382#[inline]
14383#[target_feature(enable = "avx512f")]
14384#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14385#[cfg_attr(test, assert_instr(vpmovusqb))]
14386pub fn _mm512_maskz_cvtusepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
14387    unsafe { transmute(vpmovusqb(a.as_u64x8(), u8x16::ZERO, k)) }
14388}
14389
14390/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14391///
14392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi8&expand=2093)
14393#[inline]
14394#[target_feature(enable = "avx512f,avx512vl")]
14395#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14396#[cfg_attr(test, assert_instr(vpmovusqb))]
14397pub fn _mm256_cvtusepi64_epi8(a: __m256i) -> __m128i {
14398    unsafe { transmute(vpmovusqb256(a.as_u64x4(), u8x16::ZERO, 0b11111111)) }
14399}
14400
14401/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14402///
14403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi8&expand=2094)
14404#[inline]
14405#[target_feature(enable = "avx512f,avx512vl")]
14406#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14407#[cfg_attr(test, assert_instr(vpmovusqb))]
14408pub fn _mm256_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14409    unsafe { transmute(vpmovusqb256(a.as_u64x4(), src.as_u8x16(), k)) }
14410}
14411
14412/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14413///
14414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi8&expand=2095)
14415#[inline]
14416#[target_feature(enable = "avx512f,avx512vl")]
14417#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14418#[cfg_attr(test, assert_instr(vpmovusqb))]
14419pub fn _mm256_maskz_cvtusepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
14420    unsafe { transmute(vpmovusqb256(a.as_u64x4(), u8x16::ZERO, k)) }
14421}
14422
14423/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14424///
14425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi8&expand=2090)
14426#[inline]
14427#[target_feature(enable = "avx512f,avx512vl")]
14428#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14429#[cfg_attr(test, assert_instr(vpmovusqb))]
14430pub fn _mm_cvtusepi64_epi8(a: __m128i) -> __m128i {
14431    unsafe { transmute(vpmovusqb128(a.as_u64x2(), u8x16::ZERO, 0b11111111)) }
14432}
14433
14434/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14435///
14436/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi8&expand=2091)
14437#[inline]
14438#[target_feature(enable = "avx512f,avx512vl")]
14439#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14440#[cfg_attr(test, assert_instr(vpmovusqb))]
14441pub fn _mm_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14442    unsafe { transmute(vpmovusqb128(a.as_u64x2(), src.as_u8x16(), k)) }
14443}
14444
14445/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14446///
14447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi8&expand=2092)
14448#[inline]
14449#[target_feature(enable = "avx512f,avx512vl")]
14450#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14451#[cfg_attr(test, assert_instr(vpmovusqb))]
14452pub fn _mm_maskz_cvtusepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
14453    unsafe { transmute(vpmovusqb128(a.as_u64x2(), u8x16::ZERO, k)) }
14454}
14455
14456/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.\
14457///
14458/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14459/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14460/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14461/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14462/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14463/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14464///
14465/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epi32&expand=1335)
14466#[inline]
14467#[target_feature(enable = "avx512f")]
14468#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14469#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
14470#[rustc_legacy_const_generics(1)]
14471pub fn _mm512_cvt_roundps_epi32<const ROUNDING: i32>(a: __m512) -> __m512i {
14472    unsafe {
14473        static_assert_rounding!(ROUNDING);
14474        let a = a.as_f32x16();
14475        let r = vcvtps2dq(a, i32x16::ZERO, 0b11111111_11111111, ROUNDING);
14476        transmute(r)
14477    }
14478}
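
// Illustrative sketch (added commentary, not upstream code; the helper name and values
// are hypothetical, and the `_MM_FROUND_*` constants are assumed from this crate). The
// const generic picks the rounding mode for this one conversion without touching MXCSR:
// 2.5 should become 3 with round-up but 2 with truncation.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cvt_roundps_epi32() -> (__m512i, __m512i) {
    let a = _mm512_set1_ps(2.5);
    let up = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
    let trunc = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
    (up, trunc)
}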
14479
14480/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14481///
14482/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14483/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14484/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14485/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14486/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14487/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14488///
14489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epi32&expand=1336)
14490#[inline]
14491#[target_feature(enable = "avx512f")]
14492#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14493#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
14494#[rustc_legacy_const_generics(3)]
14495pub fn _mm512_mask_cvt_roundps_epi32<const ROUNDING: i32>(
14496    src: __m512i,
14497    k: __mmask16,
14498    a: __m512,
14499) -> __m512i {
14500    unsafe {
14501        static_assert_rounding!(ROUNDING);
14502        let a = a.as_f32x16();
14503        let src = src.as_i32x16();
14504        let r = vcvtps2dq(a, src, k, ROUNDING);
14505        transmute(r)
14506    }
14507}
14508
14509/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14510///
14511/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14512/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14513/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14514/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14515/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14516/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14517///
14518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epi32&expand=1337)
14519#[inline]
14520#[target_feature(enable = "avx512f")]
14521#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14522#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
14523#[rustc_legacy_const_generics(2)]
14524pub fn _mm512_maskz_cvt_roundps_epi32<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512i {
14525    unsafe {
14526        static_assert_rounding!(ROUNDING);
14527        let a = a.as_f32x16();
14528        let r = vcvtps2dq(a, i32x16::ZERO, k, ROUNDING);
14529        transmute(r)
14530    }
14531}
14532
14533/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.\
14534///
14535/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14536/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14537/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14538/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14539/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14540/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14541///
14542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epu32&expand=1341)
14543#[inline]
14544#[target_feature(enable = "avx512f")]
14545#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14546#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
14547#[rustc_legacy_const_generics(1)]
14548pub fn _mm512_cvt_roundps_epu32<const ROUNDING: i32>(a: __m512) -> __m512i {
14549    unsafe {
14550        static_assert_rounding!(ROUNDING);
14551        let a = a.as_f32x16();
14552        let r = vcvtps2udq(a, u32x16::ZERO, 0b11111111_11111111, ROUNDING);
14553        transmute(r)
14554    }
14555}
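
// Illustrative sketch (added commentary, not upstream code; the helper name and values
// are hypothetical). Passing _MM_FROUND_CUR_DIRECTION keeps whatever rounding mode
// MXCSR.RC currently selects, which is round-to-nearest-even unless the program has
// changed it.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cvt_roundps_epu32() -> __m512i {
    // 3.5 should convert to 4 under the default round-to-nearest-even mode.
    let a = _mm512_set1_ps(3.5);
    _mm512_cvt_roundps_epu32::<_MM_FROUND_CUR_DIRECTION>(a)
}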
14556
14557/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14558///
14559/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14560/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14561/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14562/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14563/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14564/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14565///
14566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epu32&expand=1342)
14567#[inline]
14568#[target_feature(enable = "avx512f")]
14569#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14570#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
14571#[rustc_legacy_const_generics(3)]
14572pub fn _mm512_mask_cvt_roundps_epu32<const ROUNDING: i32>(
14573    src: __m512i,
14574    k: __mmask16,
14575    a: __m512,
14576) -> __m512i {
14577    unsafe {
14578        static_assert_rounding!(ROUNDING);
14579        let a = a.as_f32x16();
14580        let src = src.as_u32x16();
14581        let r = vcvtps2udq(a, src, k, ROUNDING);
14582        transmute(r)
14583    }
14584}
14585
14586/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14587///
14588/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14589/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14590/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14591/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14592/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14593/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14594///
14595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epu32&expand=1343)
14596#[inline]
14597#[target_feature(enable = "avx512f")]
14598#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14599#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
14600#[rustc_legacy_const_generics(2)]
14601pub fn _mm512_maskz_cvt_roundps_epu32<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512i {
14602    unsafe {
14603        static_assert_rounding!(ROUNDING);
14604        let a = a.as_f32x16();
14605        let r = vcvtps2udq(a, u32x16::ZERO, k, ROUNDING);
14606        transmute(r)
14607    }
14608}
14609
14610/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.\
14611/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14612///
14613/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_pd&expand=1347)
14614#[inline]
14615#[target_feature(enable = "avx512f")]
14616#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14617#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
14618#[rustc_legacy_const_generics(1)]
14619pub fn _mm512_cvt_roundps_pd<const SAE: i32>(a: __m256) -> __m512d {
14620    unsafe {
14621        static_assert_sae!(SAE);
14622        let a = a.as_f32x8();
14623        let r = vcvtps2pd(a, f64x8::ZERO, 0b11111111, SAE);
14624        transmute(r)
14625    }
14626}
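
// Illustrative sketch (added commentary, not upstream code; the helper name and values
// are hypothetical, and `_mm256_set1_ps` is assumed from this crate). Widening f32 to
// f64 is always exact, so the SAE parameter only controls whether floating-point
// exceptions raised by the source operands (for example signalling NaNs) are suppressed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cvt_roundps_pd() -> __m512d {
    let a = _mm256_set1_ps(1.5);
    _mm512_cvt_roundps_pd::<_MM_FROUND_NO_EXC>(a)
}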
14627
14628/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14629/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14630///
14631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_pd&expand=1348)
14632#[inline]
14633#[target_feature(enable = "avx512f")]
14634#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14635#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
14636#[rustc_legacy_const_generics(3)]
14637pub fn _mm512_mask_cvt_roundps_pd<const SAE: i32>(src: __m512d, k: __mmask8, a: __m256) -> __m512d {
14638    unsafe {
14639        static_assert_sae!(SAE);
14640        let a = a.as_f32x8();
14641        let src = src.as_f64x8();
14642        let r = vcvtps2pd(a, src, k, SAE);
14643        transmute(r)
14644    }
14645}
14646
14647/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14648/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14649///
14650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_pd&expand=1349)
14651#[inline]
14652#[target_feature(enable = "avx512f")]
14653#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14654#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
14655#[rustc_legacy_const_generics(2)]
14656pub fn _mm512_maskz_cvt_roundps_pd<const SAE: i32>(k: __mmask8, a: __m256) -> __m512d {
14657    unsafe {
14658        static_assert_sae!(SAE);
14659        let a = a.as_f32x8();
14660        let r = vcvtps2pd(a, f64x8::ZERO, k, SAE);
14661        transmute(r)
14662    }
14663}
14664
14665/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.\
14666///
14667/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14668/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14669/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14670/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14671/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14672/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14673///
14674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epi32&expand=1315)
14675#[inline]
14676#[target_feature(enable = "avx512f")]
14677#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14678#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
14679#[rustc_legacy_const_generics(1)]
14680pub fn _mm512_cvt_roundpd_epi32<const ROUNDING: i32>(a: __m512d) -> __m256i {
14681    unsafe {
14682        static_assert_rounding!(ROUNDING);
14683        let a = a.as_f64x8();
14684        let r = vcvtpd2dq(a, i32x8::ZERO, 0b11111111, ROUNDING);
14685        transmute(r)
14686    }
14687}
14688
14689/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14690///
14691/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14692/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14693/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14694/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14695/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14696/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14697///
14698/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epi32&expand=1316)
14699#[inline]
14700#[target_feature(enable = "avx512f")]
14701#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14702#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
14703#[rustc_legacy_const_generics(3)]
14704pub fn _mm512_mask_cvt_roundpd_epi32<const ROUNDING: i32>(
14705    src: __m256i,
14706    k: __mmask8,
14707    a: __m512d,
14708) -> __m256i {
14709    unsafe {
14710        static_assert_rounding!(ROUNDING);
14711        let a = a.as_f64x8();
14712        let src = src.as_i32x8();
14713        let r = vcvtpd2dq(a, src, k, ROUNDING);
14714        transmute(r)
14715    }
14716}
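
// Illustrative sketch (added commentary, not upstream code; the helper name and values
// are hypothetical, and `_mm256_set1_epi32` is assumed from this crate). The writemask
// form combines per-lane selection with an explicit rounding mode: masked-off lanes
// keep the corresponding element of `src` (here, -1).
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_mask_cvt_roundpd_epi32() -> __m256i {
    let src = _mm256_set1_epi32(-1);
    let a = _mm512_set1_pd(2.5);
    // The low four lanes should become 2 (round toward -inf); the high four stay -1.
    _mm512_mask_cvt_roundpd_epi32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(
        src, 0b0000_1111, a,
    )
}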
14717
14718/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14719///
14720/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14721/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14722/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14723/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14724/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14725/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14726///
14727/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epi32&expand=1317)
14728#[inline]
14729#[target_feature(enable = "avx512f")]
14730#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14731#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
14732#[rustc_legacy_const_generics(2)]
14733pub fn _mm512_maskz_cvt_roundpd_epi32<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256i {
14734    unsafe {
14735        static_assert_rounding!(ROUNDING);
14736        let a = a.as_f64x8();
14737        let r = vcvtpd2dq(a, i32x8::ZERO, k, ROUNDING);
14738        transmute(r)
14739    }
14740}
14741
14742/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.\
14743///
14744/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14745/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14746/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14747/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14748/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14749/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14750///
14751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epu32&expand=1321)
14752#[inline]
14753#[target_feature(enable = "avx512f")]
14754#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14755#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
14756#[rustc_legacy_const_generics(1)]
14757pub fn _mm512_cvt_roundpd_epu32<const ROUNDING: i32>(a: __m512d) -> __m256i {
14758    unsafe {
14759        static_assert_rounding!(ROUNDING);
14760        let a = a.as_f64x8();
14761        let r = vcvtpd2udq(a, u32x8::ZERO, 0b11111111, ROUNDING);
14762        transmute(r)
14763    }
14764}
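
// Illustrative sketch (added commentary, not upstream code; the helper name and values
// are hypothetical). The unsigned form accepts results up to u32::MAX, so a value such
// as 4e9, which overflows i32, still converts cleanly.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cvt_roundpd_epu32() -> __m256i {
    let a = _mm512_set1_pd(4_000_000_000.0);
    _mm512_cvt_roundpd_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a)
}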
14765
14766/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14767///
14768/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14769/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14770/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14771/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14772/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14773/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14774///
14775/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epu32&expand=1322)
14776#[inline]
14777#[target_feature(enable = "avx512f")]
14778#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14779#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
14780#[rustc_legacy_const_generics(3)]
14781pub fn _mm512_mask_cvt_roundpd_epu32<const ROUNDING: i32>(
14782    src: __m256i,
14783    k: __mmask8,
14784    a: __m512d,
14785) -> __m256i {
14786    unsafe {
14787        static_assert_rounding!(ROUNDING);
14788        let a = a.as_f64x8();
14789        let src = src.as_u32x8();
14790        let r = vcvtpd2udq(a, src, k, ROUNDING);
14791        transmute(r)
14792    }
14793}
14794
14795/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14796///
14797/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14798/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14799/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14800/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14801/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14802/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14803///
14804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epu32&expand=1323)
14805#[inline]
14806#[target_feature(enable = "avx512f")]
14807#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14808#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
14809#[rustc_legacy_const_generics(2)]
14810pub fn _mm512_maskz_cvt_roundpd_epu32<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256i {
14811    unsafe {
14812        static_assert_rounding!(ROUNDING);
14813        let a = a.as_f64x8();
14814        let r = vcvtpd2udq(a, u32x8::ZERO, k, ROUNDING);
14815        transmute(r)
14816    }
14817}
14818
14819/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
14820///
14821/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14822/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14823/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14824/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14825/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14826/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14827///
14828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_ps&expand=1327)
14829#[inline]
14830#[target_feature(enable = "avx512f")]
14831#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14832#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
14833#[rustc_legacy_const_generics(1)]
14834pub fn _mm512_cvt_roundpd_ps<const ROUNDING: i32>(a: __m512d) -> __m256 {
14835    unsafe {
14836        static_assert_rounding!(ROUNDING);
14837        let a = a.as_f64x8();
14838        let r = vcvtpd2ps(a, f32x8::ZERO, 0b11111111, ROUNDING);
14839        transmute(r)
14840    }
14841}
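
// Illustrative sketch (added commentary, not upstream code; the helper name and values
// are hypothetical). Narrowing f64 to f32 can be inexact, and the rounding mode decides
// which neighbouring f32 is produced: 1.000000001 has no exact f32 representation, so
// truncation should yield exactly 1.0 while round-up should yield the next f32 above 1.0.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cvt_roundpd_ps() -> (__m256, __m256) {
    let a = _mm512_set1_pd(1.000000001);
    let down = _mm512_cvt_roundpd_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
    let up = _mm512_cvt_roundpd_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
    (down, up)
}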
14842
14843/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14844///
14845/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14846/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14847/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14848/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14849/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14850/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14851///
14852/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_ps&expand=1328)
14853#[inline]
14854#[target_feature(enable = "avx512f")]
14855#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14856#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
14857#[rustc_legacy_const_generics(3)]
14858pub fn _mm512_mask_cvt_roundpd_ps<const ROUNDING: i32>(
14859    src: __m256,
14860    k: __mmask8,
14861    a: __m512d,
14862) -> __m256 {
14863    unsafe {
14864        static_assert_rounding!(ROUNDING);
14865        let a = a.as_f64x8();
14866        let src = src.as_f32x8();
14867        let r = vcvtpd2ps(a, src, k, ROUNDING);
14868        transmute(r)
14869    }
14870}
14871
14872/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14873///
14874/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14875/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14876/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14877/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14878/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14879/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14880///
14881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_ps&expand=1329)
14882#[inline]
14883#[target_feature(enable = "avx512f")]
14884#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14885#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
14886#[rustc_legacy_const_generics(2)]
14887pub fn _mm512_maskz_cvt_roundpd_ps<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256 {
14888    unsafe {
14889        static_assert_rounding!(ROUNDING);
14890        let a = a.as_f64x8();
14891        let r = vcvtpd2ps(a, f32x8::ZERO, k, ROUNDING);
14892        transmute(r)
14893    }
14894}
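
// Illustrative only (not part of the crate's API): a minimal usage sketch of the
// `_mm512_cvt_roundpd_ps` family above, assuming the caller has already verified
// AVX-512F support (e.g. with `is_x86_feature_detected!("avx512f")`) and enabled
// the unstable `stdarch_x86_avx512` feature. The example function name is ours.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cvt_roundpd_ps() {
    const RND: i32 = _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC;
    let a = _mm512_set1_pd(1.75);
    // Plain conversion of all eight lanes, truncating toward zero.
    let all = _mm512_cvt_roundpd_ps::<RND>(a);
    // Writemask: lanes whose mask bit is clear are copied from `src`.
    let src = _mm512_castps512_ps256(_mm512_setzero_ps());
    let lo_half = _mm512_mask_cvt_roundpd_ps::<RND>(src, 0b0000_1111, a);
    // Zeromask: lanes whose mask bit is clear become 0.0 instead.
    let hi_half = _mm512_maskz_cvt_roundpd_ps::<RND>(0b1111_0000, a);
    let _ = (all, lo_half, hi_half);
}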
14895
14896/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
14897///
14898/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14899/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14900/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14901/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14902/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14903/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14904///
14905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepi32_ps&expand=1294)
14906#[inline]
14907#[target_feature(enable = "avx512f")]
14908#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14909#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
14910#[rustc_legacy_const_generics(1)]
14911pub fn _mm512_cvt_roundepi32_ps<const ROUNDING: i32>(a: __m512i) -> __m512 {
14912    unsafe {
14913        static_assert_rounding!(ROUNDING);
14914        let a = a.as_i32x16();
14915        let r = vcvtdq2ps(a, ROUNDING);
14916        transmute(r)
14917    }
14918}
14919
14920/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14921///
14922/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14923/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14924/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14925/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14926/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14927/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14928///
14929/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepi32_ps&expand=1295)
14930#[inline]
14931#[target_feature(enable = "avx512f")]
14932#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14933#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
14934#[rustc_legacy_const_generics(3)]
14935pub fn _mm512_mask_cvt_roundepi32_ps<const ROUNDING: i32>(
14936    src: __m512,
14937    k: __mmask16,
14938    a: __m512i,
14939) -> __m512 {
14940    unsafe {
14941        static_assert_rounding!(ROUNDING);
14942        let a = a.as_i32x16();
14943        let r = vcvtdq2ps(a, ROUNDING);
14944        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
14945    }
14946}
14947
14948/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14949///
14950/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14951/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14952/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14953/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14954/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14955/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14956///
14957/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepi32_ps&expand=1296)
14958#[inline]
14959#[target_feature(enable = "avx512f")]
14960#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14961#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
14962#[rustc_legacy_const_generics(2)]
14963pub fn _mm512_maskz_cvt_roundepi32_ps<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m512 {
14964    unsafe {
14965        static_assert_rounding!(ROUNDING);
14966        let a = a.as_i32x16();
14967        let r = vcvtdq2ps(a, ROUNDING);
14968        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
14969    }
14970}
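
// Illustrative only: a short sketch of `_mm512_cvt_roundepi32_ps` and its zeromask
// form under explicit rounding modes, assuming an AVX-512F capable CPU. The
// example function itself is hypothetical and not part of the module.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cvt_roundepi32_ps() {
    // 16 signed lanes; -3 converts to f32 exactly, so every rounding mode agrees.
    let a = _mm512_set1_epi32(-3);
    let r = _mm512_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
    // Zeromask variant: only the low eight lanes are converted, the rest are 0.0.
    let r_lo = _mm512_maskz_cvt_roundepi32_ps::<{ _MM_FROUND_CUR_DIRECTION }>(0x00ff, a);
    let _ = (r, r_lo);
}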
14971
14972/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
14973///
14974/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14975/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14976/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14977/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14978/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14979/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14980///
14981/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepu32_ps&expand=1303)
14982#[inline]
14983#[target_feature(enable = "avx512f")]
14984#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14985#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
14986#[rustc_legacy_const_generics(1)]
14987pub fn _mm512_cvt_roundepu32_ps<const ROUNDING: i32>(a: __m512i) -> __m512 {
14988    unsafe {
14989        static_assert_rounding!(ROUNDING);
14990        let a = a.as_u32x16();
14991        let r = vcvtudq2ps(a, ROUNDING);
14992        transmute(r)
14993    }
14994}
14995
14996/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14997///
14998/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14999/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15000/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15001/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15002/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15003/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15004///
15005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepu32_ps&expand=1304)
15006#[inline]
15007#[target_feature(enable = "avx512f")]
15008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15009#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
15010#[rustc_legacy_const_generics(3)]
15011pub fn _mm512_mask_cvt_roundepu32_ps<const ROUNDING: i32>(
15012    src: __m512,
15013    k: __mmask16,
15014    a: __m512i,
15015) -> __m512 {
15016    unsafe {
15017        static_assert_rounding!(ROUNDING);
15018        let a = a.as_u32x16();
15019        let r = vcvtudq2ps(a, ROUNDING);
15020        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
15021    }
15022}
15023
15024/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15025///
15026/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15027/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15028/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15029/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15030/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15031/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15032///
15033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepu32_ps&expand=1305)
15034#[inline]
15035#[target_feature(enable = "avx512f")]
15036#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15037#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
15038#[rustc_legacy_const_generics(2)]
15039pub fn _mm512_maskz_cvt_roundepu32_ps<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m512 {
15040    unsafe {
15041        static_assert_rounding!(ROUNDING);
15042        let a = a.as_u32x16();
15043        let r = vcvtudq2ps(a, ROUNDING);
15044        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
15045    }
15046}
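
// Illustrative only: the unsigned variant interprets each 32-bit lane as a u32,
// which matters for values with the sign bit set. A sketch assuming AVX-512F;
// the example function name is ours.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cvt_roundepu32_ps() {
    // All-ones lanes: as u32 this is 4294967295, as i32 it would be -1.
    let a = _mm512_set1_epi32(-1);
    let as_unsigned =
        _mm512_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
    let as_signed =
        _mm512_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
    // `as_unsigned` lanes are ~4.29e9 (rounded to the nearest f32); `as_signed` lanes are -1.0.
    let _ = (as_unsigned, as_signed);
}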
15047
15048/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\
15049/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15050///
15051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_ph&expand=1354)
15052#[inline]
15053#[target_feature(enable = "avx512f")]
15054#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15055#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
15056#[rustc_legacy_const_generics(1)]
15057pub fn _mm512_cvt_roundps_ph<const SAE: i32>(a: __m512) -> __m256i {
15058    unsafe {
15059        static_assert_sae!(SAE);
15060        let a = a.as_f32x16();
15061        let r = vcvtps2ph(a, SAE, i16x16::ZERO, 0b11111111_11111111);
15062        transmute(r)
15063    }
15064}
15065
15066/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15067/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15068///
15069/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_ph&expand=1355)
15070#[inline]
15071#[target_feature(enable = "avx512f")]
15072#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15073#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
15074#[rustc_legacy_const_generics(3)]
15075pub fn _mm512_mask_cvt_roundps_ph<const SAE: i32>(
15076    src: __m256i,
15077    k: __mmask16,
15078    a: __m512,
15079) -> __m256i {
15080    unsafe {
15081        static_assert_sae!(SAE);
15082        let a = a.as_f32x16();
15083        let src = src.as_i16x16();
15084        let r = vcvtps2ph(a, SAE, src, k);
15085        transmute(r)
15086    }
15087}
15088
15089/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15090/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15091///
15092/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_ph&expand=1356)
15093#[inline]
15094#[target_feature(enable = "avx512f")]
15095#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15096#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
15097#[rustc_legacy_const_generics(2)]
15098pub fn _mm512_maskz_cvt_roundps_ph<const SAE: i32>(k: __mmask16, a: __m512) -> __m256i {
15099    unsafe {
15100        static_assert_sae!(SAE);
15101        let a = a.as_f32x16();
15102        let r = vcvtps2ph(a, SAE, i16x16::ZERO, k);
15103        transmute(r)
15104    }
15105}
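
// Illustrative only: narrowing f32 lanes to packed f16 bit patterns with the
// `_mm512_cvt_roundps_ph` family above. A sketch assuming AVX-512F; the f16
// results come back as raw 16-bit lanes in a `__m256i`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cvt_roundps_ph() {
    let a = _mm512_set1_ps(0.5);
    // Suppress exceptions while narrowing.
    let packed = _mm512_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(a);
    // Zeromask: only the even lanes hold converted values, odd lanes are zero.
    let packed_even = _mm512_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b0101_0101_0101_0101, a);
    let _ = (packed, packed_even);
}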
15106
15107/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15108/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15109/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15110/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15111/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15112/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15113/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15114///
15115/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvt_roundps_ph&expand=1352)
15116#[inline]
15117#[target_feature(enable = "avx512f,avx512vl")]
15118#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15119#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15120#[rustc_legacy_const_generics(3)]
15121pub fn _mm256_mask_cvt_roundps_ph<const IMM8: i32>(
15122    src: __m128i,
15123    k: __mmask8,
15124    a: __m256,
15125) -> __m128i {
15126    unsafe {
15127        static_assert_uimm_bits!(IMM8, 8);
15128        let a = a.as_f32x8();
15129        let src = src.as_i16x8();
15130        let r = vcvtps2ph256(a, IMM8, src, k);
15131        transmute(r)
15132    }
15133}
15134
15135/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15136/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15137/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15138/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15139/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15140/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15141/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15142///
15143/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvt_roundps_ph&expand=1353)
15144#[inline]
15145#[target_feature(enable = "avx512f,avx512vl")]
15146#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15147#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15148#[rustc_legacy_const_generics(2)]
15149pub fn _mm256_maskz_cvt_roundps_ph<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128i {
15150    unsafe {
15151        static_assert_uimm_bits!(IMM8, 8);
15152        let a = a.as_f32x8();
15153        let r = vcvtps2ph256(a, IMM8, i16x8::ZERO, k);
15154        transmute(r)
15155    }
15156}
15157
15158/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15159/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15160/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15161/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15162/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15163/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15164/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15165///
15166/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvt_roundps_ph&expand=1350)
15167#[inline]
15168#[target_feature(enable = "avx512f,avx512vl")]
15169#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15170#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15171#[rustc_legacy_const_generics(3)]
15172pub fn _mm_mask_cvt_roundps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
15173    unsafe {
15174        static_assert_uimm_bits!(IMM8, 8);
15175        let a = a.as_f32x4();
15176        let src = src.as_i16x8();
15177        let r = vcvtps2ph128(a, IMM8, src, k);
15178        transmute(r)
15179    }
15180}
15181
15182/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15183/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15184/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15185/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15186/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15187/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15188/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15189///
15190/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvt_roundps_ph&expand=1351)
15191#[inline]
15192#[target_feature(enable = "avx512f,avx512vl")]
15193#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15194#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15195#[rustc_legacy_const_generics(2)]
15196pub fn _mm_maskz_cvt_roundps_ph<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128i {
15197    unsafe {
15198        static_assert_uimm_bits!(IMM8, 8);
15199        let a = a.as_f32x4();
15200        let r = vcvtps2ph128(a, IMM8, i16x8::ZERO, k);
15201        transmute(r)
15202    }
15203}
15204
15205/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\
15206/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15207///
15208/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_ph&expand=1778)
15209#[inline]
15210#[target_feature(enable = "avx512f")]
15211#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15212#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
15213#[rustc_legacy_const_generics(1)]
15214pub fn _mm512_cvtps_ph<const SAE: i32>(a: __m512) -> __m256i {
15215    unsafe {
15216        static_assert_sae!(SAE);
15217        let a = a.as_f32x16();
15218        let r = vcvtps2ph(a, SAE, i16x16::ZERO, 0b11111111_11111111);
15219        transmute(r)
15220    }
15221}
15222
15223/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15224/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15225///
15226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_ph&expand=1779)
15227#[inline]
15228#[target_feature(enable = "avx512f")]
15229#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15230#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
15231#[rustc_legacy_const_generics(3)]
15232pub fn _mm512_mask_cvtps_ph<const SAE: i32>(src: __m256i, k: __mmask16, a: __m512) -> __m256i {
15233    unsafe {
15234        static_assert_sae!(SAE);
15235        let a = a.as_f32x16();
15236        let src = src.as_i16x16();
15237        let r = vcvtps2ph(a, SAE, src, k);
15238        transmute(r)
15239    }
15240}
15241
15242/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15243/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15244///
15245/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_ph&expand=1780)
15246#[inline]
15247#[target_feature(enable = "avx512f")]
15248#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15249#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
15250#[rustc_legacy_const_generics(2)]
15251pub fn _mm512_maskz_cvtps_ph<const SAE: i32>(k: __mmask16, a: __m512) -> __m256i {
15252    unsafe {
15253        static_assert_sae!(SAE);
15254        let a = a.as_f32x16();
15255        let r = vcvtps2ph(a, SAE, i16x16::ZERO, k);
15256        transmute(r)
15257    }
15258}
15259
15260/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15261/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15262/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15263/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15264/// * [`_MM_FROUND_TO_POS_INF`] : round up
15265/// * [`_MM_FROUND_TO_ZERO`] : truncate
15266/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15267///
15268/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_ph&expand=1776)
15269#[inline]
15270#[target_feature(enable = "avx512f,avx512vl")]
15271#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15272#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15273#[rustc_legacy_const_generics(3)]
15274pub fn _mm256_mask_cvtps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m256) -> __m128i {
15275    unsafe {
15276        static_assert_uimm_bits!(IMM8, 8);
15277        let a = a.as_f32x8();
15278        let src = src.as_i16x8();
15279        let r = vcvtps2ph256(a, IMM8, src, k);
15280        transmute(r)
15281    }
15282}
15283
15284/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15285/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15286/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15287/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15288/// * [`_MM_FROUND_TO_POS_INF`] : round up
15289/// * [`_MM_FROUND_TO_ZERO`] : truncate
15290/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15291///
15292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_ph&expand=1777)
15293#[inline]
15294#[target_feature(enable = "avx512f,avx512vl")]
15295#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15296#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15297#[rustc_legacy_const_generics(2)]
15298pub fn _mm256_maskz_cvtps_ph<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128i {
15299    unsafe {
15300        static_assert_uimm_bits!(IMM8, 8);
15301        let a = a.as_f32x8();
15302        let r = vcvtps2ph256(a, IMM8, i16x8::ZERO, k);
15303        transmute(r)
15304    }
15305}
15306
15307/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15308/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15309/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15310/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15311/// * [`_MM_FROUND_TO_POS_INF`] : round up
15312/// * [`_MM_FROUND_TO_ZERO`] : truncate
15313/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15314///
15315/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_ph&expand=1773)
15316#[inline]
15317#[target_feature(enable = "avx512f,avx512vl")]
15318#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15319#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15320#[rustc_legacy_const_generics(3)]
15321pub fn _mm_mask_cvtps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
15322    unsafe {
15323        static_assert_uimm_bits!(IMM8, 8);
15324        let a = a.as_f32x4();
15325        let src = src.as_i16x8();
15326        let r = vcvtps2ph128(a, IMM8, src, k);
15327        transmute(r)
15328    }
15329}
15330
15331/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15332/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15333/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15334/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15335/// * [`_MM_FROUND_TO_POS_INF`] : round up
15336/// * [`_MM_FROUND_TO_ZERO`] : truncate
15337/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15338///
15339/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_ph&expand=1774)
15340#[inline]
15341#[target_feature(enable = "avx512f,avx512vl")]
15342#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15343#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15344#[rustc_legacy_const_generics(2)]
15345pub fn _mm_maskz_cvtps_ph<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128i {
15346    unsafe {
15347        static_assert_uimm_bits!(IMM8, 8);
15348        let a = a.as_f32x4();
15349        let r = vcvtps2ph128(a, IMM8, i16x8::ZERO, k);
15350        transmute(r)
15351    }
15352}
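
// Illustrative only: the 128-bit, AVX-512VL form narrows four f32 lanes to f16
// bit patterns stored in the low 64 bits of the result. A sketch assuming
// AVX-512F + AVX-512VL; the example function name is ours.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn example_mm_cvtps_ph_masked() {
    let a = _mm_set1_ps(2.5);
    // Keep lanes 0 and 1; lanes 2 and 3 are zeroed by the zeromask.
    let r = _mm_maskz_cvtps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0b0011, a);
    let _ = r;
}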
15353
15354/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
15355/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15356///
15357/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundph_ps&expand=1332)
15358#[inline]
15359#[target_feature(enable = "avx512f")]
15360#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15361#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
15362#[rustc_legacy_const_generics(1)]
15363pub fn _mm512_cvt_roundph_ps<const SAE: i32>(a: __m256i) -> __m512 {
15364    unsafe {
15365        static_assert_sae!(SAE);
15366        let a = a.as_i16x16();
15367        let r = vcvtph2ps(a, f32x16::ZERO, 0b11111111_11111111, SAE);
15368        transmute(r)
15369    }
15370}
15371
15372/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15373/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15374///
15375/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundph_ps&expand=1333)
15376#[inline]
15377#[target_feature(enable = "avx512f")]
15378#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15379#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
15380#[rustc_legacy_const_generics(3)]
15381pub fn _mm512_mask_cvt_roundph_ps<const SAE: i32>(src: __m512, k: __mmask16, a: __m256i) -> __m512 {
15382    unsafe {
15383        static_assert_sae!(SAE);
15384        let a = a.as_i16x16();
15385        let src = src.as_f32x16();
15386        let r = vcvtph2ps(a, src, k, SAE);
15387        transmute(r)
15388    }
15389}
15390
15391/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15392/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15393///
15394/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundph_ps&expand=1334)
15395#[inline]
15396#[target_feature(enable = "avx512f")]
15397#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15398#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
15399#[rustc_legacy_const_generics(2)]
15400pub fn _mm512_maskz_cvt_roundph_ps<const SAE: i32>(k: __mmask16, a: __m256i) -> __m512 {
15401    unsafe {
15402        static_assert_sae!(SAE);
15403        let a = a.as_i16x16();
15404        let r = vcvtph2ps(a, f32x16::ZERO, k, SAE);
15405        transmute(r)
15406    }
15407}
15408
15409/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
15410///
15411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtph_ps&expand=1723)
15412#[inline]
15413#[target_feature(enable = "avx512f")]
15414#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15415#[cfg_attr(test, assert_instr(vcvtph2ps))]
15416pub fn _mm512_cvtph_ps(a: __m256i) -> __m512 {
15417    unsafe {
15418        transmute(vcvtph2ps(
15419            a.as_i16x16(),
15420            f32x16::ZERO,
15421            0b11111111_11111111,
15422            _MM_FROUND_NO_EXC,
15423        ))
15424    }
15425}
15426
15427/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15428///
15429/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtph_ps&expand=1724)
15430#[inline]
15431#[target_feature(enable = "avx512f")]
15432#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15433#[cfg_attr(test, assert_instr(vcvtph2ps))]
15434pub fn _mm512_mask_cvtph_ps(src: __m512, k: __mmask16, a: __m256i) -> __m512 {
15435    unsafe {
15436        transmute(vcvtph2ps(
15437            a.as_i16x16(),
15438            src.as_f32x16(),
15439            k,
15440            _MM_FROUND_NO_EXC,
15441        ))
15442    }
15443}
15444
15445/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15446///
15447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtph_ps&expand=1725)
15448#[inline]
15449#[target_feature(enable = "avx512f")]
15450#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15451#[cfg_attr(test, assert_instr(vcvtph2ps))]
15452pub fn _mm512_maskz_cvtph_ps(k: __mmask16, a: __m256i) -> __m512 {
15453    unsafe { transmute(vcvtph2ps(a.as_i16x16(), f32x16::ZERO, k, _MM_FROUND_NO_EXC)) }
15454}
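
// Illustrative only: a round trip through half precision using the narrowing and
// widening intrinsics above. A sketch assuming AVX-512F; values exactly
// representable in f16 (such as 1.0) survive the round trip unchanged.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_ph_ps_round_trip() {
    let a = _mm512_set1_ps(1.0);
    let half = _mm512_cvtps_ph::<_MM_FROUND_NO_EXC>(a);
    let back = _mm512_cvtph_ps(half);
    // Writemask widening: unselected lanes come from `src` (here all 2.0).
    let src = _mm512_set1_ps(2.0);
    let mixed = _mm512_mask_cvtph_ps(src, 0x00ff, half);
    let _ = (back, mixed);
}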
15455
15456/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15457///
15458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtph_ps&expand=1721)
15459#[inline]
15460#[target_feature(enable = "avx512f,avx512vl")]
15461#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15462#[cfg_attr(test, assert_instr(vcvtph2ps))]
15463pub fn _mm256_mask_cvtph_ps(src: __m256, k: __mmask8, a: __m128i) -> __m256 {
15464    unsafe {
15465        let convert = _mm256_cvtph_ps(a);
15466        transmute(simd_select_bitmask(k, convert.as_f32x8(), src.as_f32x8()))
15467    }
15468}
15469
15470/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15471///
15472/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtph_ps&expand=1722)
15473#[inline]
15474#[target_feature(enable = "avx512f,avx512vl")]
15475#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15476#[cfg_attr(test, assert_instr(vcvtph2ps))]
15477pub fn _mm256_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m256 {
15478    unsafe {
15479        let convert = _mm256_cvtph_ps(a);
15480        transmute(simd_select_bitmask(k, convert.as_f32x8(), f32x8::ZERO))
15481    }
15482}
15483
15484/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15485///
15486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtph_ps&expand=1718)
15487#[inline]
15488#[target_feature(enable = "avx512f,avx512vl")]
15489#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15490#[cfg_attr(test, assert_instr(vcvtph2ps))]
15491pub fn _mm_mask_cvtph_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
15492    unsafe {
15493        let convert = _mm_cvtph_ps(a);
15494        transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4()))
15495    }
15496}
15497
15498/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15499///
15500/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtph_ps&expand=1719)
15501#[inline]
15502#[target_feature(enable = "avx512f,avx512vl")]
15503#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15504#[cfg_attr(test, assert_instr(vcvtph2ps))]
15505pub fn _mm_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m128 {
15506    unsafe {
15507        let convert = _mm_cvtph_ps(a);
15508        transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO))
15509    }
15510}
15511
15512/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\
15513/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15514///
15515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epi32&expand=1916)
15516#[inline]
15517#[target_feature(enable = "avx512f")]
15518#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15519#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
15520#[rustc_legacy_const_generics(1)]
15521pub fn _mm512_cvtt_roundps_epi32<const SAE: i32>(a: __m512) -> __m512i {
15522    unsafe {
15523        static_assert_sae!(SAE);
15524        let a = a.as_f32x16();
15525        let r = vcvttps2dq(a, i32x16::ZERO, 0b11111111_11111111, SAE);
15526        transmute(r)
15527    }
15528}
15529
15530/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15531/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15532///
15533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epi32&expand=1917)
15534#[inline]
15535#[target_feature(enable = "avx512f")]
15536#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15537#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
15538#[rustc_legacy_const_generics(3)]
15539pub fn _mm512_mask_cvtt_roundps_epi32<const SAE: i32>(
15540    src: __m512i,
15541    k: __mmask16,
15542    a: __m512,
15543) -> __m512i {
15544    unsafe {
15545        static_assert_sae!(SAE);
15546        let a = a.as_f32x16();
15547        let src = src.as_i32x16();
15548        let r = vcvttps2dq(a, src, k, SAE);
15549        transmute(r)
15550    }
15551}
15552
15553/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15554/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15555///
15556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epi32&expand=1918)
15557#[inline]
15558#[target_feature(enable = "avx512f")]
15559#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15560#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
15561#[rustc_legacy_const_generics(2)]
15562pub fn _mm512_maskz_cvtt_roundps_epi32<const SAE: i32>(k: __mmask16, a: __m512) -> __m512i {
15563    unsafe {
15564        static_assert_sae!(SAE);
15565        let a = a.as_f32x16();
15566        let r = vcvttps2dq(a, i32x16::ZERO, k, SAE);
15567        transmute(r)
15568    }
15569}
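
// Illustrative only: conversion "with truncation" always rounds toward zero, so
// the SAE parameter of the intrinsics above only controls exception suppression.
// A sketch assuming AVX-512F; the example function name is ours.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cvtt_roundps_epi32() {
    let a = _mm512_set1_ps(-1.9);
    // Every lane truncates to -1 (toward zero), with exceptions suppressed.
    let r = _mm512_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(a);
    let _ = r;
}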
15570
15571/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.\
15572/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15573///
15574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epu32&expand=1922)
15575#[inline]
15576#[target_feature(enable = "avx512f")]
15577#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15578#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
15579#[rustc_legacy_const_generics(1)]
15580pub fn _mm512_cvtt_roundps_epu32<const SAE: i32>(a: __m512) -> __m512i {
15581    unsafe {
15582        static_assert_sae!(SAE);
15583        let a = a.as_f32x16();
15584        let r = vcvttps2udq(a, u32x16::ZERO, 0b11111111_11111111, SAE);
15585        transmute(r)
15586    }
15587}
15588
15589/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15590/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15591///
15592/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epu32&expand=1923)
15593#[inline]
15594#[target_feature(enable = "avx512f")]
15595#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15596#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
15597#[rustc_legacy_const_generics(3)]
15598pub fn _mm512_mask_cvtt_roundps_epu32<const SAE: i32>(
15599    src: __m512i,
15600    k: __mmask16,
15601    a: __m512,
15602) -> __m512i {
15603    unsafe {
15604        static_assert_sae!(SAE);
15605        let a = a.as_f32x16();
15606        let src = src.as_u32x16();
15607        let r = vcvttps2udq(a, src, k, SAE);
15608        transmute(r)
15609    }
15610}
15611
15612/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15613/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15614///
15615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epu32&expand=1924)
15616#[inline]
15617#[target_feature(enable = "avx512f")]
15618#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15619#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
15620#[rustc_legacy_const_generics(2)]
15621pub fn _mm512_maskz_cvtt_roundps_epu32<const SAE: i32>(k: __mmask16, a: __m512) -> __m512i {
15622    unsafe {
15623        static_assert_sae!(SAE);
15624        let a = a.as_f32x16();
15625        let r = vcvttps2udq(a, u32x16::ZERO, k, SAE);
15626        transmute(r)
15627    }
15628}
15629
15630/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\
15631/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15632///
15633/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epi32&expand=1904)
15634#[inline]
15635#[target_feature(enable = "avx512f")]
15636#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15637#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
15638#[rustc_legacy_const_generics(1)]
15639pub fn _mm512_cvtt_roundpd_epi32<const SAE: i32>(a: __m512d) -> __m256i {
15640    unsafe {
15641        static_assert_sae!(SAE);
15642        let a = a.as_f64x8();
15643        let r = vcvttpd2dq(a, i32x8::ZERO, 0b11111111, SAE);
15644        transmute(r)
15645    }
15646}
15647
15648/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15649/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15650///
15651/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epi32&expand=1905)
15652#[inline]
15653#[target_feature(enable = "avx512f")]
15654#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15655#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
15656#[rustc_legacy_const_generics(3)]
15657pub fn _mm512_mask_cvtt_roundpd_epi32<const SAE: i32>(
15658    src: __m256i,
15659    k: __mmask8,
15660    a: __m512d,
15661) -> __m256i {
15662    unsafe {
15663        static_assert_sae!(SAE);
15664        let a = a.as_f64x8();
15665        let src = src.as_i32x8();
15666        let r = vcvttpd2dq(a, src, k, SAE);
15667        transmute(r)
15668    }
15669}
15670
15671/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15672/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15673///
15674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epi32&expand=1918)
15675#[inline]
15676#[target_feature(enable = "avx512f")]
15677#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15678#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
15679#[rustc_legacy_const_generics(2)]
15680pub fn _mm512_maskz_cvtt_roundpd_epi32<const SAE: i32>(k: __mmask8, a: __m512d) -> __m256i {
15681    unsafe {
15682        static_assert_sae!(SAE);
15683        let a = a.as_f64x8();
15684        let r = vcvttpd2dq(a, i32x8::ZERO, k, SAE);
15685        transmute(r)
15686    }
15687}
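
// Illustrative only: the pd -> epi32 truncating forms above narrow eight f64
// lanes into eight i32 lanes of a `__m256i`. A sketch assuming AVX-512F; the
// example function name is ours.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cvtt_roundpd_epi32() {
    let a = _mm512_set1_pd(3.99);
    // Truncation discards the fraction: every lane becomes 3.
    let r = _mm512_cvtt_roundpd_epi32::<_MM_FROUND_NO_EXC>(a);
    // Writemask: keep converted values only in the low four lanes.
    let src = _mm512_castsi512_si256(_mm512_setzero_si512());
    let r_lo = _mm512_mask_cvtt_roundpd_epi32::<_MM_FROUND_NO_EXC>(src, 0b0000_1111, a);
    let _ = (r, r_lo);
}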
15688
15689/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.\
15690/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15691///
15692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epu32&expand=1910)
15693#[inline]
15694#[target_feature(enable = "avx512f")]
15695#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15696#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
15697#[rustc_legacy_const_generics(1)]
15698pub fn _mm512_cvtt_roundpd_epu32<const SAE: i32>(a: __m512d) -> __m256i {
15699    unsafe {
15700        static_assert_sae!(SAE);
15701        let a = a.as_f64x8();
15702        let r = vcvttpd2udq(a, i32x8::ZERO, 0b11111111, SAE);
15703        transmute(r)
15704    }
15705}
15706
15707/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15708/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15709///
15710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epu32&expand=1911)
15711#[inline]
15712#[target_feature(enable = "avx512f")]
15713#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15714#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
15715#[rustc_legacy_const_generics(3)]
15716pub fn _mm512_mask_cvtt_roundpd_epu32<const SAE: i32>(
15717    src: __m256i,
15718    k: __mmask8,
15719    a: __m512d,
15720) -> __m256i {
15721    unsafe {
15722        static_assert_sae!(SAE);
15723        let a = a.as_f64x8();
15724        let src = src.as_i32x8();
15725        let r = vcvttpd2udq(a, src, k, SAE);
15726        transmute(r)
15727    }
15728}
15729
15730/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.
15731///
15732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epi32&expand=1984)
15733#[inline]
15734#[target_feature(enable = "avx512f")]
15735#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15736#[cfg_attr(test, assert_instr(vcvttps2dq))]
15737pub fn _mm512_cvttps_epi32(a: __m512) -> __m512i {
15738    unsafe {
15739        transmute(vcvttps2dq(
15740            a.as_f32x16(),
15741            i32x16::ZERO,
15742            0b11111111_11111111,
15743            _MM_FROUND_CUR_DIRECTION,
15744        ))
15745    }
15746}
15747
15748/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15749///
15750/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epi32&expand=1985)
15751#[inline]
15752#[target_feature(enable = "avx512f")]
15753#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15754#[cfg_attr(test, assert_instr(vcvttps2dq))]
15755pub fn _mm512_mask_cvttps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
15756    unsafe {
15757        transmute(vcvttps2dq(
15758            a.as_f32x16(),
15759            src.as_i32x16(),
15760            k,
15761            _MM_FROUND_CUR_DIRECTION,
15762        ))
15763    }
15764}
15765
15766/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15767///
15768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epi32&expand=1986)
15769#[inline]
15770#[target_feature(enable = "avx512f")]
15771#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15772#[cfg_attr(test, assert_instr(vcvttps2dq))]
15773pub fn _mm512_maskz_cvttps_epi32(k: __mmask16, a: __m512) -> __m512i {
15774    unsafe {
15775        transmute(vcvttps2dq(
15776            a.as_f32x16(),
15777            i32x16::ZERO,
15778            k,
15779            _MM_FROUND_CUR_DIRECTION,
15780        ))
15781    }
15782}
15783
15784/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15785///
15786/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epi32&expand=1982)
15787#[inline]
15788#[target_feature(enable = "avx512f,avx512vl")]
15789#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15790#[cfg_attr(test, assert_instr(vcvttps2dq))]
15791pub fn _mm256_mask_cvttps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
15792    unsafe { transmute(vcvttps2dq256(a.as_f32x8(), src.as_i32x8(), k)) }
15793}
15794
15795/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15796///
15797/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epi32&expand=1983)
15798#[inline]
15799#[target_feature(enable = "avx512f,avx512vl")]
15800#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15801#[cfg_attr(test, assert_instr(vcvttps2dq))]
15802pub fn _mm256_maskz_cvttps_epi32(k: __mmask8, a: __m256) -> __m256i {
15803    unsafe { transmute(vcvttps2dq256(a.as_f32x8(), i32x8::ZERO, k)) }
15804}
15805
15806/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15807///
15808/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epi32&expand=1979)
15809#[inline]
15810#[target_feature(enable = "avx512f,avx512vl")]
15811#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15812#[cfg_attr(test, assert_instr(vcvttps2dq))]
15813pub fn _mm_mask_cvttps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
15814    unsafe { transmute(vcvttps2dq128(a.as_f32x4(), src.as_i32x4(), k)) }
15815}
15816
15817/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15818///
15819/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epi32&expand=1980)
15820#[inline]
15821#[target_feature(enable = "avx512f,avx512vl")]
15822#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15823#[cfg_attr(test, assert_instr(vcvttps2dq))]
15824pub fn _mm_maskz_cvttps_epi32(k: __mmask8, a: __m128) -> __m128i {
15825    unsafe { transmute(vcvttps2dq128(a.as_f32x4(), i32x4::ZERO, k)) }
15826}
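
// Illustrative only: the 128-bit masked truncating conversion follows the same
// writemask/zeromask convention as the 512-bit forms. A sketch assuming
// AVX-512F + AVX-512VL; the example function name is ours.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn example_mm_cvttps_epi32_masked() {
    let a = _mm_set1_ps(7.8);
    let src = _mm_set1_epi32(-1);
    // Lanes 0 and 1 become 7 (truncated); lanes 2 and 3 keep `src`'s -1.
    let kept = _mm_mask_cvttps_epi32(src, 0b0011, a);
    // Zeromask form: lanes 2 and 3 become 0 instead.
    let zeroed = _mm_maskz_cvttps_epi32(0b0011, a);
    let _ = (kept, zeroed);
}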
15827
15828/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
15829///
15830/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epu32&expand=2002)
15831#[inline]
15832#[target_feature(enable = "avx512f")]
15833#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15834#[cfg_attr(test, assert_instr(vcvttps2udq))]
15835pub fn _mm512_cvttps_epu32(a: __m512) -> __m512i {
15836    unsafe {
15837        transmute(vcvttps2udq(
15838            a.as_f32x16(),
15839            u32x16::ZERO,
15840            0b11111111_11111111,
15841            _MM_FROUND_CUR_DIRECTION,
15842        ))
15843    }
15844}
15845
15846/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15847///
15848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epu32&expand=2003)
15849#[inline]
15850#[target_feature(enable = "avx512f")]
15851#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15852#[cfg_attr(test, assert_instr(vcvttps2udq))]
15853pub fn _mm512_mask_cvttps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
15854    unsafe {
15855        transmute(vcvttps2udq(
15856            a.as_f32x16(),
15857            src.as_u32x16(),
15858            k,
15859            _MM_FROUND_CUR_DIRECTION,
15860        ))
15861    }
15862}
15863
15864/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15865///
15866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epu32&expand=2004)
15867#[inline]
15868#[target_feature(enable = "avx512f")]
15869#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15870#[cfg_attr(test, assert_instr(vcvttps2udq))]
15871pub fn _mm512_maskz_cvttps_epu32(k: __mmask16, a: __m512) -> __m512i {
15872    unsafe {
15873        transmute(vcvttps2udq(
15874            a.as_f32x16(),
15875            u32x16::ZERO,
15876            k,
15877            _MM_FROUND_CUR_DIRECTION,
15878        ))
15879    }
15880}
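
// Illustrative only: unsigned truncating conversion; in-range positive values
// simply drop their fraction, and values too large for i32 can still be
// represented. A sketch assuming AVX-512F (negative and out-of-range inputs
// follow the hardware's unsigned-conversion rules and are not shown here).
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cvttps_epu32() {
    // 3e9 is exactly representable as f32, does not fit in i32, but fits in u32.
    let a = _mm512_set1_ps(3_000_000_000.0);
    let r = _mm512_cvttps_epu32(a);
    // Zeromask: only the low eight lanes are converted, the rest become 0.
    let r_lo = _mm512_maskz_cvttps_epu32(0x00ff, a);
    let _ = (r, r_lo);
}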
15881
15882/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
15883///
15884/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epu32&expand=1999)
15885#[inline]
15886#[target_feature(enable = "avx512f,avx512vl")]
15887#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15888#[cfg_attr(test, assert_instr(vcvttps2udq))]
15889pub fn _mm256_cvttps_epu32(a: __m256) -> __m256i {
15890    unsafe { transmute(vcvttps2udq256(a.as_f32x8(), u32x8::ZERO, 0b11111111)) }
15891}
15892
15893/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15894///
15895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epu32&expand=2000)
15896#[inline]
15897#[target_feature(enable = "avx512f,avx512vl")]
15898#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15899#[cfg_attr(test, assert_instr(vcvttps2udq))]
15900pub fn _mm256_mask_cvttps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
15901    unsafe { transmute(vcvttps2udq256(a.as_f32x8(), src.as_u32x8(), k)) }
15902}
15903
15904/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15905///
15906/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epu32&expand=2001)
15907#[inline]
15908#[target_feature(enable = "avx512f,avx512vl")]
15909#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15910#[cfg_attr(test, assert_instr(vcvttps2udq))]
15911pub fn _mm256_maskz_cvttps_epu32(k: __mmask8, a: __m256) -> __m256i {
15912    unsafe { transmute(vcvttps2udq256(a.as_f32x8(), u32x8::ZERO, k)) }
15913}
15914
15915/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
15916///
15917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epu32&expand=1996)
15918#[inline]
15919#[target_feature(enable = "avx512f,avx512vl")]
15920#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15921#[cfg_attr(test, assert_instr(vcvttps2udq))]
15922pub fn _mm_cvttps_epu32(a: __m128) -> __m128i {
15923    unsafe { transmute(vcvttps2udq128(a.as_f32x4(), u32x4::ZERO, 0b11111111)) }
15924}
15925
15926/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15927///
15928/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epu32&expand=1997)
15929#[inline]
15930#[target_feature(enable = "avx512f,avx512vl")]
15931#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15932#[cfg_attr(test, assert_instr(vcvttps2udq))]
15933pub fn _mm_mask_cvttps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
15934    unsafe { transmute(vcvttps2udq128(a.as_f32x4(), src.as_u32x4(), k)) }
15935}
15936
15937/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15938///
15939/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epu32&expand=1998)
15940#[inline]
15941#[target_feature(enable = "avx512f,avx512vl")]
15942#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15943#[cfg_attr(test, assert_instr(vcvttps2udq))]
15944pub fn _mm_maskz_cvttps_epu32(k: __mmask8, a: __m128) -> __m128i {
15945    unsafe { transmute(vcvttps2udq128(a.as_f32x4(), u32x4::ZERO, k)) }
15946}
15947
15948/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15949/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15950///
15951/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epu32&expand=1912)
15952#[inline]
15953#[target_feature(enable = "avx512f")]
15954#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15955#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
15956#[rustc_legacy_const_generics(2)]
15957pub fn _mm512_maskz_cvtt_roundpd_epu32<const SAE: i32>(k: __mmask8, a: __m512d) -> __m256i {
15958    unsafe {
15959        static_assert_sae!(SAE);
15960        let a = a.as_f64x8();
15961        let r = vcvttpd2udq(a, i32x8::ZERO, k, SAE);
15962        transmute(r)
15963    }
15964}
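
// Illustrative sketch (not part of the upstream stdarch source): the SAE
// parameter is a const generic, so exception suppression is requested at the
// call site with `_MM_FROUND_NO_EXC`. The helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _sketch_maskz_cvtt_roundpd_epu32() {
    let a = _mm512_set1_pd(2.5);
    // All eight mask bits set; truncation gives 2 in every lane, and
    // _MM_FROUND_NO_EXC suppresses floating-point exceptions.
    let r = _mm512_maskz_cvtt_roundpd_epu32::<{ _MM_FROUND_NO_EXC }>(0b11111111, a);
    let lanes: [u32; 8] = unsafe { mem::transmute(r) };
    assert!(lanes.iter().all(|&x| x == 2));
}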
15965
15966/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.
15967///
15968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epi32&expand=1947)
15969#[inline]
15970#[target_feature(enable = "avx512f")]
15971#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15972#[cfg_attr(test, assert_instr(vcvttpd2dq))]
15973pub fn _mm512_cvttpd_epi32(a: __m512d) -> __m256i {
15974    unsafe {
15975        transmute(vcvttpd2dq(
15976            a.as_f64x8(),
15977            i32x8::ZERO,
15978            0b11111111,
15979            _MM_FROUND_CUR_DIRECTION,
15980        ))
15981    }
15982}
15983
15984/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15985///
15986/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epi32&expand=1948)
15987#[inline]
15988#[target_feature(enable = "avx512f")]
15989#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15990#[cfg_attr(test, assert_instr(vcvttpd2dq))]
15991pub fn _mm512_mask_cvttpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
15992    unsafe {
15993        transmute(vcvttpd2dq(
15994            a.as_f64x8(),
15995            src.as_i32x8(),
15996            k,
15997            _MM_FROUND_CUR_DIRECTION,
15998        ))
15999    }
16000}
16001
16002/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16003///
16004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epi32&expand=1949)
16005#[inline]
16006#[target_feature(enable = "avx512f")]
16007#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16008#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16009pub fn _mm512_maskz_cvttpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
16010    unsafe {
16011        transmute(vcvttpd2dq(
16012            a.as_f64x8(),
16013            i32x8::ZERO,
16014            k,
16015            _MM_FROUND_CUR_DIRECTION,
16016        ))
16017    }
16018}
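
// Illustrative sketch (not part of the upstream stdarch source): truncation
// rounds toward zero, so negative inputs round up. The helper name is
// hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _sketch_cvttpd_epi32() {
    let a = _mm512_set1_pd(-1.7);
    let r = _mm512_cvttpd_epi32(a);
    let lanes: [i32; 8] = unsafe { mem::transmute(r) };
    assert!(lanes.iter().all(|&x| x == -1)); // -1.7 truncates to -1, not -2
}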
16019
16020/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16021///
16022/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epi32&expand=1945)
16023#[inline]
16024#[target_feature(enable = "avx512f,avx512vl")]
16025#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16026#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16027pub fn _mm256_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
16028    unsafe { transmute(vcvttpd2dq256(a.as_f64x4(), src.as_i32x4(), k)) }
16029}
16030
16031/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16032///
16033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epi32&expand=1946)
16034#[inline]
16035#[target_feature(enable = "avx512f,avx512vl")]
16036#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16037#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16038pub fn _mm256_maskz_cvttpd_epi32(k: __mmask8, a: __m256d) -> __m128i {
16039    unsafe { transmute(vcvttpd2dq256(a.as_f64x4(), i32x4::ZERO, k)) }
16040}
16041
16042/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16043///
16044/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epi32&expand=1942)
16045#[inline]
16046#[target_feature(enable = "avx512f,avx512vl")]
16047#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16048#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16049pub fn _mm_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
16050    unsafe { transmute(vcvttpd2dq128(a.as_f64x2(), src.as_i32x4(), k)) }
16051}
16052
16053/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16054///
16055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epi32&expand=1943)
16056#[inline]
16057#[target_feature(enable = "avx512f,avx512vl")]
16058#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16059#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16060pub fn _mm_maskz_cvttpd_epi32(k: __mmask8, a: __m128d) -> __m128i {
16061    unsafe { transmute(vcvttpd2dq128(a.as_f64x2(), i32x4::ZERO, k)) }
16062}
16063
16064/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16065///
16066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epu32&expand=1965)
16067#[inline]
16068#[target_feature(enable = "avx512f")]
16069#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16070#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16071pub fn _mm512_cvttpd_epu32(a: __m512d) -> __m256i {
16072    unsafe {
16073        transmute(vcvttpd2udq(
16074            a.as_f64x8(),
16075            i32x8::ZERO,
16076            0b11111111,
16077            _MM_FROUND_CUR_DIRECTION,
16078        ))
16079    }
16080}
16081
16082/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16083///
16084/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epu32&expand=1966)
16085#[inline]
16086#[target_feature(enable = "avx512f")]
16087#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16088#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16089pub fn _mm512_mask_cvttpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
16090    unsafe {
16091        transmute(vcvttpd2udq(
16092            a.as_f64x8(),
16093            src.as_i32x8(),
16094            k,
16095            _MM_FROUND_CUR_DIRECTION,
16096        ))
16097    }
16098}
16099
16100/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16101///
16102/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epu32&expand=1967)
16103#[inline]
16104#[target_feature(enable = "avx512f")]
16105#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16106#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16107pub fn _mm512_maskz_cvttpd_epu32(k: __mmask8, a: __m512d) -> __m256i {
16108    unsafe {
16109        transmute(vcvttpd2udq(
16110            a.as_f64x8(),
16111            i32x8::ZERO,
16112            k,
16113            _MM_FROUND_CUR_DIRECTION,
16114        ))
16115    }
16116}
16117
16118/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16119///
16120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epu32&expand=1962)
16121#[inline]
16122#[target_feature(enable = "avx512f,avx512vl")]
16123#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16124#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16125pub fn _mm256_cvttpd_epu32(a: __m256d) -> __m128i {
16126    unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), i32x4::ZERO, 0b11111111)) }
16127}
16128
16129/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16130///
16131/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epu32&expand=1963)
16132#[inline]
16133#[target_feature(enable = "avx512f,avx512vl")]
16134#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16135#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16136pub fn _mm256_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
16137    unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), src.as_i32x4(), k)) }
16138}
16139
16140/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16141///
16142/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epu32&expand=1964)
16143#[inline]
16144#[target_feature(enable = "avx512f,avx512vl")]
16145#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16146#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16147pub fn _mm256_maskz_cvttpd_epu32(k: __mmask8, a: __m256d) -> __m128i {
16148    unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), i32x4::ZERO, k)) }
16149}
16150
16151/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16152///
16153/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epu32&expand=1959)
16154#[inline]
16155#[target_feature(enable = "avx512f,avx512vl")]
16156#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16157#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16158pub fn _mm_cvttpd_epu32(a: __m128d) -> __m128i {
16159    unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), i32x4::ZERO, 0b11111111)) }
16160}
16161
16162/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16163///
16164/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epu32&expand=1960)
16165#[inline]
16166#[target_feature(enable = "avx512f,avx512vl")]
16167#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16168#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16169pub fn _mm_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
16170    unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), src.as_i32x4(), k)) }
16171}
16172
16173/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16174///
16175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epu32&expand=1961)
16176#[inline]
16177#[target_feature(enable = "avx512f,avx512vl")]
16178#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16179#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16180pub fn _mm_maskz_cvttpd_epu32(k: __mmask8, a: __m128d) -> __m128i {
16181    unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), i32x4::ZERO, k)) }
16182}
16183
16184/// Returns vector of type `__m512d` with all elements set to zero.
16185///
16186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_pd&expand=5018)
16187#[inline]
16188#[target_feature(enable = "avx512f")]
16189#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16190#[cfg_attr(test, assert_instr(vxorps))]
16191pub fn _mm512_setzero_pd() -> __m512d {
16192    // All-0 is a properly initialized __m512d
16193    unsafe { const { mem::zeroed() } }
16194}
16195
16196/// Returns vector of type `__m512` with all elements set to zero.
16197///
16198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_ps&expand=5021)
16199#[inline]
16200#[target_feature(enable = "avx512f")]
16201#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16202#[cfg_attr(test, assert_instr(vxorps))]
16203pub fn _mm512_setzero_ps() -> __m512 {
16204    // All-0 is a properly initialized __m512
16205    unsafe { const { mem::zeroed() } }
16206}
16207
16208/// Returns vector of type `__m512` with all elements set to zero.
16209///
16210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero&expand=5014)
16211#[inline]
16212#[target_feature(enable = "avx512f")]
16213#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16214#[cfg_attr(test, assert_instr(vxorps))]
16215pub fn _mm512_setzero() -> __m512 {
16216    // All-0 is a properly initialized __m512
16217    unsafe { const { mem::zeroed() } }
16218}
16219
16220/// Returns vector of type `__m512i` with all elements set to zero.
16221///
16222/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_si512&expand=5024)
16223#[inline]
16224#[target_feature(enable = "avx512f")]
16225#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16226#[cfg_attr(test, assert_instr(vxorps))]
16227pub fn _mm512_setzero_si512() -> __m512i {
16228    // All-0 is a properly initialized __m512i
16229    unsafe { const { mem::zeroed() } }
16230}
16231
16232/// Returns vector of type `__m512i` with all elements set to zero.
16233///
16234/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_epi32&expand=5015)
16235#[inline]
16236#[target_feature(enable = "avx512f")]
16237#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16238#[cfg_attr(test, assert_instr(vxorps))]
16239pub fn _mm512_setzero_epi32() -> __m512i {
16240    // All-0 is a properly initialized __m512i
16241    unsafe { const { mem::zeroed() } }
16242}
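
// Illustrative sketch (not part of the upstream stdarch source): all of the
// setzero helpers above produce the same all-zero bit pattern; only the
// nominal element type differs. The helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _sketch_setzero() {
    let zi = _mm512_setzero_si512();
    let zf = _mm512_setzero_ps();
    let a: [u64; 8] = unsafe { mem::transmute(zi) };
    let b: [u64; 8] = unsafe { mem::transmute(zf) };
    assert_eq!(a, b); // identical 512-bit zero patterns
}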
16243
16244/// Sets packed 32-bit integers in `dst` with the supplied values in reverse
16245/// order.
16246///
16247/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_epi32&expand=4991)
16248#[inline]
16249#[target_feature(enable = "avx512f")]
16250#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16251pub fn _mm512_setr_epi32(
16252    e15: i32,
16253    e14: i32,
16254    e13: i32,
16255    e12: i32,
16256    e11: i32,
16257    e10: i32,
16258    e9: i32,
16259    e8: i32,
16260    e7: i32,
16261    e6: i32,
16262    e5: i32,
16263    e4: i32,
16264    e3: i32,
16265    e2: i32,
16266    e1: i32,
16267    e0: i32,
16268) -> __m512i {
16269    unsafe {
16270        let r = i32x16::new(
16271            e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
16272        );
16273        transmute(r)
16274    }
16275}
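
// Illustrative sketch (not part of the upstream stdarch source): `setr` takes
// its arguments in memory order, so the first argument lands in element 0,
// while `set` takes them in reverse. The helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _sketch_setr_epi32() {
    let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let b = _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
    let x: [i32; 16] = unsafe { mem::transmute(a) };
    let y: [i32; 16] = unsafe { mem::transmute(b) };
    assert_eq!(x, y);
    assert_eq!(x[0], 0); // element 0 holds the first `setr` argument
}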
16276
16277/// Set packed 8-bit integers in dst with the supplied values.
16278///
16279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi8&expand=4915)
16280#[inline]
16281#[target_feature(enable = "avx512f")]
16282#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16283pub fn _mm512_set_epi8(
16284    e63: i8,
16285    e62: i8,
16286    e61: i8,
16287    e60: i8,
16288    e59: i8,
16289    e58: i8,
16290    e57: i8,
16291    e56: i8,
16292    e55: i8,
16293    e54: i8,
16294    e53: i8,
16295    e52: i8,
16296    e51: i8,
16297    e50: i8,
16298    e49: i8,
16299    e48: i8,
16300    e47: i8,
16301    e46: i8,
16302    e45: i8,
16303    e44: i8,
16304    e43: i8,
16305    e42: i8,
16306    e41: i8,
16307    e40: i8,
16308    e39: i8,
16309    e38: i8,
16310    e37: i8,
16311    e36: i8,
16312    e35: i8,
16313    e34: i8,
16314    e33: i8,
16315    e32: i8,
16316    e31: i8,
16317    e30: i8,
16318    e29: i8,
16319    e28: i8,
16320    e27: i8,
16321    e26: i8,
16322    e25: i8,
16323    e24: i8,
16324    e23: i8,
16325    e22: i8,
16326    e21: i8,
16327    e20: i8,
16328    e19: i8,
16329    e18: i8,
16330    e17: i8,
16331    e16: i8,
16332    e15: i8,
16333    e14: i8,
16334    e13: i8,
16335    e12: i8,
16336    e11: i8,
16337    e10: i8,
16338    e9: i8,
16339    e8: i8,
16340    e7: i8,
16341    e6: i8,
16342    e5: i8,
16343    e4: i8,
16344    e3: i8,
16345    e2: i8,
16346    e1: i8,
16347    e0: i8,
16348) -> __m512i {
16349    unsafe {
16350        let r = i8x64::new(
16351            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18,
16352            e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31, e32, e33, e34, e35,
16353            e36, e37, e38, e39, e40, e41, e42, e43, e44, e45, e46, e47, e48, e49, e50, e51, e52,
16354            e53, e54, e55, e56, e57, e58, e59, e60, e61, e62, e63,
16355        );
16356        transmute(r)
16357    }
16358}
16359
16360/// Set packed 16-bit integers in dst with the supplied values.
16361///
16362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi16&expand=4905)
16363#[inline]
16364#[target_feature(enable = "avx512f")]
16365#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16366pub fn _mm512_set_epi16(
16367    e31: i16,
16368    e30: i16,
16369    e29: i16,
16370    e28: i16,
16371    e27: i16,
16372    e26: i16,
16373    e25: i16,
16374    e24: i16,
16375    e23: i16,
16376    e22: i16,
16377    e21: i16,
16378    e20: i16,
16379    e19: i16,
16380    e18: i16,
16381    e17: i16,
16382    e16: i16,
16383    e15: i16,
16384    e14: i16,
16385    e13: i16,
16386    e12: i16,
16387    e11: i16,
16388    e10: i16,
16389    e9: i16,
16390    e8: i16,
16391    e7: i16,
16392    e6: i16,
16393    e5: i16,
16394    e4: i16,
16395    e3: i16,
16396    e2: i16,
16397    e1: i16,
16398    e0: i16,
16399) -> __m512i {
16400    unsafe {
16401        let r = i16x32::new(
16402            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18,
16403            e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31,
16404        );
16405        transmute(r)
16406    }
16407}
16408
16409/// Set packed 32-bit integers in dst with the repeated 4-element sequence.
16410///
16411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_epi32&expand=4982)
16412#[inline]
16413#[target_feature(enable = "avx512f")]
16414#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16415pub fn _mm512_set4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
16416    _mm512_set_epi32(d, c, b, a, d, c, b, a, d, c, b, a, d, c, b, a)
16417}
16418
16419/// Set packed single-precision (32-bit) floating-point elements in dst with the repeated 4-element sequence.
16420///
16421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_ps&expand=4985)
16422#[inline]
16423#[target_feature(enable = "avx512f")]
16424#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16425pub fn _mm512_set4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
16426    _mm512_set_ps(d, c, b, a, d, c, b, a, d, c, b, a, d, c, b, a)
16427}
16428
16429/// Set packed double-precision (64-bit) floating-point elements in dst with the repeated 4-element sequence.
16430///
16431/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_pd&expand=4984)
16432#[inline]
16433#[target_feature(enable = "avx512f")]
16434#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16435pub fn _mm512_set4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
16436    _mm512_set_pd(d, c, b, a, d, c, b, a)
16437}
16438
16439/// Set packed 32-bit integers in dst with the repeated 4-element sequence in reverse order.
16440///
16441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_epi32&expand=5009)
16442#[inline]
16443#[target_feature(enable = "avx512f")]
16444#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16445pub fn _mm512_setr4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
16446    _mm512_set_epi32(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d)
16447}
16448
16449/// Set packed single-precision (32-bit) floating-point elements in dst with the repeated 4-element sequence in reverse order.
16450///
16451/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_ps&expand=5012)
16452#[inline]
16453#[target_feature(enable = "avx512f")]
16454#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16455pub fn _mm512_setr4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
16456    _mm512_set_ps(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d)
16457}
16458
16459/// Set packed double-precision (64-bit) floating-point elements in dst with the repeated 4-element sequence in reverse order.
16460///
16461/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_pd&expand=5011)
16462#[inline]
16463#[target_feature(enable = "avx512f")]
16464#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16465pub fn _mm512_setr4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
16466    _mm512_set_pd(a, b, c, d, a, b, c, d)
16467}
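
// Illustrative sketch (not part of the upstream stdarch source): `set4`
// repeats the (d, c, b, a) quadruple with `a` in the lowest element of each
// group, while `setr4` stores the quadruple in the written order. The helper
// name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _sketch_set4_epi32() {
    let s = _mm512_set4_epi32(4, 3, 2, 1);
    let r = _mm512_setr4_epi32(1, 2, 3, 4);
    let x: [i32; 16] = unsafe { mem::transmute(s) };
    let y: [i32; 16] = unsafe { mem::transmute(r) };
    assert_eq!(&x[..4], &[1, 2, 3, 4]); // lowest group of four elements
    assert_eq!(x, y); // the two argument orders describe the same vector here
}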
16468
16469/// Set packed 64-bit integers in dst with the supplied values.
16470///
16471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi64&expand=4910)
16472#[inline]
16473#[target_feature(enable = "avx512f")]
16474#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16475pub fn _mm512_set_epi64(
16476    e0: i64,
16477    e1: i64,
16478    e2: i64,
16479    e3: i64,
16480    e4: i64,
16481    e5: i64,
16482    e6: i64,
16483    e7: i64,
16484) -> __m512i {
16485    _mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0)
16486}
16487
16488/// Set packed 64-bit integers in dst with the supplied values in reverse order.
16489///
16490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_epi64&expand=4993)
16491#[inline]
16492#[target_feature(enable = "avx512f")]
16493#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16494pub fn _mm512_setr_epi64(
16495    e0: i64,
16496    e1: i64,
16497    e2: i64,
16498    e3: i64,
16499    e4: i64,
16500    e5: i64,
16501    e6: i64,
16502    e7: i64,
16503) -> __m512i {
16504    unsafe {
16505        let r = i64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
16506        transmute(r)
16507    }
16508}
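
// Illustrative sketch (not part of the upstream stdarch source):
// `_mm512_set_epi64` simply forwards to `_mm512_setr_epi64` with the
// arguments reversed, so the last `set` argument becomes element 0. The
// helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _sketch_set_epi64() {
    let a = _mm512_set_epi64(7, 6, 5, 4, 3, 2, 1, 0);
    let b = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
    let x: [i64; 8] = unsafe { mem::transmute(a) };
    let y: [i64; 8] = unsafe { mem::transmute(b) };
    assert_eq!(x, y);
    assert_eq!(x[0], 0); // last `set` argument is the lowest element
}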
16509
16510/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16511///
16512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_pd&expand=3002)
16513#[inline]
16514#[target_feature(enable = "avx512f")]
16515#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16516#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
16517#[rustc_legacy_const_generics(2)]
16518pub unsafe fn _mm512_i32gather_pd<const SCALE: i32>(offsets: __m256i, slice: *const u8) -> __m512d {
16519    static_assert_imm8_scale!(SCALE);
16520    let zero = f64x8::ZERO;
16521    let neg_one = -1;
16522    let slice = slice as *const i8;
16523    let offsets = offsets.as_i32x8();
16524    let r = vgatherdpd(zero, slice, offsets, neg_one, SCALE);
16525    transmute(r)
16526}
16527
16528/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16529///
16530/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_pd&expand=3003)
16531#[inline]
16532#[target_feature(enable = "avx512f")]
16533#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16534#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
16535#[rustc_legacy_const_generics(4)]
16536pub unsafe fn _mm512_mask_i32gather_pd<const SCALE: i32>(
16537    src: __m512d,
16538    mask: __mmask8,
16539    offsets: __m256i,
16540    slice: *const u8,
16541) -> __m512d {
16542    static_assert_imm8_scale!(SCALE);
16543    let src = src.as_f64x8();
16544    let slice = slice as *const i8;
16545    let offsets = offsets.as_i32x8();
16546    let r = vgatherdpd(src, slice, offsets, mask as i8, SCALE);
16547    transmute(r)
16548}
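
// Illustrative sketch (not part of the upstream stdarch source): `vindex`
// holds element indices, and with SCALE = 8 each index is multiplied by the
// size of an f64 to form a byte offset from the base pointer. The helper name
// and the local buffer are hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx")]
unsafe fn _sketch_i32gather_pd() {
    let data: [f64; 16] = [
        0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
    ];
    // Gather elements 0, 2, 4, ..., 14 (every other f64).
    let idx = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
    let r = unsafe { _mm512_i32gather_pd::<8>(idx, data.as_ptr() as *const u8) };
    let lanes: [f64; 8] = unsafe { mem::transmute(r) };
    assert_eq!(lanes, [0.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0]);
}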
16549
16550/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16551///
16552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_pd&expand=3092)
16553#[inline]
16554#[target_feature(enable = "avx512f")]
16555#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16556#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
16557#[rustc_legacy_const_generics(2)]
16558pub unsafe fn _mm512_i64gather_pd<const SCALE: i32>(offsets: __m512i, slice: *const u8) -> __m512d {
16559    static_assert_imm8_scale!(SCALE);
16560    let zero = f64x8::ZERO;
16561    let neg_one = -1;
16562    let slice = slice as *const i8;
16563    let offsets = offsets.as_i64x8();
16564    let r = vgatherqpd(zero, slice, offsets, neg_one, SCALE);
16565    transmute(r)
16566}
16567
16568/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16569///
16570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_pd&expand=3093)
16571#[inline]
16572#[target_feature(enable = "avx512f")]
16573#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16574#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
16575#[rustc_legacy_const_generics(4)]
16576pub unsafe fn _mm512_mask_i64gather_pd<const SCALE: i32>(
16577    src: __m512d,
16578    mask: __mmask8,
16579    offsets: __m512i,
16580    slice: *const u8,
16581) -> __m512d {
16582    static_assert_imm8_scale!(SCALE);
16583    let src = src.as_f64x8();
16584    let slice = slice as *const i8;
16585    let offsets = offsets.as_i64x8();
16586    let r = vgatherqpd(src, slice, offsets, mask as i8, SCALE);
16587    transmute(r)
16588}
16589
16590/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16591///
16592/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_ps&expand=3100)
16593#[inline]
16594#[target_feature(enable = "avx512f")]
16595#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16596#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
16597#[rustc_legacy_const_generics(2)]
16598pub unsafe fn _mm512_i64gather_ps<const SCALE: i32>(offsets: __m512i, slice: *const u8) -> __m256 {
16599    static_assert_imm8_scale!(SCALE);
16600    let zero = f32x8::ZERO;
16601    let neg_one = -1;
16602    let slice = slice as *const i8;
16603    let offsets = offsets.as_i64x8();
16604    let r = vgatherqps(zero, slice, offsets, neg_one, SCALE);
16605    transmute(r)
16606}
16607
16608/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16609///
16610/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_ps&expand=3101)
16611#[inline]
16612#[target_feature(enable = "avx512f")]
16613#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16614#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
16615#[rustc_legacy_const_generics(4)]
16616pub unsafe fn _mm512_mask_i64gather_ps<const SCALE: i32>(
16617    src: __m256,
16618    mask: __mmask8,
16619    offsets: __m512i,
16620    slice: *const u8,
16621) -> __m256 {
16622    static_assert_imm8_scale!(SCALE);
16623    let src = src.as_f32x8();
16624    let slice = slice as *const i8;
16625    let offsets = offsets.as_i64x8();
16626    let r = vgatherqps(src, slice, offsets, mask as i8, SCALE);
16627    transmute(r)
16628}
16629
16630/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16631///
16632/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_ps&expand=3010)
16633#[inline]
16634#[target_feature(enable = "avx512f")]
16635#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16636#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
16637#[rustc_legacy_const_generics(2)]
16638pub unsafe fn _mm512_i32gather_ps<const SCALE: i32>(offsets: __m512i, slice: *const u8) -> __m512 {
16639    static_assert_imm8_scale!(SCALE);
16640    let zero = f32x16::ZERO;
16641    let neg_one = -1;
16642    let slice = slice as *const i8;
16643    let offsets = offsets.as_i32x16();
16644    let r = vgatherdps(zero, slice, offsets, neg_one, SCALE);
16645    transmute(r)
16646}
16647
16648/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16649///
16650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_ps&expand=3011)
16651#[inline]
16652#[target_feature(enable = "avx512f")]
16653#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16654#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
16655#[rustc_legacy_const_generics(4)]
16656pub unsafe fn _mm512_mask_i32gather_ps<const SCALE: i32>(
16657    src: __m512,
16658    mask: __mmask16,
16659    offsets: __m512i,
16660    slice: *const u8,
16661) -> __m512 {
16662    static_assert_imm8_scale!(SCALE);
16663    let src = src.as_f32x16();
16664    let slice = slice as *const i8;
16665    let offsets = offsets.as_i32x16();
16666    let r = vgatherdps(src, slice, offsets, mask as i16, SCALE);
16667    transmute(r)
16668}
16669
16670/// Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16671///
16672/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_epi32&expand=2986)
16673#[inline]
16674#[target_feature(enable = "avx512f")]
16675#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16676#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
16677#[rustc_legacy_const_generics(2)]
16678pub unsafe fn _mm512_i32gather_epi32<const SCALE: i32>(
16679    offsets: __m512i,
16680    slice: *const u8,
16681) -> __m512i {
16682    static_assert_imm8_scale!(SCALE);
16683    let zero = i32x16::ZERO;
16684    let neg_one = -1;
16685    let slice = slice as *const i8;
16686    let offsets = offsets.as_i32x16();
16687    let r = vpgatherdd(zero, slice, offsets, neg_one, SCALE);
16688    transmute(r)
16689}
16690
16691/// Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16692///
16693/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_epi32&expand=2987)
16694#[inline]
16695#[target_feature(enable = "avx512f")]
16696#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16697#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
16698#[rustc_legacy_const_generics(4)]
16699pub unsafe fn _mm512_mask_i32gather_epi32<const SCALE: i32>(
16700    src: __m512i,
16701    mask: __mmask16,
16702    offsets: __m512i,
16703    slice: *const u8,
16704) -> __m512i {
16705    static_assert_imm8_scale!(SCALE);
16706    let src = src.as_i32x16();
16707    let mask = mask as i16;
16708    let slice = slice as *const i8;
16709    let offsets = offsets.as_i32x16();
16710    let r = vpgatherdd(src, slice, offsets, mask, SCALE);
16711    transmute(r)
16712}
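
// Illustrative sketch (not part of the upstream stdarch source): with a
// writemask, lanes whose mask bit is clear are copied from `src` instead of
// being loaded from memory. The helper name and buffer are hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _sketch_mask_i32gather_epi32() {
    let data: [i32; 16] = [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25];
    let idx = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let src = _mm512_set1_epi32(-1);
    // Only even-numbered lanes are gathered; odd lanes keep the value from src.
    let r = unsafe {
        _mm512_mask_i32gather_epi32::<4>(src, 0b0101_0101_0101_0101, idx, data.as_ptr() as *const u8)
    };
    let lanes: [i32; 16] = unsafe { mem::transmute(r) };
    assert_eq!(lanes[0], 10); // gathered from memory
    assert_eq!(lanes[1], -1); // copied from src
}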
16713
16714/// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16715///
16716/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_epi64&expand=2994)
16717#[inline]
16718#[target_feature(enable = "avx512f")]
16719#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16720#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
16721#[rustc_legacy_const_generics(2)]
16722pub unsafe fn _mm512_i32gather_epi64<const SCALE: i32>(
16723    offsets: __m256i,
16724    slice: *const u8,
16725) -> __m512i {
16726    static_assert_imm8_scale!(SCALE);
16727    let zero = i64x8::ZERO;
16728    let neg_one = -1;
16729    let slice = slice as *const i8;
16730    let offsets = offsets.as_i32x8();
16731    let r = vpgatherdq(zero, slice, offsets, neg_one, SCALE);
16732    transmute(r)
16733}
16734
16735/// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16736///
16737/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_epi64&expand=2995)
16738#[inline]
16739#[target_feature(enable = "avx512f")]
16740#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16741#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
16742#[rustc_legacy_const_generics(4)]
16743pub unsafe fn _mm512_mask_i32gather_epi64<const SCALE: i32>(
16744    src: __m512i,
16745    mask: __mmask8,
16746    offsets: __m256i,
16747    slice: *const u8,
16748) -> __m512i {
16749    static_assert_imm8_scale!(SCALE);
16750    let src = src.as_i64x8();
16751    let mask = mask as i8;
16752    let slice = slice as *const i8;
16753    let offsets = offsets.as_i32x8();
16754    let r = vpgatherdq(src, slice, offsets, mask, SCALE);
16755    transmute(r)
16756}
16757
16758/// Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16759///
16760/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_epi64&expand=3084)
16761#[inline]
16762#[target_feature(enable = "avx512f")]
16763#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16764#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
16765#[rustc_legacy_const_generics(2)]
16766pub unsafe fn _mm512_i64gather_epi64<const SCALE: i32>(
16767    offsets: __m512i,
16768    slice: *const u8,
16769) -> __m512i {
16770    static_assert_imm8_scale!(SCALE);
16771    let zero = i64x8::ZERO;
16772    let neg_one = -1;
16773    let slice = slice as *const i8;
16774    let offsets = offsets.as_i64x8();
16775    let r = vpgatherqq(zero, slice, offsets, neg_one, SCALE);
16776    transmute(r)
16777}
16778
16779/// Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16780///
16781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_epi64&expand=3085)
16782#[inline]
16783#[target_feature(enable = "avx512f")]
16784#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16785#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
16786#[rustc_legacy_const_generics(4)]
16787pub unsafe fn _mm512_mask_i64gather_epi64<const SCALE: i32>(
16788    src: __m512i,
16789    mask: __mmask8,
16790    offsets: __m512i,
16791    slice: *const u8,
16792) -> __m512i {
16793    static_assert_imm8_scale!(SCALE);
16794    let src = src.as_i64x8();
16795    let mask = mask as i8;
16796    let slice = slice as *const i8;
16797    let offsets = offsets.as_i64x8();
16798    let r = vpgatherqq(src, slice, offsets, mask, SCALE);
16799    transmute(r)
16800}
16801
16802/// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16803///
16804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_epi32&expand=3074)
16805#[inline]
16806#[target_feature(enable = "avx512f")]
16807#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16808#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
16809#[rustc_legacy_const_generics(2)]
16810pub unsafe fn _mm512_i64gather_epi32<const SCALE: i32>(
16811    offsets: __m512i,
16812    slice: *const u8,
16813) -> __m256i {
16814    static_assert_imm8_scale!(SCALE);
16815    let zeros = i32x8::ZERO;
16816    let neg_one = -1;
16817    let slice = slice as *const i8;
16818    let offsets = offsets.as_i64x8();
16819    let r = vpgatherqd(zeros, slice, offsets, neg_one, SCALE);
16820    transmute(r)
16821}
16822
16823/// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16824///
16825/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_epi32&expand=3075)
16826#[inline]
16827#[target_feature(enable = "avx512f")]
16828#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16829#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
16830#[rustc_legacy_const_generics(4)]
16831pub unsafe fn _mm512_mask_i64gather_epi32<const SCALE: i32>(
16832    src: __m256i,
16833    mask: __mmask8,
16834    offsets: __m512i,
16835    slice: *const u8,
16836) -> __m256i {
16837    static_assert_imm8_scale!(SCALE);
16838    let src = src.as_i32x8();
16839    let mask = mask as i8;
16840    let slice = slice as *const i8;
16841    let offsets = offsets.as_i64x8();
16842    let r = vpgatherqd(src, slice, offsets, mask, SCALE);
16843    transmute(r)
16844}
16845
16846/// Scatter double-precision (64-bit) floating-point elements from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16847///
16848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_pd&expand=3044)
16849#[inline]
16850#[target_feature(enable = "avx512f")]
16851#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16852#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
16853#[rustc_legacy_const_generics(3)]
16854pub unsafe fn _mm512_i32scatter_pd<const SCALE: i32>(
16855    slice: *mut u8,
16856    offsets: __m256i,
16857    src: __m512d,
16858) {
16859    static_assert_imm8_scale!(SCALE);
16860    let src = src.as_f64x8();
16861    let neg_one = -1;
16862    let slice = slice as *mut i8;
16863    let offsets = offsets.as_i32x8();
16864    vscatterdpd(slice, neg_one, offsets, src, SCALE);
16865}
16866
16867/// Scatter double-precision (64-bit) floating-point elements from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16868///
16869/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_pd&expand=3045)
16870#[inline]
16871#[target_feature(enable = "avx512f")]
16872#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16873#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
16874#[rustc_legacy_const_generics(4)]
16875pub unsafe fn _mm512_mask_i32scatter_pd<const SCALE: i32>(
16876    slice: *mut u8,
16877    mask: __mmask8,
16878    offsets: __m256i,
16879    src: __m512d,
16880) {
16881    static_assert_imm8_scale!(SCALE);
16882    let src = src.as_f64x8();
16883    let slice = slice as *mut i8;
16884    let offsets = offsets.as_i32x8();
16885    vscatterdpd(slice, mask as i8, offsets, src, SCALE);
16886}
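
// Illustrative sketch (not part of the upstream stdarch source): scatter is
// the store counterpart of gather; each f64 lane is written to the base
// pointer plus its 32-bit index times SCALE bytes. The helper name and buffer
// are hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx")]
unsafe fn _sketch_i32scatter_pd() {
    let mut data = [0.0f64; 16];
    let idx = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
    let src = _mm512_set1_pd(1.0);
    // Write 1.0 into every even-numbered slot of `data`.
    unsafe { _mm512_i32scatter_pd::<8>(data.as_mut_ptr() as *mut u8, idx, src) };
    assert_eq!(data[0], 1.0);
    assert_eq!(data[1], 0.0); // odd slots untouched
}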
16887
16888/// Scatter double-precision (64-bit) floating-point elements from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16889///
16890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_pd&expand=3122)
16891#[inline]
16892#[target_feature(enable = "avx512f")]
16893#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16894#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
16895#[rustc_legacy_const_generics(3)]
16896pub unsafe fn _mm512_i64scatter_pd<const SCALE: i32>(
16897    slice: *mut u8,
16898    offsets: __m512i,
16899    src: __m512d,
16900) {
16901    static_assert_imm8_scale!(SCALE);
16902    let src = src.as_f64x8();
16903    let neg_one = -1;
16904    let slice = slice as *mut i8;
16905    let offsets = offsets.as_i64x8();
16906    vscatterqpd(slice, neg_one, offsets, src, SCALE);
16907}
16908
16909/// Scatter double-precision (64-bit) floating-point elements from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16910///
16911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_pd&expand=3123)
16912#[inline]
16913#[target_feature(enable = "avx512f")]
16914#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16915#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
16916#[rustc_legacy_const_generics(4)]
16917pub unsafe fn _mm512_mask_i64scatter_pd<const SCALE: i32>(
16918    slice: *mut u8,
16919    mask: __mmask8,
16920    offsets: __m512i,
16921    src: __m512d,
16922) {
16923    static_assert_imm8_scale!(SCALE);
16924    let src = src.as_f64x8();
16925    let slice = slice as *mut i8;
16926    let offsets = offsets.as_i64x8();
16927    vscatterqpd(slice, mask as i8, offsets, src, SCALE);
16928}
16929
16930/// Scatter single-precision (32-bit) floating-point elements from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16931///
16932/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_ps&expand=3050)
16933#[inline]
16934#[target_feature(enable = "avx512f")]
16935#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16936#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
16937#[rustc_legacy_const_generics(3)]
16938pub unsafe fn _mm512_i32scatter_ps<const SCALE: i32>(
16939    slice: *mut u8,
16940    offsets: __m512i,
16941    src: __m512,
16942) {
16943    static_assert_imm8_scale!(SCALE);
16944    let src = src.as_f32x16();
16945    let neg_one = -1;
16946    let slice = slice as *mut i8;
16947    let offsets = offsets.as_i32x16();
16948    vscatterdps(slice, neg_one, offsets, src, SCALE);
16949}
16950
16951/// Scatter single-precision (32-bit) floating-point elements from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16952///
16953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_ps&expand=3051)
16954#[inline]
16955#[target_feature(enable = "avx512f")]
16956#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16957#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
16958#[rustc_legacy_const_generics(4)]
16959pub unsafe fn _mm512_mask_i32scatter_ps<const SCALE: i32>(
16960    slice: *mut u8,
16961    mask: __mmask16,
16962    offsets: __m512i,
16963    src: __m512,
16964) {
16965    static_assert_imm8_scale!(SCALE);
16966    let src = src.as_f32x16();
16967    let slice = slice as *mut i8;
16968    let offsets = offsets.as_i32x16();
16969    vscatterdps(slice, mask as i16, offsets, src, SCALE);
16970}
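
// Illustrative sketch (not part of the upstream stdarch source): with a mask,
// lanes whose bit is clear are simply not stored, so the destination memory
// keeps its previous contents there. The helper name and buffer are
// hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _sketch_mask_i32scatter_ps() {
    let mut data = [-1.0f32; 16];
    let idx = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let src = _mm512_set1_ps(2.0);
    // Store only the low eight lanes; data[8..] is left untouched.
    unsafe {
        _mm512_mask_i32scatter_ps::<4>(data.as_mut_ptr() as *mut u8, 0b00000000_11111111, idx, src)
    };
    assert_eq!(data[0], 2.0);
    assert_eq!(data[15], -1.0);
}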
16971
16972/// Scatter single-precision (32-bit) floating-point elements from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16973///
16974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_ps&expand=3128)
16975#[inline]
16976#[target_feature(enable = "avx512f")]
16977#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16978#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
16979#[rustc_legacy_const_generics(3)]
16980pub unsafe fn _mm512_i64scatter_ps<const SCALE: i32>(
16981    slice: *mut u8,
16982    offsets: __m512i,
16983    src: __m256,
16984) {
16985    static_assert_imm8_scale!(SCALE);
16986    let src = src.as_f32x8();
16987    let neg_one = -1;
16988    let slice = slice as *mut i8;
16989    let offsets = offsets.as_i64x8();
16990    vscatterqps(slice, neg_one, offsets, src, SCALE);
16991}
16992
16993/// Scatter single-precision (32-bit) floating-point elements from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16994///
16995/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_ps&expand=3129)
16996#[inline]
16997#[target_feature(enable = "avx512f")]
16998#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16999#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
17000#[rustc_legacy_const_generics(4)]
17001pub unsafe fn _mm512_mask_i64scatter_ps<const SCALE: i32>(
17002    slice: *mut u8,
17003    mask: __mmask8,
17004    offsets: __m512i,
17005    src: __m256,
17006) {
17007    static_assert_imm8_scale!(SCALE);
17008    let src = src.as_f32x8();
17009    let slice = slice as *mut i8;
17010    let offsets = offsets.as_i64x8();
17011    vscatterqps(slice, mask as i8, offsets, src, SCALE);
17012}
17013
17014/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17015///
17016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_epi64&expand=3038)
17017#[inline]
17018#[target_feature(enable = "avx512f")]
17019#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17020#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17021#[rustc_legacy_const_generics(3)]
17022pub unsafe fn _mm512_i32scatter_epi64<const SCALE: i32>(
17023    slice: *mut u8,
17024    offsets: __m256i,
17025    src: __m512i,
17026) {
17027    static_assert_imm8_scale!(SCALE);
17028    let src = src.as_i64x8();
17029    let neg_one = -1;
17030    let slice = slice as *mut i8;
17031    let offsets = offsets.as_i32x8();
17032    vpscatterdq(slice, neg_one, offsets, src, SCALE);
17033}
17034
17035/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17036///
17037/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_epi64&expand=3039)
17038#[inline]
17039#[target_feature(enable = "avx512f")]
17040#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17041#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17042#[rustc_legacy_const_generics(4)]
17043pub unsafe fn _mm512_mask_i32scatter_epi64<const SCALE: i32>(
17044    slice: *mut u8,
17045    mask: __mmask8,
17046    offsets: __m256i,
17047    src: __m512i,
17048) {
17049    static_assert_imm8_scale!(SCALE);
17050    let src = src.as_i64x8();
17051    let mask = mask as i8;
17052    let slice = slice as *mut i8;
17053    let offsets = offsets.as_i32x8();
17054    vpscatterdq(slice, mask, offsets, src, SCALE);
17055}
17056
17057/// Scatter 64-bit integers from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17058///
17059/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_epi64&expand=3116)
17060#[inline]
17061#[target_feature(enable = "avx512f")]
17062#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17063#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
17064#[rustc_legacy_const_generics(3)]
17065pub unsafe fn _mm512_i64scatter_epi64<const SCALE: i32>(
17066    slice: *mut u8,
17067    offsets: __m512i,
17068    src: __m512i,
17069) {
17070    static_assert_imm8_scale!(SCALE);
17071    let src = src.as_i64x8();
17072    let neg_one = -1;
17073    let slice = slice as *mut i8;
17074    let offsets = offsets.as_i64x8();
17075    vpscatterqq(slice, neg_one, offsets, src, SCALE);
17076}
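
// Illustrative usage sketch (hypothetical buffer and indices, not upstream code): scatter eight
// i64 values through 64-bit indices. For an `[i64]` destination the element stride is 8 bytes,
// hence SCALE = 8.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn i64scatter_epi64_usage_sketch() {
    let mut buf = [0i64; 8];
    // Reverse the lanes: lane 0 is stored to buf[7], lane 7 to buf[0].
    let vindex = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
    let values = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
    _mm512_i64scatter_epi64::<8>(buf.as_mut_ptr() as *mut u8, vindex, values);
    assert_eq!(buf, [7, 6, 5, 4, 3, 2, 1, 0]);
}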
17077
17078/// Scatter 64-bit integers from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17079///
17080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_epi64&expand=3117)
17081#[inline]
17082#[target_feature(enable = "avx512f")]
17083#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17084#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
17085#[rustc_legacy_const_generics(4)]
17086pub unsafe fn _mm512_mask_i64scatter_epi64<const SCALE: i32>(
17087    slice: *mut u8,
17088    mask: __mmask8,
17089    offsets: __m512i,
17090    src: __m512i,
17091) {
17092    static_assert_imm8_scale!(SCALE);
17093    let src = src.as_i64x8();
17094    let mask = mask as i8;
17095    let slice = slice as *mut i8;
17096    let offsets = offsets.as_i64x8();
17097    vpscatterqq(slice, mask, offsets, src, SCALE);
17098}
17099
17100/// Scatter 32-bit integers from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17101///
17102/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_epi32&expand=3032)
17103#[inline]
17104#[target_feature(enable = "avx512f")]
17105#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17106#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17107#[rustc_legacy_const_generics(3)]
17108pub unsafe fn _mm512_i32scatter_epi32<const SCALE: i32>(
17109    slice: *mut u8,
17110    offsets: __m512i,
17111    src: __m512i,
17112) {
17113    static_assert_imm8_scale!(SCALE);
17114    let src = src.as_i32x16();
17115    let neg_one = -1;
17116    let slice = slice as *mut i8;
17117    let offsets = offsets.as_i32x16();
17118    vpscatterdd(slice, neg_one, offsets, src, SCALE);
17119}
17120
17121/// Scatter 32-bit integers from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17122///
17123/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_epi32&expand=3033)
17124#[inline]
17125#[target_feature(enable = "avx512f")]
17126#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17127#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17128#[rustc_legacy_const_generics(4)]
17129pub unsafe fn _mm512_mask_i32scatter_epi32<const SCALE: i32>(
17130    slice: *mut u8,
17131    mask: __mmask16,
17132    offsets: __m512i,
17133    src: __m512i,
17134) {
17135    static_assert_imm8_scale!(SCALE);
17136    let src = src.as_i32x16();
17137    let mask = mask as i16;
17138    let slice = slice as *mut i8;
17139    let offsets = offsets.as_i32x16();
17140    vpscatterdd(slice, mask, offsets, src, SCALE);
17141}
17142
17143/// Scatter 32-bit integers from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17144///
17145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_epi32&expand=3108)
17146#[inline]
17147#[target_feature(enable = "avx512f")]
17148#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17149#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17150#[rustc_legacy_const_generics(3)]
17151pub unsafe fn _mm512_i64scatter_epi32<const SCALE: i32>(
17152    slice: *mut u8,
17153    offsets: __m512i,
17154    src: __m256i,
17155) {
17156    static_assert_imm8_scale!(SCALE);
17157    let src = src.as_i32x8();
17158    let neg_one = -1;
17159    let slice = slice as *mut i8;
17160    let offsets = offsets.as_i64x8();
17161    vpscatterqd(slice, neg_one, offsets, src, SCALE);
17162}
17163
17164/// Scatter 32-bit integers from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17165///
17166/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_epi32&expand=3109)
17167#[inline]
17168#[target_feature(enable = "avx512f")]
17169#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17170#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17171#[rustc_legacy_const_generics(4)]
17172pub unsafe fn _mm512_mask_i64scatter_epi32<const SCALE: i32>(
17173    slice: *mut u8,
17174    mask: __mmask8,
17175    offsets: __m512i,
17176    src: __m256i,
17177) {
17178    static_assert_imm8_scale!(SCALE);
17179    let src = src.as_i32x8();
17180    let mask = mask as i8;
17181    let slice = slice as *mut i8;
17182    let offsets = offsets.as_i64x8();
17183    vpscatterqd(slice, mask, offsets, src, SCALE);
17184}
17185
17186/// Loads 8 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
17187/// indices stored in the lower half of vindex scaled by scale and stores them in dst.
17188///
17189/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_epi64)
17190#[inline]
17191#[target_feature(enable = "avx512f")]
17192#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
17193#[rustc_legacy_const_generics(2)]
17194#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17195pub unsafe fn _mm512_i32logather_epi64<const SCALE: i32>(
17196    vindex: __m512i,
17197    base_addr: *const u8,
17198) -> __m512i {
17199    _mm512_i32gather_epi64::<SCALE>(_mm512_castsi512_si256(vindex), base_addr as _)
17200}
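
// Illustrative usage sketch (hypothetical table, not upstream code): `_mm512_i32logather_epi64`
// consumes only the eight 32-bit indices in the lower half of `vindex`; the upper 256 bits are
// ignored. SCALE = 8 because the table holds 64-bit elements.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn i32logather_epi64_usage_sketch() {
    let table: [i64; 8] = [10, 11, 12, 13, 14, 15, 16, 17];
    // Indices live in the low eight 32-bit lanes; the high eight lanes are arbitrary filler.
    let vindex = _mm512_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0, -1, -1, -1, -1, -1, -1, -1, -1);
    let r = _mm512_i32logather_epi64::<8>(vindex, table.as_ptr() as *const u8);
    let mut out = [0i64; 8];
    _mm512_storeu_epi64(out.as_mut_ptr(), r);
    assert_eq!(out, [17, 16, 15, 14, 13, 12, 11, 10]);
}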
17201
17202/// Loads 8 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
17203/// indices stored in the lower half of vindex scaled by scale and stores them in dst using writemask k
17204/// (elements are copied from src when the corresponding mask bit is not set).
17205///
17206/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_epi64)
17207#[inline]
17208#[target_feature(enable = "avx512f")]
17209#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
17210#[rustc_legacy_const_generics(4)]
17211#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17212pub unsafe fn _mm512_mask_i32logather_epi64<const SCALE: i32>(
17213    src: __m512i,
17214    k: __mmask8,
17215    vindex: __m512i,
17216    base_addr: *const u8,
17217) -> __m512i {
17218    _mm512_mask_i32gather_epi64::<SCALE>(src, k, _mm512_castsi512_si256(vindex), base_addr as _)
17219}
17220
17221/// Loads 8 double-precision (64-bit) floating-point elements from memory starting at location base_addr
17222/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale and stores them in dst.
17223///
17224/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_pd)
17225#[inline]
17226#[target_feature(enable = "avx512f")]
17227#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
17228#[rustc_legacy_const_generics(2)]
17229#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17230pub unsafe fn _mm512_i32logather_pd<const SCALE: i32>(
17231    vindex: __m512i,
17232    base_addr: *const u8,
17233) -> __m512d {
17234    _mm512_i32gather_pd::<SCALE>(_mm512_castsi512_si256(vindex), base_addr as _)
17235}
17236
17237/// Loads 8 double-precision (64-bit) floating-point elements from memory starting at location base_addr
17238/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale and stores them in dst
17239/// using writemask k (elements are copied from src when the corresponding mask bit is not set).
17240///
17241/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_pd)
17242#[inline]
17243#[target_feature(enable = "avx512f")]
17244#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
17245#[rustc_legacy_const_generics(4)]
17246#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17247pub unsafe fn _mm512_mask_i32logather_pd<const SCALE: i32>(
17248    src: __m512d,
17249    k: __mmask8,
17250    vindex: __m512i,
17251    base_addr: *const u8,
17252) -> __m512d {
17253    _mm512_mask_i32gather_pd::<SCALE>(src, k, _mm512_castsi512_si256(vindex), base_addr as _)
17254}
17255
17256/// Stores 8 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17257/// indices stored in the lower half of vindex scaled by scale.
17258///
17259/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_epi64)
17260#[inline]
17261#[target_feature(enable = "avx512f")]
17262#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17263#[rustc_legacy_const_generics(3)]
17264#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17265pub unsafe fn _mm512_i32loscatter_epi64<const SCALE: i32>(
17266    base_addr: *mut u8,
17267    vindex: __m512i,
17268    a: __m512i,
17269) {
17270    _mm512_i32scatter_epi64::<SCALE>(base_addr as _, _mm512_castsi512_si256(vindex), a)
17271}
17272
17273/// Stores 8 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17274/// indices stored in the lower half of vindex scaled by scale using writemask k (elements whose corresponding
17275/// mask bit is not set are not written to memory).
17276///
17277/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_epi64)
17278#[inline]
17279#[target_feature(enable = "avx512f")]
17280#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17281#[rustc_legacy_const_generics(4)]
17282#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17283pub unsafe fn _mm512_mask_i32loscatter_epi64<const SCALE: i32>(
17284    base_addr: *mut u8,
17285    k: __mmask8,
17286    vindex: __m512i,
17287    a: __m512i,
17288) {
17289    _mm512_mask_i32scatter_epi64::<SCALE>(base_addr as _, k, _mm512_castsi512_si256(vindex), a)
17290}
17291
17292/// Stores 8 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17293/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale.
17294///
17295/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_pd)
17296#[inline]
17297#[target_feature(enable = "avx512f")]
17298#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17299#[rustc_legacy_const_generics(3)]
17300#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17301pub unsafe fn _mm512_i32loscatter_pd<const SCALE: i32>(
17302    base_addr: *mut u8,
17303    vindex: __m512i,
17304    a: __m512d,
17305) {
17306    _mm512_i32scatter_pd::<SCALE>(base_addr as _, _mm512_castsi512_si256(vindex), a)
17307}
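
// Illustrative usage sketch (hypothetical data, not upstream code): like the gather sketch above,
// `_mm512_i32loscatter_pd` only reads the eight 32-bit indices in the lower half of `vindex`.
// SCALE = 8 for an `[f64]` destination.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn i32loscatter_pd_usage_sketch() {
    let mut buf = [0.0f64; 8];
    let vindex = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
    let values = _mm512_setr_pd(0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5);
    _mm512_i32loscatter_pd::<8>(buf.as_mut_ptr() as *mut u8, vindex, values);
    assert_eq!(buf, [0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5]);
}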
17308
17309/// Stores 8 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17310/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale using writemask k
17311/// (elements whose corresponding mask bit is not set are not written to memory).
17312///
17313/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_pd)
17314#[inline]
17315#[target_feature(enable = "avx512f")]
17316#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17317#[rustc_legacy_const_generics(4)]
17318#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17319pub unsafe fn _mm512_mask_i32loscatter_pd<const SCALE: i32>(
17320    base_addr: *mut u8,
17321    k: __mmask8,
17322    vindex: __m512i,
17323    a: __m512d,
17324) {
17325    _mm512_mask_i32scatter_pd::<SCALE>(base_addr as _, k, _mm512_castsi512_si256(vindex), a)
17326}
17327
17328/// Stores 8 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17329/// indices stored in vindex scaled by scale.
17330///
17331/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_epi32)
17332#[inline]
17333#[target_feature(enable = "avx512f,avx512vl")]
17334#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17335#[rustc_legacy_const_generics(3)]
17336#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17337pub unsafe fn _mm256_i32scatter_epi32<const SCALE: i32>(
17338    base_addr: *mut u8,
17339    vindex: __m256i,
17340    a: __m256i,
17341) {
17342    static_assert_imm8_scale!(SCALE);
17343    vpscatterdd_256(base_addr as _, 0xff, vindex.as_i32x8(), a.as_i32x8(), SCALE)
17344}
17345
17346/// Stores 8 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17347/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17348/// are not written to memory).
17349///
17350/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_epi32)
17351#[inline]
17352#[target_feature(enable = "avx512f,avx512vl")]
17353#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17354#[rustc_legacy_const_generics(4)]
17355#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17356pub unsafe fn _mm256_mask_i32scatter_epi32<const SCALE: i32>(
17357    base_addr: *mut u8,
17358    k: __mmask8,
17359    vindex: __m256i,
17360    a: __m256i,
17361) {
17362    static_assert_imm8_scale!(SCALE);
17363    vpscatterdd_256(base_addr as _, k, vindex.as_i32x8(), a.as_i32x8(), SCALE)
17364}
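
// Illustrative usage sketch (hypothetical data, not upstream code): masked 256-bit scatter of
// eight i32 lanes. Mask bit i controls lane i, and SCALE = 4 because the destination holds
// 32-bit elements.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn mm256_mask_i32scatter_epi32_usage_sketch() {
    let mut buf = [0i32; 8];
    let vindex = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
    let values = _mm256_set1_epi32(9);
    // Mask 0b0101_0101: only the even lanes are stored.
    _mm256_mask_i32scatter_epi32::<4>(buf.as_mut_ptr() as *mut u8, 0b0101_0101, vindex, values);
    assert_eq!(buf, [9, 0, 9, 0, 9, 0, 9, 0]);
}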
17365
17366/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17367///
17368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32scatter_epi64&expand=4099)
17369#[inline]
17370#[target_feature(enable = "avx512f,avx512vl")]
17371#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17372#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17373#[rustc_legacy_const_generics(3)]
17374pub unsafe fn _mm256_i32scatter_epi64<const SCALE: i32>(
17375    slice: *mut u8,
17376    offsets: __m128i,
17377    src: __m256i,
17378) {
17379    static_assert_imm8_scale!(SCALE);
17380    let src = src.as_i64x4();
17381    let slice = slice as *mut i8;
17382    let offsets = offsets.as_i32x4();
17383    vpscatterdq_256(slice, 0xff, offsets, src, SCALE);
17384}
17385
17386/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17387/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17388/// are not written to memory).
17389///
17390/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_epi64)
17391#[inline]
17392#[target_feature(enable = "avx512f,avx512vl")]
17393#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17394#[rustc_legacy_const_generics(4)]
17395#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17396pub unsafe fn _mm256_mask_i32scatter_epi64<const SCALE: i32>(
17397    base_addr: *mut u8,
17398    k: __mmask8,
17399    vindex: __m128i,
17400    a: __m256i,
17401) {
17402    static_assert_imm8_scale!(SCALE);
17403    vpscatterdq_256(base_addr as _, k, vindex.as_i32x4(), a.as_i64x4(), SCALE)
17404}
17405
17406/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17407/// at packed 32-bit integer indices stored in vindex scaled by scale.
17408///
17409/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_pd)
17410#[inline]
17411#[target_feature(enable = "avx512f,avx512vl")]
17412#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17413#[rustc_legacy_const_generics(3)]
17414#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17415pub unsafe fn _mm256_i32scatter_pd<const SCALE: i32>(
17416    base_addr: *mut u8,
17417    vindex: __m128i,
17418    a: __m256d,
17419) {
17420    static_assert_imm8_scale!(SCALE);
17421    vscatterdpd_256(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f64x4(), SCALE)
17422}
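
// Illustrative usage sketch (hypothetical data, not upstream code): scatter four f64 values with
// 32-bit indices held in an `__m128i`. The `[f64]` element stride is 8 bytes, so SCALE = 8.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn mm256_i32scatter_pd_usage_sketch() {
    let mut buf = [0.0f64; 4];
    // Swap the two halves: lanes 0/1 go to buf[2]/buf[3], lanes 2/3 to buf[0]/buf[1].
    let vindex = _mm_setr_epi32(2, 3, 0, 1);
    let values = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
    _mm256_i32scatter_pd::<8>(buf.as_mut_ptr() as *mut u8, vindex, values);
    assert_eq!(buf, [3.0, 4.0, 1.0, 2.0]);
}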
17423
17424/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17425/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
17426/// mask bit is not set are not written to memory).
17427///
17428/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_pd)
17429#[inline]
17430#[target_feature(enable = "avx512f,avx512vl")]
17431#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17432#[rustc_legacy_const_generics(4)]
17433#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17434pub unsafe fn _mm256_mask_i32scatter_pd<const SCALE: i32>(
17435    base_addr: *mut u8,
17436    k: __mmask8,
17437    vindex: __m128i,
17438    a: __m256d,
17439) {
17440    static_assert_imm8_scale!(SCALE);
17441    vscatterdpd_256(base_addr as _, k, vindex.as_i32x4(), a.as_f64x4(), SCALE)
17442}
17443
17444/// Stores 8 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
17445/// at packed 32-bit integer indices stored in vindex scaled by scale.
17446///
17447/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_ps)
17448#[inline]
17449#[target_feature(enable = "avx512f,avx512vl")]
17450#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17451#[rustc_legacy_const_generics(3)]
17452#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17453pub unsafe fn _mm256_i32scatter_ps<const SCALE: i32>(
17454    base_addr: *mut u8,
17455    vindex: __m256i,
17456    a: __m256,
17457) {
17458    static_assert_imm8_scale!(SCALE);
17459    vscatterdps_256(base_addr as _, 0xff, vindex.as_i32x8(), a.as_f32x8(), SCALE)
17460}
17461
17462/// Stores 8 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
17463/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
17464/// mask bit is not set are not written to memory).
17465///
17466/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_ps)
17467#[inline]
17468#[target_feature(enable = "avx512f,avx512vl")]
17469#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17470#[rustc_legacy_const_generics(4)]
17471#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17472pub unsafe fn _mm256_mask_i32scatter_ps<const SCALE: i32>(
17473    base_addr: *mut u8,
17474    k: __mmask8,
17475    vindex: __m256i,
17476    a: __m256,
17477) {
17478    static_assert_imm8_scale!(SCALE);
17479    vscatterdps_256(base_addr as _, k, vindex.as_i32x8(), a.as_f32x8(), SCALE)
17480}
17481
17482/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
17483/// indices stored in vindex scaled by scale.
17484///
17485/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_epi32)
17486#[inline]
17487#[target_feature(enable = "avx512f,avx512vl")]
17488#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17489#[rustc_legacy_const_generics(3)]
17490#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17491pub unsafe fn _mm256_i64scatter_epi32<const SCALE: i32>(
17492    base_addr: *mut u8,
17493    vindex: __m256i,
17494    a: __m128i,
17495) {
17496    static_assert_imm8_scale!(SCALE);
17497    vpscatterqd_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_i32x4(), SCALE)
17498}
17499
17500/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
17501/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17502/// are not written to memory).
17503///
17504/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_epi32)
17505#[inline]
17506#[target_feature(enable = "avx512f,avx512vl")]
17507#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17508#[rustc_legacy_const_generics(4)]
17509#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17510pub unsafe fn _mm256_mask_i64scatter_epi32<const SCALE: i32>(
17511    base_addr: *mut u8,
17512    k: __mmask8,
17513    vindex: __m256i,
17514    a: __m128i,
17515) {
17516    static_assert_imm8_scale!(SCALE);
17517    vpscatterqd_256(base_addr as _, k, vindex.as_i64x4(), a.as_i32x4(), SCALE)
17518}
17519
17520/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
17521/// indices stored in vindex scaled by scale.
17522///
17523/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_epi64)
17524#[inline]
17525#[target_feature(enable = "avx512f,avx512vl")]
17526#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
17527#[rustc_legacy_const_generics(3)]
17528#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17529pub unsafe fn _mm256_i64scatter_epi64<const SCALE: i32>(
17530    base_addr: *mut u8,
17531    vindex: __m256i,
17532    a: __m256i,
17533) {
17534    static_assert_imm8_scale!(SCALE);
17535    vpscatterqq_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_i64x4(), SCALE)
17536}
17537
17538/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
17539/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17540/// are not written to memory).
17541///
17542/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_epi64)
17543#[inline]
17544#[target_feature(enable = "avx512f,avx512vl")]
17545#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
17546#[rustc_legacy_const_generics(4)]
17547#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17548pub unsafe fn _mm256_mask_i64scatter_epi64<const SCALE: i32>(
17549    base_addr: *mut u8,
17550    k: __mmask8,
17551    vindex: __m256i,
17552    a: __m256i,
17553) {
17554    static_assert_imm8_scale!(SCALE);
17555    vpscatterqq_256(base_addr as _, k, vindex.as_i64x4(), a.as_i64x4(), SCALE)
17556}
17557
17558/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17559/// at packed 64-bit integer indices stored in vindex scaled by scale.
17560///
17561/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_pd)
17562#[inline]
17563#[target_feature(enable = "avx512f,avx512vl")]
17564#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
17565#[rustc_legacy_const_generics(3)]
17566#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17567pub unsafe fn _mm256_i64scatter_pd<const SCALE: i32>(
17568    base_addr: *mut u8,
17569    vindex: __m256i,
17570    a: __m256d,
17571) {
17572    static_assert_imm8_scale!(SCALE);
17573    vscatterqpd_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_f64x4(), SCALE)
17574}
17575
17576/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17577/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
17578/// mask bit is not set are not written to memory).
17579///
17580/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_pd)
17581#[inline]
17582#[target_feature(enable = "avx512f,avx512vl")]
17583#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
17584#[rustc_legacy_const_generics(4)]
17585#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17586pub unsafe fn _mm256_mask_i64scatter_pd<const SCALE: i32>(
17587    base_addr: *mut u8,
17588    k: __mmask8,
17589    vindex: __m256i,
17590    a: __m256d,
17591) {
17592    static_assert_imm8_scale!(SCALE);
17593    vscatterqpd_256(base_addr as _, k, vindex.as_i64x4(), a.as_f64x4(), SCALE)
17594}
17595
17596/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
17597/// at packed 64-bit integer indices stored in vindex scaled by scale.
17598///
17599/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_ps)
17600#[inline]
17601#[target_feature(enable = "avx512f,avx512vl")]
17602#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
17603#[rustc_legacy_const_generics(3)]
17604#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17605pub unsafe fn _mm256_i64scatter_ps<const SCALE: i32>(
17606    base_addr: *mut u8,
17607    vindex: __m256i,
17608    a: __m128,
17609) {
17610    static_assert_imm8_scale!(SCALE);
17611    vscatterqps_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_f32x4(), SCALE)
17612}
17613
17614/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
17615/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
17616/// mask bit is not set are not written to memory).
17617///
17618/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_ps)
17619#[inline]
17620#[target_feature(enable = "avx512f,avx512vl")]
17621#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
17622#[rustc_legacy_const_generics(4)]
17623#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17624pub unsafe fn _mm256_mask_i64scatter_ps<const SCALE: i32>(
17625    base_addr: *mut u8,
17626    k: __mmask8,
17627    vindex: __m256i,
17628    a: __m128,
17629) {
17630    static_assert_imm8_scale!(SCALE);
17631    vscatterqps_256(base_addr as _, k, vindex.as_i64x4(), a.as_f32x4(), SCALE)
17632}
17633
17634/// Loads 8 32-bit integer elements from memory starting at location base_addr at packed 32-bit integer
17635/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
17636/// mask bit is not set).
17637///
17638/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_epi32)
17639#[inline]
17640#[target_feature(enable = "avx512f,avx512vl")]
17641#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
17642#[rustc_legacy_const_generics(4)]
17643#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17644pub unsafe fn _mm256_mmask_i32gather_epi32<const SCALE: i32>(
17645    src: __m256i,
17646    k: __mmask8,
17647    vindex: __m256i,
17648    base_addr: *const u8,
17649) -> __m256i {
17650    static_assert_imm8_scale!(SCALE);
17651    transmute(vpgatherdd_256(
17652        src.as_i32x8(),
17653        base_addr as _,
17654        vindex.as_i32x8(),
17655        k,
17656        SCALE,
17657    ))
17658}
17659
17660/// Loads 4 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
17661/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
17662/// mask bit is not set).
17663///
17664/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_epi64)
17665#[inline]
17666#[target_feature(enable = "avx512f,avx512vl")]
17667#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
17668#[rustc_legacy_const_generics(4)]
17669#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17670pub unsafe fn _mm256_mmask_i32gather_epi64<const SCALE: i32>(
17671    src: __m256i,
17672    k: __mmask8,
17673    vindex: __m128i,
17674    base_addr: *const u8,
17675) -> __m256i {
17676    static_assert_imm8_scale!(SCALE);
17677    transmute(vpgatherdq_256(
17678        src.as_i64x4(),
17679        base_addr as _,
17680        vindex.as_i32x4(),
17681        k,
17682        SCALE,
17683    ))
17684}
17685
17686/// Loads 4 double-precision (64-bit) floating-point elements from memory starting at location base_addr
17687/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
17688/// from src when the corresponding mask bit is not set).
17689///
17690/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_pd)
17691#[inline]
17692#[target_feature(enable = "avx512f,avx512vl")]
17693#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
17694#[rustc_legacy_const_generics(4)]
17695#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17696pub unsafe fn _mm256_mmask_i32gather_pd<const SCALE: i32>(
17697    src: __m256d,
17698    k: __mmask8,
17699    vindex: __m128i,
17700    base_addr: *const u8,
17701) -> __m256d {
17702    static_assert_imm8_scale!(SCALE);
17703    transmute(vgatherdpd_256(
17704        src.as_f64x4(),
17705        base_addr as _,
17706        vindex.as_i32x4(),
17707        k,
17708        SCALE,
17709    ))
17710}
17711
17712/// Loads 8 single-precision (32-bit) floating-point elements from memory starting at location base_addr
17713/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
17714/// from src when the corresponding mask bit is not set).
17715///
17716/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_ps)
17717#[inline]
17718#[target_feature(enable = "avx512f,avx512vl")]
17719#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
17720#[rustc_legacy_const_generics(4)]
17721#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17722pub unsafe fn _mm256_mmask_i32gather_ps<const SCALE: i32>(
17723    src: __m256,
17724    k: __mmask8,
17725    vindex: __m256i,
17726    base_addr: *const u8,
17727) -> __m256 {
17728    static_assert_imm8_scale!(SCALE);
17729    transmute(vgatherdps_256(
17730        src.as_f32x8(),
17731        base_addr as _,
17732        vindex.as_i32x8(),
17733        k,
17734        SCALE,
17735    ))
17736}
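
// Illustrative usage sketch (hypothetical table, not upstream code): masked 256-bit gather of
// eight f32 values. Lanes whose mask bit is clear keep the value from `src`; SCALE = 4 for an
// `[f32]` table.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn mm256_mmask_i32gather_ps_usage_sketch() {
    let table: [f32; 8] = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0];
    let src = _mm256_set1_ps(-1.0);
    let vindex = _mm256_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0);
    // Mask 0x0f: the low four lanes are gathered, the high four are merged from `src`.
    let r = _mm256_mmask_i32gather_ps::<4>(src, 0x0f, vindex, table.as_ptr() as *const u8);
    let mut out = [0.0f32; 8];
    _mm256_storeu_ps(out.as_mut_ptr(), r);
    assert_eq!(out, [7.0, 6.0, 5.0, 4.0, -1.0, -1.0, -1.0, -1.0]);
}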
17737
17738/// Loads 4 32-bit integer elements from memory starting at location base_addr at packed 64-bit integer
17739/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
17740/// mask bit is not set).
17741///
17742/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_epi32)
17743#[inline]
17744#[target_feature(enable = "avx512f,avx512vl")]
17745#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
17746#[rustc_legacy_const_generics(4)]
17747#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17748pub unsafe fn _mm256_mmask_i64gather_epi32<const SCALE: i32>(
17749    src: __m128i,
17750    k: __mmask8,
17751    vindex: __m256i,
17752    base_addr: *const u8,
17753) -> __m128i {
17754    static_assert_imm8_scale!(SCALE);
17755    transmute(vpgatherqd_256(
17756        src.as_i32x4(),
17757        base_addr as _,
17758        vindex.as_i64x4(),
17759        k,
17760        SCALE,
17761    ))
17762}
17763
17764/// Loads 4 64-bit integer elements from memory starting at location base_addr at packed 64-bit integer
17765/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
17766/// mask bit is not set).
17767///
17768/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_epi64)
17769#[inline]
17770#[target_feature(enable = "avx512f,avx512vl")]
17771#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
17772#[rustc_legacy_const_generics(4)]
17773#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17774pub unsafe fn _mm256_mmask_i64gather_epi64<const SCALE: i32>(
17775    src: __m256i,
17776    k: __mmask8,
17777    vindex: __m256i,
17778    base_addr: *const u8,
17779) -> __m256i {
17780    static_assert_imm8_scale!(SCALE);
17781    transmute(vpgatherqq_256(
17782        src.as_i64x4(),
17783        base_addr as _,
17784        vindex.as_i64x4(),
17785        k,
17786        SCALE,
17787    ))
17788}
17789
17790/// Loads 4 double-precision (64-bit) floating-point elements from memory starting at location base_addr
17791/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
17792/// from src when the corresponding mask bit is not set).
17793///
17794/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_pd)
17795#[inline]
17796#[target_feature(enable = "avx512f,avx512vl")]
17797#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
17798#[rustc_legacy_const_generics(4)]
17799#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17800pub unsafe fn _mm256_mmask_i64gather_pd<const SCALE: i32>(
17801    src: __m256d,
17802    k: __mmask8,
17803    vindex: __m256i,
17804    base_addr: *const u8,
17805) -> __m256d {
17806    static_assert_imm8_scale!(SCALE);
17807    transmute(vgatherqpd_256(
17808        src.as_f64x4(),
17809        base_addr as _,
17810        vindex.as_i64x4(),
17811        k,
17812        SCALE,
17813    ))
17814}
17815
17816/// Loads 4 single-precision (32-bit) floating-point elements from memory starting at location base_addr
17817/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
17818/// from src when the corresponding mask bit is not set).
17819///
17820/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_ps)
17821#[inline]
17822#[target_feature(enable = "avx512f,avx512vl")]
17823#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
17824#[rustc_legacy_const_generics(4)]
17825#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17826pub unsafe fn _mm256_mmask_i64gather_ps<const SCALE: i32>(
17827    src: __m128,
17828    k: __mmask8,
17829    vindex: __m256i,
17830    base_addr: *const u8,
17831) -> __m128 {
17832    static_assert_imm8_scale!(SCALE);
17833    transmute(vgatherqps_256(
17834        src.as_f32x4(),
17835        base_addr as _,
17836        vindex.as_i64x4(),
17837        k,
17838        SCALE,
17839    ))
17840}
17841
17842/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17843/// indices stored in vindex scaled by scale.
17844///
17845/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi32)
17846#[inline]
17847#[target_feature(enable = "avx512f,avx512vl")]
17848#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17849#[rustc_legacy_const_generics(3)]
17850#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17851pub unsafe fn _mm_i32scatter_epi32<const SCALE: i32>(
17852    base_addr: *mut u8,
17853    vindex: __m128i,
17854    a: __m128i,
17855) {
17856    static_assert_imm8_scale!(SCALE);
17857    vpscatterdd_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_i32x4(), SCALE)
17858}
17859
17860/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17861/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17862/// are not written to memory).
17863///
17864/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi32)
17865#[inline]
17866#[target_feature(enable = "avx512f,avx512vl")]
17867#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17868#[rustc_legacy_const_generics(4)]
17869#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17870pub unsafe fn _mm_mask_i32scatter_epi32<const SCALE: i32>(
17871    base_addr: *mut u8,
17872    k: __mmask8,
17873    vindex: __m128i,
17874    a: __m128i,
17875) {
17876    static_assert_imm8_scale!(SCALE);
17877    vpscatterdd_128(base_addr as _, k, vindex.as_i32x4(), a.as_i32x4(), SCALE)
17878}
17879
17880/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17881/// indices stored in vindex scaled by scale.
17882///
17883/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi64)
17884#[inline]
17885#[target_feature(enable = "avx512f,avx512vl")]
17886#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17887#[rustc_legacy_const_generics(3)]
17888#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17889pub unsafe fn _mm_i32scatter_epi64<const SCALE: i32>(
17890    base_addr: *mut u8,
17891    vindex: __m128i,
17892    a: __m128i,
17893) {
17894    static_assert_imm8_scale!(SCALE);
17895    vpscatterdq_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_i64x2(), SCALE)
17896}
17897
17898/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17899/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17900/// are not written to memory).
17901///
17902/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi64)
17903#[inline]
17904#[target_feature(enable = "avx512f,avx512vl")]
17905#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17906#[rustc_legacy_const_generics(4)]
17907#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17908pub unsafe fn _mm_mask_i32scatter_epi64<const SCALE: i32>(
17909    base_addr: *mut u8,
17910    k: __mmask8,
17911    vindex: __m128i,
17912    a: __m128i,
17913) {
17914    static_assert_imm8_scale!(SCALE);
17915    vpscatterdq_128(base_addr as _, k, vindex.as_i32x4(), a.as_i64x2(), SCALE)
17916}
17917
17918/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17919/// at packed 32-bit integer indices stored in vindex scaled by scale.
17920///
17921/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_pd)
17922#[inline]
17923#[target_feature(enable = "avx512f,avx512vl")]
17924#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17925#[rustc_legacy_const_generics(3)]
17926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17927pub unsafe fn _mm_i32scatter_pd<const SCALE: i32>(base_addr: *mut u8, vindex: __m128i, a: __m128d) {
17928    static_assert_imm8_scale!(SCALE);
17929    vscatterdpd_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f64x2(), SCALE)
17930}
17931
17932/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17933/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
17934/// mask bit is not set are not written to memory).
17935///
17936/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_pd)
17937#[inline]
17938#[target_feature(enable = "avx512f,avx512vl")]
17939#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17940#[rustc_legacy_const_generics(4)]
17941#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17942pub unsafe fn _mm_mask_i32scatter_pd<const SCALE: i32>(
17943    base_addr: *mut u8,
17944    k: __mmask8,
17945    vindex: __m128i,
17946    a: __m128d,
17947) {
17948    static_assert_imm8_scale!(SCALE);
17949    vscatterdpd_128(base_addr as _, k, vindex.as_i32x4(), a.as_f64x2(), SCALE)
17950}
17951
17952/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
17953/// at packed 32-bit integer indices stored in vindex scaled by scale.
17954///
17955/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_ps)
17956#[inline]
17957#[target_feature(enable = "avx512f,avx512vl")]
17958#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17959#[rustc_legacy_const_generics(3)]
17960#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17961pub unsafe fn _mm_i32scatter_ps<const SCALE: i32>(base_addr: *mut u8, vindex: __m128i, a: __m128) {
17962    static_assert_imm8_scale!(SCALE);
17963    vscatterdps_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f32x4(), SCALE)
17964}
17965
17966/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
17967/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
17968/// mask bit is not set are not written to memory).
17969///
17970/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_ps)
17971#[inline]
17972#[target_feature(enable = "avx512f,avx512vl")]
17973#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17974#[rustc_legacy_const_generics(4)]
17975#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17976pub unsafe fn _mm_mask_i32scatter_ps<const SCALE: i32>(
17977    base_addr: *mut u8,
17978    k: __mmask8,
17979    vindex: __m128i,
17980    a: __m128,
17981) {
17982    static_assert_imm8_scale!(SCALE);
17983    vscatterdps_128(base_addr as _, k, vindex.as_i32x4(), a.as_f32x4(), SCALE)
17984}
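
// Illustrative usage sketch (hypothetical data, not upstream code): 128-bit masked scatter of
// four f32 lanes; SCALE = 4 for an `[f32]` destination.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn mm_mask_i32scatter_ps_usage_sketch() {
    let mut buf = [0.0f32; 4];
    let vindex = _mm_setr_epi32(3, 2, 1, 0);
    let values = _mm_setr_ps(10.0, 20.0, 30.0, 40.0);
    // Mask 0b1001: only lanes 0 and 3 are stored, landing in buf[3] and buf[0] respectively.
    _mm_mask_i32scatter_ps::<4>(buf.as_mut_ptr() as *mut u8, 0b1001, vindex, values);
    assert_eq!(buf, [40.0, 0.0, 0.0, 10.0]);
}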
17985
17986/// Stores 2 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
17987/// indices stored in vindex scaled by scale.
17988///
17989/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_epi32)
17990#[inline]
17991#[target_feature(enable = "avx512f,avx512vl")]
17992#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17993#[rustc_legacy_const_generics(3)]
17994#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17995pub unsafe fn _mm_i64scatter_epi32<const SCALE: i32>(
17996    base_addr: *mut u8,
17997    vindex: __m128i,
17998    a: __m128i,
17999) {
18000    static_assert_imm8_scale!(SCALE);
18001    vpscatterqd_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_i32x4(), SCALE)
18002}
18003
18004/// Stores 2 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
18005/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
18006/// are not written to memory).
18007///
18008/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_epi32)
18009#[inline]
18010#[target_feature(enable = "avx512f,avx512vl")]
18011#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
18012#[rustc_legacy_const_generics(4)]
18013#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18014pub unsafe fn _mm_mask_i64scatter_epi32<const SCALE: i32>(
18015    base_addr: *mut u8,
18016    k: __mmask8,
18017    vindex: __m128i,
18018    a: __m128i,
18019) {
18020    static_assert_imm8_scale!(SCALE);
18021    vpscatterqd_128(base_addr as _, k, vindex.as_i64x2(), a.as_i32x4(), SCALE)
18022}
18023
18024/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
18025/// indices stored in vindex scaled by scale.
18026///
18027/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_epi64)
18028#[inline]
18029#[target_feature(enable = "avx512f,avx512vl")]
18030#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
18031#[rustc_legacy_const_generics(3)]
18032#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18033pub unsafe fn _mm_i64scatter_epi64<const SCALE: i32>(
18034    base_addr: *mut u8,
18035    vindex: __m128i,
18036    a: __m128i,
18037) {
18038    static_assert_imm8_scale!(SCALE);
18039    vpscatterqq_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_i64x2(), SCALE)
18040}
18041
18042/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
18043/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
18044/// are not written to memory).
18045///
18046/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_epi64)
18047#[inline]
18048#[target_feature(enable = "avx512f,avx512vl")]
18049#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
18050#[rustc_legacy_const_generics(4)]
18051#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18052pub unsafe fn _mm_mask_i64scatter_epi64<const SCALE: i32>(
18053    base_addr: *mut u8,
18054    k: __mmask8,
18055    vindex: __m128i,
18056    a: __m128i,
18057) {
18058    static_assert_imm8_scale!(SCALE);
18059    vpscatterqq_128(base_addr as _, k, vindex.as_i64x2(), a.as_i64x2(), SCALE)
18060}
18061
18062/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
18063/// at packed 64-bit integer indices stored in vindex scaled by scale.
18064///
18065/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_pd)
18066#[inline]
18067#[target_feature(enable = "avx512f,avx512vl")]
18068#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
18069#[rustc_legacy_const_generics(3)]
18070#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18071pub unsafe fn _mm_i64scatter_pd<const SCALE: i32>(base_addr: *mut u8, vindex: __m128i, a: __m128d) {
18072    static_assert_imm8_scale!(SCALE);
18073    vscatterqpd_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_f64x2(), SCALE)
18074}
18075
18076/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
18077/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
18078/// mask bit is not set are not written to memory).
18079///
18080/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_pd)
18081#[inline]
18082#[target_feature(enable = "avx512f,avx512vl")]
18083#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
18084#[rustc_legacy_const_generics(4)]
18085#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18086pub unsafe fn _mm_mask_i64scatter_pd<const SCALE: i32>(
18087    base_addr: *mut u8,
18088    k: __mmask8,
18089    vindex: __m128i,
18090    a: __m128d,
18091) {
18092    static_assert_imm8_scale!(SCALE);
18093    vscatterqpd_128(base_addr as _, k, vindex.as_i64x2(), a.as_f64x2(), SCALE)
18094}
18095
18096/// Stores 2 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
18097/// at packed 64-bit integer indices stored in vindex scaled by scale.
18098///
18099/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_ps)
18100#[inline]
18101#[target_feature(enable = "avx512f,avx512vl")]
18102#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
18103#[rustc_legacy_const_generics(3)]
18104#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18105pub unsafe fn _mm_i64scatter_ps<const SCALE: i32>(base_addr: *mut u8, vindex: __m128i, a: __m128) {
18106    static_assert_imm8_scale!(SCALE);
18107    vscatterqps_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_f32x4(), SCALE)
18108}
18109
18110/// Stores 2 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
18111/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set are not written to memory).
18112///
18113/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_ps)
18114#[inline]
18115#[target_feature(enable = "avx512f,avx512vl")]
18116#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
18117#[rustc_legacy_const_generics(4)]
18118#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18119pub unsafe fn _mm_mask_i64scatter_ps<const SCALE: i32>(
18120    base_addr: *mut u8,
18121    k: __mmask8,
18122    vindex: __m128i,
18123    a: __m128,
18124) {
18125    static_assert_imm8_scale!(SCALE);
18126    vscatterqps_128(base_addr as _, k, vindex.as_i64x2(), a.as_f32x4(), SCALE)
18127}
18128
18129/// Loads 4 32-bit integer elements from memory starting at location base_addr at packed 32-bit integer
18130/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18131/// mask bit is not set).
18132///
18133/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi32)
18134#[inline]
18135#[target_feature(enable = "avx512f,avx512vl")]
18136#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
18137#[rustc_legacy_const_generics(4)]
18138#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18139pub unsafe fn _mm_mmask_i32gather_epi32<const SCALE: i32>(
18140    src: __m128i,
18141    k: __mmask8,
18142    vindex: __m128i,
18143    base_addr: *const u8,
18144) -> __m128i {
18145    static_assert_imm8_scale!(SCALE);
18146    transmute(vpgatherdd_128(
18147        src.as_i32x4(),
18148        base_addr as _,
18149        vindex.as_i32x4(),
18150        k,
18151        SCALE,
18152    ))
18153}
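
// Illustrative usage sketch (hypothetical table, not upstream code): 128-bit masked gather of
// four i32 values, with unselected lanes merged from `src`. SCALE = 4 for an `[i32]` table.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn mm_mmask_i32gather_epi32_usage_sketch() {
    let table: [i32; 4] = [100, 200, 300, 400];
    let src = _mm_set1_epi32(-1);
    let vindex = _mm_setr_epi32(3, 2, 1, 0);
    // Mask 0b0011: lanes 0 and 1 come from memory, lanes 2 and 3 from `src`.
    let r = _mm_mmask_i32gather_epi32::<4>(src, 0b0011, vindex, table.as_ptr() as *const u8);
    let mut out = [0i32; 4];
    _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
    assert_eq!(out, [400, 300, -1, -1]);
}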
18154
18155/// Loads 2 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
18156/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18157/// mask bit is not set).
18158///
18159/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi64)
18160#[inline]
18161#[target_feature(enable = "avx512f,avx512vl")]
18162#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
18163#[rustc_legacy_const_generics(4)]
18164#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18165pub unsafe fn _mm_mmask_i32gather_epi64<const SCALE: i32>(
18166    src: __m128i,
18167    k: __mmask8,
18168    vindex: __m128i,
18169    base_addr: *const u8,
18170) -> __m128i {
18171    static_assert_imm8_scale!(SCALE);
18172    transmute(vpgatherdq_128(
18173        src.as_i64x2(),
18174        base_addr as _,
18175        vindex.as_i32x4(),
18176        k,
18177        SCALE,
18178    ))
18179}
18180
18181/// Loads 2 double-precision (64-bit) floating-point elements from memory starting at location base_addr
18182/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18183/// from src when the corresponding mask bit is not set).
18184///
18185/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_pd)
18186#[inline]
18187#[target_feature(enable = "avx512f,avx512vl")]
18188#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
18189#[rustc_legacy_const_generics(4)]
18190#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18191pub unsafe fn _mm_mmask_i32gather_pd<const SCALE: i32>(
18192    src: __m128d,
18193    k: __mmask8,
18194    vindex: __m128i,
18195    base_addr: *const u8,
18196) -> __m128d {
18197    static_assert_imm8_scale!(SCALE);
18198    transmute(vgatherdpd_128(
18199        src.as_f64x2(),
18200        base_addr as _,
18201        vindex.as_i32x4(),
18202        k,
18203        SCALE,
18204    ))
18205}
18206
18207/// Loads 4 single-precision (32-bit) floating-point elements from memory starting at location base_addr
18208/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18209/// from src when the corresponding mask bit is not set).
18210///
18211/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_ps)
18212#[inline]
18213#[target_feature(enable = "avx512f,avx512vl")]
18214#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
18215#[rustc_legacy_const_generics(4)]
18216#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18217pub unsafe fn _mm_mmask_i32gather_ps<const SCALE: i32>(
18218    src: __m128,
18219    k: __mmask8,
18220    vindex: __m128i,
18221    base_addr: *const u8,
18222) -> __m128 {
18223    static_assert_imm8_scale!(SCALE);
18224    transmute(vgatherdps_128(
18225        src.as_f32x4(),
18226        base_addr as _,
18227        vindex.as_i32x4(),
18228        k,
18229        SCALE,
18230    ))
18231}
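
// --- Illustrative usage sketch (not part of the upstream source) ---
// Gather four `f32` values from a hypothetical lookup table with the masked
// 128-bit gather above. `SCALE = 4` because `f32` elements are 4 bytes apart;
// lanes whose bit in `k` is clear keep the corresponding lane of `src` and
// their indices are never dereferenced. The caller must ensure every selected
// index is in bounds of `table`.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_masked_gather_f32(table: &[f32], idx: [i32; 4], k: __mmask8) -> __m128 {
    let vindex = _mm_set_epi32(idx[3], idx[2], idx[1], idx[0]);
    let src = _mm_setzero_ps(); // returned unchanged in masked-off lanes
    _mm_mmask_i32gather_ps::<4>(src, k, vindex, table.as_ptr() as *const u8)
}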
18232
18233/// Loads 2 32-bit integer elements from memory starting at location base_addr at packed 64-bit integer
18234/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18235/// mask bit is not set).
18236///
18237/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_epi32)
18238#[inline]
18239#[target_feature(enable = "avx512f,avx512vl")]
18240#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
18241#[rustc_legacy_const_generics(4)]
18242#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18243pub unsafe fn _mm_mmask_i64gather_epi32<const SCALE: i32>(
18244    src: __m128i,
18245    k: __mmask8,
18246    vindex: __m128i,
18247    base_addr: *const u8,
18248) -> __m128i {
18249    static_assert_imm8_scale!(SCALE);
18250    transmute(vpgatherqd_128(
18251        src.as_i32x4(),
18252        base_addr as _,
18253        vindex.as_i64x2(),
18254        k,
18255        SCALE,
18256    ))
18257}
18258
18259/// Loads 2 64-bit integer elements from memory starting at location base_addr at packed 64-bit integer
18260/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18261/// mask bit is not set).
18262///
18263/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_epi64)
18264#[inline]
18265#[target_feature(enable = "avx512f,avx512vl")]
18266#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
18267#[rustc_legacy_const_generics(4)]
18268#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18269pub unsafe fn _mm_mmask_i64gather_epi64<const SCALE: i32>(
18270    src: __m128i,
18271    k: __mmask8,
18272    vindex: __m128i,
18273    base_addr: *const u8,
18274) -> __m128i {
18275    static_assert_imm8_scale!(SCALE);
18276    transmute(vpgatherqq_128(
18277        src.as_i64x2(),
18278        base_addr as _,
18279        vindex.as_i64x2(),
18280        k,
18281        SCALE,
18282    ))
18283}
18284
18285/// Loads 2 double-precision (64-bit) floating-point elements from memory starting at location base_addr
18286/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18287/// from src when the corresponding mask bit is not set).
18288///
18289/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_pd)
18290#[inline]
18291#[target_feature(enable = "avx512f,avx512vl")]
18292#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
18293#[rustc_legacy_const_generics(4)]
18294#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18295pub unsafe fn _mm_mmask_i64gather_pd<const SCALE: i32>(
18296    src: __m128d,
18297    k: __mmask8,
18298    vindex: __m128i,
18299    base_addr: *const u8,
18300) -> __m128d {
18301    static_assert_imm8_scale!(SCALE);
18302    transmute(vgatherqpd_128(
18303        src.as_f64x2(),
18304        base_addr as _,
18305        vindex.as_i64x2(),
18306        k,
18307        SCALE,
18308    ))
18309}
18310
18311/// Loads 2 single-precision (32-bit) floating-point elements from memory starting at location base_addr
18312/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18313/// from src when the corresponding mask bit is not set).
18314///
18315/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_ps)
18316#[inline]
18317#[target_feature(enable = "avx512f,avx512vl")]
18318#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
18319#[rustc_legacy_const_generics(4)]
18320#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18321pub unsafe fn _mm_mmask_i64gather_ps<const SCALE: i32>(
18322    src: __m128,
18323    k: __mmask8,
18324    vindex: __m128i,
18325    base_addr: *const u8,
18326) -> __m128 {
18327    static_assert_imm8_scale!(SCALE);
18328    transmute(vgatherqps_128(
18329        src.as_f32x4(),
18330        base_addr as _,
18331        vindex.as_i64x2(),
18332        k,
18333        SCALE,
18334    ))
18335}
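
// --- Illustrative usage sketch (not part of the upstream source) ---
// The 64-bit-index variants behave the same way, but a 128-bit index vector
// only holds two `i64` indices, so at most two elements are gathered. Here two
// `f64` values are fetched with `_mm_mmask_i64gather_pd`; `SCALE = 8` matches
// the element size, and masked-off lanes keep `src` without touching memory.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_masked_gather_f64(table: &[f64], i0: i64, i1: i64, k: __mmask8) -> __m128d {
    let vindex = _mm_set_epi64x(i1, i0);
    _mm_mmask_i64gather_pd::<8>(_mm_setzero_pd(), k, vindex, table.as_ptr() as *const u8)
}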
18336
18337/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18338///
18339/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi32&expand=1198)
18340#[inline]
18341#[target_feature(enable = "avx512f")]
18342#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18343#[cfg_attr(test, assert_instr(vpcompressd))]
18344pub fn _mm512_mask_compress_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
18345    unsafe { transmute(vpcompressd(a.as_i32x16(), src.as_i32x16(), k)) }
18346}
18347
18348/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18349///
18350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi32&expand=1199)
18351#[inline]
18352#[target_feature(enable = "avx512f")]
18353#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18354#[cfg_attr(test, assert_instr(vpcompressd))]
18355pub fn _mm512_maskz_compress_epi32(k: __mmask16, a: __m512i) -> __m512i {
18356    unsafe { transmute(vpcompressd(a.as_i32x16(), i32x16::ZERO, k)) }
18357}
18358
18359/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18360///
18361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi32&expand=1196)
18362#[inline]
18363#[target_feature(enable = "avx512f,avx512vl")]
18364#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18365#[cfg_attr(test, assert_instr(vpcompressd))]
18366pub fn _mm256_mask_compress_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
18367    unsafe { transmute(vpcompressd256(a.as_i32x8(), src.as_i32x8(), k)) }
18368}
18369
18370/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18371///
18372/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi32&expand=1197)
18373#[inline]
18374#[target_feature(enable = "avx512f,avx512vl")]
18375#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18376#[cfg_attr(test, assert_instr(vpcompressd))]
18377pub fn _mm256_maskz_compress_epi32(k: __mmask8, a: __m256i) -> __m256i {
18378    unsafe { transmute(vpcompressd256(a.as_i32x8(), i32x8::ZERO, k)) }
18379}
18380
18381/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18382///
18383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi32&expand=1194)
18384#[inline]
18385#[target_feature(enable = "avx512f,avx512vl")]
18386#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18387#[cfg_attr(test, assert_instr(vpcompressd))]
18388pub fn _mm_mask_compress_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
18389    unsafe { transmute(vpcompressd128(a.as_i32x4(), src.as_i32x4(), k)) }
18390}
18391
18392/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18393///
18394/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi32&expand=1195)
18395#[inline]
18396#[target_feature(enable = "avx512f,avx512vl")]
18397#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18398#[cfg_attr(test, assert_instr(vpcompressd))]
18399pub fn _mm_maskz_compress_epi32(k: __mmask8, a: __m128i) -> __m128i {
18400    unsafe { transmute(vpcompressd128(a.as_i32x4(), i32x4::ZERO, k)) }
18401}
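
// --- Illustrative usage sketch (not part of the upstream source) ---
// Left-pack the lanes of `a` that are strictly positive: the comparison
// produces the writemask, and the zeroing compress moves the selected lanes
// to the front of the result, zeroing the tail. `popcnt(k)` lanes are kept.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_compress_positive_epi32(a: __m512i) -> __m512i {
    let k = _mm512_cmpgt_epi32_mask(a, _mm512_setzero_si512());
    _mm512_maskz_compress_epi32(k, a)
}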
18402
18403/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18404///
18405/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi64&expand=1204)
18406#[inline]
18407#[target_feature(enable = "avx512f")]
18408#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18409#[cfg_attr(test, assert_instr(vpcompressq))]
18410pub fn _mm512_mask_compress_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
18411    unsafe { transmute(vpcompressq(a.as_i64x8(), src.as_i64x8(), k)) }
18412}
18413
18414/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18415///
18416/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi64&expand=1205)
18417#[inline]
18418#[target_feature(enable = "avx512f")]
18419#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18420#[cfg_attr(test, assert_instr(vpcompressq))]
18421pub fn _mm512_maskz_compress_epi64(k: __mmask8, a: __m512i) -> __m512i {
18422    unsafe { transmute(vpcompressq(a.as_i64x8(), i64x8::ZERO, k)) }
18423}
18424
18425/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18426///
18427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi64&expand=1202)
18428#[inline]
18429#[target_feature(enable = "avx512f,avx512vl")]
18430#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18431#[cfg_attr(test, assert_instr(vpcompressq))]
18432pub fn _mm256_mask_compress_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
18433    unsafe { transmute(vpcompressq256(a.as_i64x4(), src.as_i64x4(), k)) }
18434}
18435
18436/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18437///
18438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi64&expand=1203)
18439#[inline]
18440#[target_feature(enable = "avx512f,avx512vl")]
18441#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18442#[cfg_attr(test, assert_instr(vpcompressq))]
18443pub fn _mm256_maskz_compress_epi64(k: __mmask8, a: __m256i) -> __m256i {
18444    unsafe { transmute(vpcompressq256(a.as_i64x4(), i64x4::ZERO, k)) }
18445}
18446
18447/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18448///
18449/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi64&expand=1200)
18450#[inline]
18451#[target_feature(enable = "avx512f,avx512vl")]
18452#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18453#[cfg_attr(test, assert_instr(vpcompressq))]
18454pub fn _mm_mask_compress_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
18455    unsafe { transmute(vpcompressq128(a.as_i64x2(), src.as_i64x2(), k)) }
18456}
18457
18458/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18459///
18460/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi64&expand=1201)
18461#[inline]
18462#[target_feature(enable = "avx512f,avx512vl")]
18463#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18464#[cfg_attr(test, assert_instr(vpcompressq))]
18465pub fn _mm_maskz_compress_epi64(k: __mmask8, a: __m128i) -> __m128i {
18466    unsafe { transmute(vpcompressq128(a.as_i64x2(), i64x2::ZERO, k)) }
18467}
18468
18469/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18470///
18471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_ps&expand=1222)
18472#[inline]
18473#[target_feature(enable = "avx512f")]
18474#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18475#[cfg_attr(test, assert_instr(vcompressps))]
18476pub fn _mm512_mask_compress_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
18477    unsafe { transmute(vcompressps(a.as_f32x16(), src.as_f32x16(), k)) }
18478}
18479
18480/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18481///
18482/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_ps&expand=1223)
18483#[inline]
18484#[target_feature(enable = "avx512f")]
18485#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18486#[cfg_attr(test, assert_instr(vcompressps))]
18487pub fn _mm512_maskz_compress_ps(k: __mmask16, a: __m512) -> __m512 {
18488    unsafe { transmute(vcompressps(a.as_f32x16(), f32x16::ZERO, k)) }
18489}
18490
18491/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18492///
18493/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_ps&expand=1220)
18494#[inline]
18495#[target_feature(enable = "avx512f,avx512vl")]
18496#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18497#[cfg_attr(test, assert_instr(vcompressps))]
18498pub fn _mm256_mask_compress_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
18499    unsafe { transmute(vcompressps256(a.as_f32x8(), src.as_f32x8(), k)) }
18500}
18501
18502/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18503///
18504/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_ps&expand=1221)
18505#[inline]
18506#[target_feature(enable = "avx512f,avx512vl")]
18507#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18508#[cfg_attr(test, assert_instr(vcompressps))]
18509pub fn _mm256_maskz_compress_ps(k: __mmask8, a: __m256) -> __m256 {
18510    unsafe { transmute(vcompressps256(a.as_f32x8(), f32x8::ZERO, k)) }
18511}
18512
18513/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18514///
18515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_ps&expand=1218)
18516#[inline]
18517#[target_feature(enable = "avx512f,avx512vl")]
18518#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18519#[cfg_attr(test, assert_instr(vcompressps))]
18520pub fn _mm_mask_compress_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
18521    unsafe { transmute(vcompressps128(a.as_f32x4(), src.as_f32x4(), k)) }
18522}
18523
18524/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18525///
18526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_ps&expand=1219)
18527#[inline]
18528#[target_feature(enable = "avx512f,avx512vl")]
18529#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18530#[cfg_attr(test, assert_instr(vcompressps))]
18531pub fn _mm_maskz_compress_ps(k: __mmask8, a: __m128) -> __m128 {
18532    unsafe { transmute(vcompressps128(a.as_f32x4(), f32x4::ZERO, k)) }
18533}
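
// --- Illustrative usage sketch (not part of the upstream source) ---
// Drop the NaN lanes of `x` and pack the remaining values to the front. An
// ordered self-comparison (`_CMP_ORD_Q`) is true exactly for the non-NaN
// lanes, and the zeroing compress clears whatever lanes are left over.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_drop_nan_ps(x: __m512) -> __m512 {
    let not_nan = _mm512_cmp_ps_mask::<{ _CMP_ORD_Q }>(x, x);
    _mm512_maskz_compress_ps(not_nan, x)
}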
18534
18535/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18536///
18537/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_pd&expand=1216)
18538#[inline]
18539#[target_feature(enable = "avx512f")]
18540#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18541#[cfg_attr(test, assert_instr(vcompresspd))]
18542pub fn _mm512_mask_compress_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
18543    unsafe { transmute(vcompresspd(a.as_f64x8(), src.as_f64x8(), k)) }
18544}
18545
18546/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18547///
18548/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_pd&expand=1217)
18549#[inline]
18550#[target_feature(enable = "avx512f")]
18551#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18552#[cfg_attr(test, assert_instr(vcompresspd))]
18553pub fn _mm512_maskz_compress_pd(k: __mmask8, a: __m512d) -> __m512d {
18554    unsafe { transmute(vcompresspd(a.as_f64x8(), f64x8::ZERO, k)) }
18555}
18556
18557/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18558///
18559/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_pd&expand=1214)
18560#[inline]
18561#[target_feature(enable = "avx512f,avx512vl")]
18562#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18563#[cfg_attr(test, assert_instr(vcompresspd))]
18564pub fn _mm256_mask_compress_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
18565    unsafe { transmute(vcompresspd256(a.as_f64x4(), src.as_f64x4(), k)) }
18566}
18567
18568/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18569///
18570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_pd&expand=1215)
18571#[inline]
18572#[target_feature(enable = "avx512f,avx512vl")]
18573#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18574#[cfg_attr(test, assert_instr(vcompresspd))]
18575pub fn _mm256_maskz_compress_pd(k: __mmask8, a: __m256d) -> __m256d {
18576    unsafe { transmute(vcompresspd256(a.as_f64x4(), f64x4::ZERO, k)) }
18577}
18578
18579/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18580///
18581/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_pd&expand=1212)
18582#[inline]
18583#[target_feature(enable = "avx512f,avx512vl")]
18584#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18585#[cfg_attr(test, assert_instr(vcompresspd))]
18586pub fn _mm_mask_compress_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
18587    unsafe { transmute(vcompresspd128(a.as_f64x2(), src.as_f64x2(), k)) }
18588}
18589
18590/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18591///
18592/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_pd&expand=1213)
18593#[inline]
18594#[target_feature(enable = "avx512f,avx512vl")]
18595#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18596#[cfg_attr(test, assert_instr(vcompresspd))]
18597pub fn _mm_maskz_compress_pd(k: __mmask8, a: __m128d) -> __m128d {
18598    unsafe { transmute(vcompresspd128(a.as_f64x2(), f64x2::ZERO, k)) }
18599}
18600
18601/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18602///
18603/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi32)
18604#[inline]
18605#[target_feature(enable = "avx512f")]
18606#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18607#[cfg_attr(test, assert_instr(vpcompressd))]
18608pub unsafe fn _mm512_mask_compressstoreu_epi32(base_addr: *mut u8, k: __mmask16, a: __m512i) {
18609    vcompressstored(base_addr as *mut _, a.as_i32x16(), k)
18610}
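
// --- Illustrative usage sketch (not part of the upstream source) ---
// One step of stream compaction: the selected lanes of `a` are written
// contiguously at `out`, and the returned cursor is advanced by `popcnt(k)`
// so the next block can be appended directly after them. The caller must
// guarantee space for up to 16 more `i32`s at `out`.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_compact_store_epi32(a: __m512i, k: __mmask16, out: *mut i32) -> *mut i32 {
    _mm512_mask_compressstoreu_epi32(out as *mut u8, k, a);
    out.add(k.count_ones() as usize)
}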
18611
18612/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18613///
18614/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi32)
18615#[inline]
18616#[target_feature(enable = "avx512f,avx512vl")]
18617#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18618#[cfg_attr(test, assert_instr(vpcompressd))]
18619pub unsafe fn _mm256_mask_compressstoreu_epi32(base_addr: *mut u8, k: __mmask8, a: __m256i) {
18620    vcompressstored256(base_addr as *mut _, a.as_i32x8(), k)
18621}
18622
18623/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18624///
18625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi32)
18626#[inline]
18627#[target_feature(enable = "avx512f,avx512vl")]
18628#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18629#[cfg_attr(test, assert_instr(vpcompressd))]
18630pub unsafe fn _mm_mask_compressstoreu_epi32(base_addr: *mut u8, k: __mmask8, a: __m128i) {
18631    vcompressstored128(base_addr as *mut _, a.as_i32x4(), k)
18632}
18633
18634/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18635///
18636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi64)
18637#[inline]
18638#[target_feature(enable = "avx512f")]
18639#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18640#[cfg_attr(test, assert_instr(vpcompressq))]
18641pub unsafe fn _mm512_mask_compressstoreu_epi64(base_addr: *mut u8, k: __mmask8, a: __m512i) {
18642    vcompressstoreq(base_addr as *mut _, a.as_i64x8(), k)
18643}
18644
18645/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18646///
18647/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi64)
18648#[inline]
18649#[target_feature(enable = "avx512f,avx512vl")]
18650#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18651#[cfg_attr(test, assert_instr(vpcompressq))]
18652pub unsafe fn _mm256_mask_compressstoreu_epi64(base_addr: *mut u8, k: __mmask8, a: __m256i) {
18653    vcompressstoreq256(base_addr as *mut _, a.as_i64x4(), k)
18654}
18655
18656/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18657///
18658/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi64)
18659#[inline]
18660#[target_feature(enable = "avx512f,avx512vl")]
18661#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18662#[cfg_attr(test, assert_instr(vpcompressq))]
18663pub unsafe fn _mm_mask_compressstoreu_epi64(base_addr: *mut u8, k: __mmask8, a: __m128i) {
18664    vcompressstoreq128(base_addr as *mut _, a.as_i64x2(), k)
18665}
18666
18667/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18668///
18669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_ps)
18670#[inline]
18671#[target_feature(enable = "avx512f")]
18672#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18673#[cfg_attr(test, assert_instr(vcompressps))]
18674pub unsafe fn _mm512_mask_compressstoreu_ps(base_addr: *mut u8, k: __mmask16, a: __m512) {
18675    vcompressstoreps(base_addr as *mut _, a.as_f32x16(), k)
18676}
18677
18678/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18679///
18680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_ps)
18681#[inline]
18682#[target_feature(enable = "avx512f,avx512vl")]
18683#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18684#[cfg_attr(test, assert_instr(vcompressps))]
18685pub unsafe fn _mm256_mask_compressstoreu_ps(base_addr: *mut u8, k: __mmask8, a: __m256) {
18686    vcompressstoreps256(base_addr as *mut _, a.as_f32x8(), k)
18687}
18688
18689/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18690///
18691/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_ps)
18692#[inline]
18693#[target_feature(enable = "avx512f,avx512vl")]
18694#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18695#[cfg_attr(test, assert_instr(vcompressps))]
18696pub unsafe fn _mm_mask_compressstoreu_ps(base_addr: *mut u8, k: __mmask8, a: __m128) {
18697    vcompressstoreps128(base_addr as *mut _, a.as_f32x4(), k)
18698}
18699
18700/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18701///
18702/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_pd)
18703#[inline]
18704#[target_feature(enable = "avx512f")]
18705#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18706#[cfg_attr(test, assert_instr(vcompresspd))]
18707pub unsafe fn _mm512_mask_compressstoreu_pd(base_addr: *mut u8, k: __mmask8, a: __m512d) {
18708    vcompressstorepd(base_addr as *mut _, a.as_f64x8(), k)
18709}
18710
18711/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18712///
18713/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_pd)
18714#[inline]
18715#[target_feature(enable = "avx512f,avx512vl")]
18716#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18717#[cfg_attr(test, assert_instr(vcompresspd))]
18718pub unsafe fn _mm256_mask_compressstoreu_pd(base_addr: *mut u8, k: __mmask8, a: __m256d) {
18719    vcompressstorepd256(base_addr as *mut _, a.as_f64x4(), k)
18720}
18721
18722/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18723///
18724/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_pd)
18725#[inline]
18726#[target_feature(enable = "avx512f,avx512vl")]
18727#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18728#[cfg_attr(test, assert_instr(vcompresspd))]
18729pub unsafe fn _mm_mask_compressstoreu_pd(base_addr: *mut u8, k: __mmask8, a: __m128d) {
18730    vcompressstorepd128(base_addr as *mut _, a.as_f64x2(), k)
18731}
18732
18733/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18734///
18735/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi32&expand=2316)
18736#[inline]
18737#[target_feature(enable = "avx512f")]
18738#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18739#[cfg_attr(test, assert_instr(vpexpandd))]
18740pub fn _mm512_mask_expand_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
18741    unsafe { transmute(vpexpandd(a.as_i32x16(), src.as_i32x16(), k)) }
18742}
18743
18744/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18745///
18746/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi32&expand=2317)
18747#[inline]
18748#[target_feature(enable = "avx512f")]
18749#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18750#[cfg_attr(test, assert_instr(vpexpandd))]
18751pub fn _mm512_maskz_expand_epi32(k: __mmask16, a: __m512i) -> __m512i {
18752    unsafe { transmute(vpexpandd(a.as_i32x16(), i32x16::ZERO, k)) }
18753}
18754
18755/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18756///
18757/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi32&expand=2314)
18758#[inline]
18759#[target_feature(enable = "avx512f,avx512vl")]
18760#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18761#[cfg_attr(test, assert_instr(vpexpandd))]
18762pub fn _mm256_mask_expand_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
18763    unsafe { transmute(vpexpandd256(a.as_i32x8(), src.as_i32x8(), k)) }
18764}
18765
18766/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18767///
18768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi32&expand=2315)
18769#[inline]
18770#[target_feature(enable = "avx512f,avx512vl")]
18771#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18772#[cfg_attr(test, assert_instr(vpexpandd))]
18773pub fn _mm256_maskz_expand_epi32(k: __mmask8, a: __m256i) -> __m256i {
18774    unsafe { transmute(vpexpandd256(a.as_i32x8(), i32x8::ZERO, k)) }
18775}
18776
18777/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18778///
18779/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi32&expand=2312)
18780#[inline]
18781#[target_feature(enable = "avx512f,avx512vl")]
18782#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18783#[cfg_attr(test, assert_instr(vpexpandd))]
18784pub fn _mm_mask_expand_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
18785    unsafe { transmute(vpexpandd128(a.as_i32x4(), src.as_i32x4(), k)) }
18786}
18787
18788/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18789///
18790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi32&expand=2313)
18791#[inline]
18792#[target_feature(enable = "avx512f,avx512vl")]
18793#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18794#[cfg_attr(test, assert_instr(vpexpandd))]
18795pub fn _mm_maskz_expand_epi32(k: __mmask8, a: __m128i) -> __m128i {
18796    unsafe { transmute(vpexpandd128(a.as_i32x4(), i32x4::ZERO, k)) }
18797}
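
// --- Illustrative usage sketch (not part of the upstream source) ---
// `expand` is the inverse of `compress`: compressing the lanes selected by `k`
// and then expanding with the same mask reproduces those lanes in place, with
// every unselected lane zeroed.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_compress_expand_roundtrip(a: __m512i, k: __mmask16) -> __m512i {
    let packed = _mm512_maskz_compress_epi32(k, a);
    _mm512_maskz_expand_epi32(k, packed)
}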
18798
18799/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18800///
18801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi64&expand=2322)
18802#[inline]
18803#[target_feature(enable = "avx512f")]
18804#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18805#[cfg_attr(test, assert_instr(vpexpandq))]
18806pub fn _mm512_mask_expand_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
18807    unsafe { transmute(vpexpandq(a.as_i64x8(), src.as_i64x8(), k)) }
18808}
18809
18810/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18811///
18812/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi64&expand=2323)
18813#[inline]
18814#[target_feature(enable = "avx512f")]
18815#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18816#[cfg_attr(test, assert_instr(vpexpandq))]
18817pub fn _mm512_maskz_expand_epi64(k: __mmask8, a: __m512i) -> __m512i {
18818    unsafe { transmute(vpexpandq(a.as_i64x8(), i64x8::ZERO, k)) }
18819}
18820
18821/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18822///
18823/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi64&expand=2320)
18824#[inline]
18825#[target_feature(enable = "avx512f,avx512vl")]
18826#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18827#[cfg_attr(test, assert_instr(vpexpandq))]
18828pub fn _mm256_mask_expand_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
18829    unsafe { transmute(vpexpandq256(a.as_i64x4(), src.as_i64x4(), k)) }
18830}
18831
18832/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18833///
18834/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi64&expand=2321)
18835#[inline]
18836#[target_feature(enable = "avx512f,avx512vl")]
18837#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18838#[cfg_attr(test, assert_instr(vpexpandq))]
18839pub fn _mm256_maskz_expand_epi64(k: __mmask8, a: __m256i) -> __m256i {
18840    unsafe { transmute(vpexpandq256(a.as_i64x4(), i64x4::ZERO, k)) }
18841}
18842
18843/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18844///
18845/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi64&expand=2318)
18846#[inline]
18847#[target_feature(enable = "avx512f,avx512vl")]
18848#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18849#[cfg_attr(test, assert_instr(vpexpandq))]
18850pub fn _mm_mask_expand_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
18851    unsafe { transmute(vpexpandq128(a.as_i64x2(), src.as_i64x2(), k)) }
18852}
18853
18854/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18855///
18856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi64&expand=2319)
18857#[inline]
18858#[target_feature(enable = "avx512f,avx512vl")]
18859#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18860#[cfg_attr(test, assert_instr(vpexpandq))]
18861pub fn _mm_maskz_expand_epi64(k: __mmask8, a: __m128i) -> __m128i {
18862    unsafe { transmute(vpexpandq128(a.as_i64x2(), i64x2::ZERO, k)) }
18863}
18864
18865/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18866///
18867/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_ps&expand=2340)
18868#[inline]
18869#[target_feature(enable = "avx512f")]
18870#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18871#[cfg_attr(test, assert_instr(vexpandps))]
18872pub fn _mm512_mask_expand_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
18873    unsafe { transmute(vexpandps(a.as_f32x16(), src.as_f32x16(), k)) }
18874}
18875
18876/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18877///
18878/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_ps&expand=2341)
18879#[inline]
18880#[target_feature(enable = "avx512f")]
18881#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18882#[cfg_attr(test, assert_instr(vexpandps))]
18883pub fn _mm512_maskz_expand_ps(k: __mmask16, a: __m512) -> __m512 {
18884    unsafe { transmute(vexpandps(a.as_f32x16(), f32x16::ZERO, k)) }
18885}
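
// --- Illustrative usage sketch (not part of the upstream source) ---
// Scatter the first `popcnt(k)` lanes of `updates`, in order, into the lanes
// of `current` selected by `k`; the remaining lanes of `current` pass through
// untouched. This is a common way to merge freshly computed values back into
// sparse positions of an existing vector.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_merge_updates_ps(current: __m512, updates: __m512, k: __mmask16) -> __m512 {
    _mm512_mask_expand_ps(current, k, updates)
}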
18886
18887/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18888///
18889/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_ps&expand=2338)
18890#[inline]
18891#[target_feature(enable = "avx512f,avx512vl")]
18892#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18893#[cfg_attr(test, assert_instr(vexpandps))]
18894pub fn _mm256_mask_expand_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
18895    unsafe { transmute(vexpandps256(a.as_f32x8(), src.as_f32x8(), k)) }
18896}
18897
18898/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18899///
18900/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_ps&expand=2339)
18901#[inline]
18902#[target_feature(enable = "avx512f,avx512vl")]
18903#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18904#[cfg_attr(test, assert_instr(vexpandps))]
18905pub fn _mm256_maskz_expand_ps(k: __mmask8, a: __m256) -> __m256 {
18906    unsafe { transmute(vexpandps256(a.as_f32x8(), f32x8::ZERO, k)) }
18907}
18908
18909/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18910///
18911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_ps&expand=2336)
18912#[inline]
18913#[target_feature(enable = "avx512f,avx512vl")]
18914#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18915#[cfg_attr(test, assert_instr(vexpandps))]
18916pub fn _mm_mask_expand_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
18917    unsafe { transmute(vexpandps128(a.as_f32x4(), src.as_f32x4(), k)) }
18918}
18919
18920/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18921///
18922/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_ps&expand=2337)
18923#[inline]
18924#[target_feature(enable = "avx512f,avx512vl")]
18925#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18926#[cfg_attr(test, assert_instr(vexpandps))]
18927pub fn _mm_maskz_expand_ps(k: __mmask8, a: __m128) -> __m128 {
18928    unsafe { transmute(vexpandps128(a.as_f32x4(), f32x4::ZERO, k)) }
18929}
18930
18931/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18932///
18933/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_pd&expand=2334)
18934#[inline]
18935#[target_feature(enable = "avx512f")]
18936#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18937#[cfg_attr(test, assert_instr(vexpandpd))]
18938pub fn _mm512_mask_expand_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
18939    unsafe { transmute(vexpandpd(a.as_f64x8(), src.as_f64x8(), k)) }
18940}
18941
18942/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18943///
18944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_pd&expand=2335)
18945#[inline]
18946#[target_feature(enable = "avx512f")]
18947#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18948#[cfg_attr(test, assert_instr(vexpandpd))]
18949pub fn _mm512_maskz_expand_pd(k: __mmask8, a: __m512d) -> __m512d {
18950    unsafe { transmute(vexpandpd(a.as_f64x8(), f64x8::ZERO, k)) }
18951}
18952
18953/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18954///
18955/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_pd&expand=2332)
18956#[inline]
18957#[target_feature(enable = "avx512f,avx512vl")]
18958#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18959#[cfg_attr(test, assert_instr(vexpandpd))]
18960pub fn _mm256_mask_expand_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
18961    unsafe { transmute(vexpandpd256(a.as_f64x4(), src.as_f64x4(), k)) }
18962}
18963
18964/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18965///
18966/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_pd&expand=2333)
18967#[inline]
18968#[target_feature(enable = "avx512f,avx512vl")]
18969#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18970#[cfg_attr(test, assert_instr(vexpandpd))]
18971pub fn _mm256_maskz_expand_pd(k: __mmask8, a: __m256d) -> __m256d {
18972    unsafe { transmute(vexpandpd256(a.as_f64x4(), f64x4::ZERO, k)) }
18973}
18974
18975/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18976///
18977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_pd&expand=2330)
18978#[inline]
18979#[target_feature(enable = "avx512f,avx512vl")]
18980#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18981#[cfg_attr(test, assert_instr(vexpandpd))]
18982pub fn _mm_mask_expand_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
18983    unsafe { transmute(vexpandpd128(a.as_f64x2(), src.as_f64x2(), k)) }
18984}
18985
18986/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18987///
18988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_pd&expand=2331)
18989#[inline]
18990#[target_feature(enable = "avx512f,avx512vl")]
18991#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18992#[cfg_attr(test, assert_instr(vexpandpd))]
18993pub fn _mm_maskz_expand_pd(k: __mmask8, a: __m128d) -> __m128d {
18994    unsafe { transmute(vexpandpd128(a.as_f64x2(), f64x2::ZERO, k)) }
18995}
18996
18997/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
18998///
18999/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rol_epi32&expand=4685)
19000#[inline]
19001#[target_feature(enable = "avx512f")]
19002#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19003#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19004#[rustc_legacy_const_generics(1)]
19005pub fn _mm512_rol_epi32<const IMM8: i32>(a: __m512i) -> __m512i {
19006    unsafe {
19007        static_assert_uimm_bits!(IMM8, 8);
19008        let a = a.as_i32x16();
19009        let r = vprold(a, IMM8);
19010        transmute(r)
19011    }
19012}
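
// --- Illustrative usage sketch (not part of the upstream source) ---
// For a rotate count `N` in `1..=31`, `_mm512_rol_epi32::<N>(x)` computes
// `(x << N) | (x >> (32 - N))` in every 32-bit lane with a single
// instruction; the two results below are bitwise identical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_rol_equivalence(x: __m512i) -> (__m512i, __m512i) {
    let rotated = _mm512_rol_epi32::<7>(x);
    let shifted = _mm512_or_si512(_mm512_slli_epi32::<7>(x), _mm512_srli_epi32::<25>(x));
    (rotated, shifted)
}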
19013
19014/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19015///
19016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rol_epi32&expand=4683)
19017#[inline]
19018#[target_feature(enable = "avx512f")]
19019#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19020#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19021#[rustc_legacy_const_generics(3)]
19022pub fn _mm512_mask_rol_epi32<const IMM8: i32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
19023    unsafe {
19024        static_assert_uimm_bits!(IMM8, 8);
19025        let a = a.as_i32x16();
19026        let r = vprold(a, IMM8);
19027        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
19028    }
19029}
19030
19031/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19032///
19033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rol_epi32&expand=4684)
19034#[inline]
19035#[target_feature(enable = "avx512f")]
19036#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19037#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19038#[rustc_legacy_const_generics(2)]
19039pub fn _mm512_maskz_rol_epi32<const IMM8: i32>(k: __mmask16, a: __m512i) -> __m512i {
19040    unsafe {
19041        static_assert_uimm_bits!(IMM8, 8);
19042        let a = a.as_i32x16();
19043        let r = vprold(a, IMM8);
19044        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
19045    }
19046}
19047
19048/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19049///
19050/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rol_epi32&expand=4682)
19051#[inline]
19052#[target_feature(enable = "avx512f,avx512vl")]
19053#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19054#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19055#[rustc_legacy_const_generics(1)]
19056pub fn _mm256_rol_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
19057    unsafe {
19058        static_assert_uimm_bits!(IMM8, 8);
19059        let a = a.as_i32x8();
19060        let r = vprold256(a, IMM8);
19061        transmute(r)
19062    }
19063}
19064
19065/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19066///
19067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rol_epi32&expand=4680)
19068#[inline]
19069#[target_feature(enable = "avx512f,avx512vl")]
19070#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19071#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19072#[rustc_legacy_const_generics(3)]
19073pub fn _mm256_mask_rol_epi32<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19074    unsafe {
19075        static_assert_uimm_bits!(IMM8, 8);
19076        let a = a.as_i32x8();
19077        let r = vprold256(a, IMM8);
19078        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
19079    }
19080}
19081
19082/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19083///
19084/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rol_epi32&expand=4681)
19085#[inline]
19086#[target_feature(enable = "avx512f,avx512vl")]
19087#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19088#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19089#[rustc_legacy_const_generics(2)]
19090pub fn _mm256_maskz_rol_epi32<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19091    unsafe {
19092        static_assert_uimm_bits!(IMM8, 8);
19093        let a = a.as_i32x8();
19094        let r = vprold256(a, IMM8);
19095        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
19096    }
19097}
19098
19099/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19100///
19101/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rol_epi32&expand=4679)
19102#[inline]
19103#[target_feature(enable = "avx512f,avx512vl")]
19104#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19105#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19106#[rustc_legacy_const_generics(1)]
19107pub fn _mm_rol_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
19108    unsafe {
19109        static_assert_uimm_bits!(IMM8, 8);
19110        let a = a.as_i32x4();
19111        let r = vprold128(a, IMM8);
19112        transmute(r)
19113    }
19114}
19115
19116/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19117///
19118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rol_epi32&expand=4677)
19119#[inline]
19120#[target_feature(enable = "avx512f,avx512vl")]
19121#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19122#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19123#[rustc_legacy_const_generics(3)]
19124pub fn _mm_mask_rol_epi32<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19125    unsafe {
19126        static_assert_uimm_bits!(IMM8, 8);
19127        let a = a.as_i32x4();
19128        let r = vprold128(a, IMM8);
19129        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
19130    }
19131}
19132
19133/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19134///
19135/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rol_epi32&expand=4678)
19136#[inline]
19137#[target_feature(enable = "avx512f,avx512vl")]
19138#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19139#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19140#[rustc_legacy_const_generics(2)]
19141pub fn _mm_maskz_rol_epi32<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19142    unsafe {
19143        static_assert_uimm_bits!(IMM8, 8);
19144        let a = a.as_i32x4();
19145        let r = vprold128(a, IMM8);
19146        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
19147    }
19148}
19149
19150/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19151///
19152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ror_epi32&expand=4721)
19153#[inline]
19154#[target_feature(enable = "avx512f")]
19155#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19156#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19157#[rustc_legacy_const_generics(1)]
19158pub fn _mm512_ror_epi32<const IMM8: i32>(a: __m512i) -> __m512i {
19159    unsafe {
19160        static_assert_uimm_bits!(IMM8, 8);
19161        let a = a.as_i32x16();
19162        let r = vprord(a, IMM8);
19163        transmute(r)
19164    }
19165}
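
// --- Illustrative usage sketch (not part of the upstream source) ---
// Rotating right by `N` is the same as rotating left by `32 - N`, so the two
// calls below produce identical results in every 32-bit lane (which is also
// why the compiler is free to emit either rotate instruction for a constant
// count).
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_ror_rol_duality(x: __m512i) -> (__m512i, __m512i) {
    (_mm512_ror_epi32::<9>(x), _mm512_rol_epi32::<23>(x))
}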
19166
19167/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19168///
19169/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ror_epi32&expand=4719)
19170#[inline]
19171#[target_feature(enable = "avx512f")]
19172#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19173#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19174#[rustc_legacy_const_generics(3)]
19175pub fn _mm512_mask_ror_epi32<const IMM8: i32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
19176    unsafe {
19177        static_assert_uimm_bits!(IMM8, 8);
19178        let a = a.as_i32x16();
19179        let r = vprord(a, IMM8);
19180        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
19181    }
19182}
19183
19184/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19185///
19186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ror_epi32&expand=4720)
19187#[inline]
19188#[target_feature(enable = "avx512f")]
19189#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19190#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19191#[rustc_legacy_const_generics(2)]
19192pub fn _mm512_maskz_ror_epi32<const IMM8: i32>(k: __mmask16, a: __m512i) -> __m512i {
19193    unsafe {
19194        static_assert_uimm_bits!(IMM8, 8);
19195        let a = a.as_i32x16();
19196        let r = vprord(a, IMM8);
19197        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
19198    }
19199}
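
// Editorial note (not in the upstream source): a rotate right moves bits
// toward the least-significant end and wraps them around to the top. A 32-bit
// lane holding 0x1234_5678 becomes 0x8123_4567 under `_mm512_ror_epi32::<4>`;
// with the maskz form above, only lanes selected by `k` receive that value and
// the remaining lanes are zeroed.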
19200
19201/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19202///
19203/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ror_epi32&expand=4718)
19204#[inline]
19205#[target_feature(enable = "avx512f,avx512vl")]
19206#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19207#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19208#[rustc_legacy_const_generics(1)]
19209pub fn _mm256_ror_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
19210    unsafe {
19211        static_assert_uimm_bits!(IMM8, 8);
19212        let a = a.as_i32x8();
19213        let r = vprord256(a, IMM8);
19214        transmute(r)
19215    }
19216}
19217
19218/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19219///
19220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ror_epi32&expand=4716)
19221#[inline]
19222#[target_feature(enable = "avx512f,avx512vl")]
19223#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19224#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19225#[rustc_legacy_const_generics(3)]
19226pub fn _mm256_mask_ror_epi32<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19227    unsafe {
19228        static_assert_uimm_bits!(IMM8, 8);
19229        let a = a.as_i32x8();
19230        let r = vprord256(a, IMM8);
19231        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
19232    }
19233}
19234
19235/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19236///
19237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ror_epi32&expand=4717)
19238#[inline]
19239#[target_feature(enable = "avx512f,avx512vl")]
19240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19241#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19242#[rustc_legacy_const_generics(2)]
19243pub fn _mm256_maskz_ror_epi32<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19244    unsafe {
19245        static_assert_uimm_bits!(IMM8, 8);
19246        let a = a.as_i32x8();
19247        let r = vprord256(a, IMM8);
19248        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
19249    }
19250}
19251
19252/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19253///
19254/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ror_epi32&expand=4715)
19255#[inline]
19256#[target_feature(enable = "avx512f,avx512vl")]
19257#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19258#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19259#[rustc_legacy_const_generics(1)]
19260pub fn _mm_ror_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
19261    unsafe {
19262        static_assert_uimm_bits!(IMM8, 8);
19263        let a = a.as_i32x4();
19264        let r = vprord128(a, IMM8);
19265        transmute(r)
19266    }
19267}
19268
19269/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19270///
19271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ror_epi32&expand=4713)
19272#[inline]
19273#[target_feature(enable = "avx512f,avx512vl")]
19274#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19275#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19276#[rustc_legacy_const_generics(3)]
19277pub fn _mm_mask_ror_epi32<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19278    unsafe {
19279        static_assert_uimm_bits!(IMM8, 8);
19280        let a = a.as_i32x4();
19281        let r = vprord128(a, IMM8);
19282        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
19283    }
19284}
19285
19286/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19287///
19288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ror_epi32&expand=4714)
19289#[inline]
19290#[target_feature(enable = "avx512f,avx512vl")]
19291#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19292#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19293#[rustc_legacy_const_generics(2)]
19294pub fn _mm_maskz_ror_epi32<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19295    unsafe {
19296        static_assert_uimm_bits!(IMM8, 8);
19297        let a = a.as_i32x4();
19298        let r = vprord128(a, IMM8);
19299        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
19300    }
19301}
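
// Editorial example (not part of the upstream source): a hypothetical helper
// contrasting the writemask form with the zeromask form, assuming
// `avx512f,avx512vl` support has been detected by the caller.
//
//     #[target_feature(enable = "avx512f,avx512vl")]
//     unsafe fn ror_low_two_lanes(src: __m128i, a: __m128i) -> __m128i {
//         // Lanes 0 and 1 hold `a` rotated right by 7 bits; lanes 2 and 3
//         // are copied from `src` (they would be zeroed by the maskz form).
//         _mm_mask_ror_epi32::<7>(src, 0b0011, a)
//     }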
19302
19303/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19304///
19305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rol_epi64&expand=4694)
19306#[inline]
19307#[target_feature(enable = "avx512f")]
19308#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19309#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19310#[rustc_legacy_const_generics(1)]
19311pub fn _mm512_rol_epi64<const IMM8: i32>(a: __m512i) -> __m512i {
19312    unsafe {
19313        static_assert_uimm_bits!(IMM8, 8);
19314        let a = a.as_i64x8();
19315        let r = vprolq(a, IMM8);
19316        transmute(r)
19317    }
19318}
19319
19320/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19321///
19322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rol_epi64&expand=4692)
19323#[inline]
19324#[target_feature(enable = "avx512f")]
19325#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19326#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19327#[rustc_legacy_const_generics(3)]
19328pub fn _mm512_mask_rol_epi64<const IMM8: i32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
19329    unsafe {
19330        static_assert_uimm_bits!(IMM8, 8);
19331        let a = a.as_i64x8();
19332        let r = vprolq(a, IMM8);
19333        transmute(simd_select_bitmask(k, r, src.as_i64x8()))
19334    }
19335}
19336
19337/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19338///
19339/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rol_epi64&expand=4693)
19340#[inline]
19341#[target_feature(enable = "avx512f")]
19342#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19343#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19344#[rustc_legacy_const_generics(2)]
19345pub fn _mm512_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m512i {
19346    unsafe {
19347        static_assert_uimm_bits!(IMM8, 8);
19348        let a = a.as_i64x8();
19349        let r = vprolq(a, IMM8);
19350        transmute(simd_select_bitmask(k, r, i64x8::ZERO))
19351    }
19352}
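
// Editorial note (illustrative, not in the upstream source): for a 64-bit lane
// holding 0x0123_4567_89AB_CDEF, `_mm512_rol_epi64::<16>` yields
// 0x4567_89AB_CDEF_0123; the mask/maskz variants above apply the same per-lane
// result under a writemask or zeromask.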
19353
19354/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19355///
19356/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rol_epi64&expand=4691)
19357#[inline]
19358#[target_feature(enable = "avx512f,avx512vl")]
19359#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19360#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19361#[rustc_legacy_const_generics(1)]
19362pub fn _mm256_rol_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
19363    unsafe {
19364        static_assert_uimm_bits!(IMM8, 8);
19365        let a = a.as_i64x4();
19366        let r = vprolq256(a, IMM8);
19367        transmute(r)
19368    }
19369}
19370
19371/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19372///
19373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rol_epi64&expand=4689)
19374#[inline]
19375#[target_feature(enable = "avx512f,avx512vl")]
19376#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19377#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19378#[rustc_legacy_const_generics(3)]
19379pub fn _mm256_mask_rol_epi64<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19380    unsafe {
19381        static_assert_uimm_bits!(IMM8, 8);
19382        let a = a.as_i64x4();
19383        let r = vprolq256(a, IMM8);
19384        transmute(simd_select_bitmask(k, r, src.as_i64x4()))
19385    }
19386}
19387
19388/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19389///
19390/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rol_epi64&expand=4690)
19391#[inline]
19392#[target_feature(enable = "avx512f,avx512vl")]
19393#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19394#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19395#[rustc_legacy_const_generics(2)]
19396pub fn _mm256_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19397    unsafe {
19398        static_assert_uimm_bits!(IMM8, 8);
19399        let a = a.as_i64x4();
19400        let r = vprolq256(a, IMM8);
19401        transmute(simd_select_bitmask(k, r, i64x4::ZERO))
19402    }
19403}
19404
19405/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19406///
19407/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rol_epi64&expand=4688)
19408#[inline]
19409#[target_feature(enable = "avx512f,avx512vl")]
19410#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19411#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19412#[rustc_legacy_const_generics(1)]
19413pub fn _mm_rol_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
19414    unsafe {
19415        static_assert_uimm_bits!(IMM8, 8);
19416        let a = a.as_i64x2();
19417        let r = vprolq128(a, IMM8);
19418        transmute(r)
19419    }
19420}
19421
19422/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19423///
19424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rol_epi64&expand=4686)
19425#[inline]
19426#[target_feature(enable = "avx512f,avx512vl")]
19427#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19428#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19429#[rustc_legacy_const_generics(3)]
19430pub fn _mm_mask_rol_epi64<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19431    unsafe {
19432        static_assert_uimm_bits!(IMM8, 8);
19433        let a = a.as_i64x2();
19434        let r = vprolq128(a, IMM8);
19435        transmute(simd_select_bitmask(k, r, src.as_i64x2()))
19436    }
19437}
19438
19439/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19440///
19441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rol_epi64&expand=4687)
19442#[inline]
19443#[target_feature(enable = "avx512f,avx512vl")]
19444#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19445#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19446#[rustc_legacy_const_generics(2)]
19447pub fn _mm_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19448    unsafe {
19449        static_assert_uimm_bits!(IMM8, 8);
19450        let a = a.as_i64x2();
19451        let r = vprolq128(a, IMM8);
19452        transmute(simd_select_bitmask(k, r, i64x2::ZERO))
19453    }
19454}
19455
19456/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19457///
19458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ror_epi64&expand=4730)
19459#[inline]
19460#[target_feature(enable = "avx512f")]
19461#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19462#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19463#[rustc_legacy_const_generics(1)]
19464pub fn _mm512_ror_epi64<const IMM8: i32>(a: __m512i) -> __m512i {
19465    unsafe {
19466        static_assert_uimm_bits!(IMM8, 8);
19467        let a = a.as_i64x8();
19468        let r = vprorq(a, IMM8);
19469        transmute(r)
19470    }
19471}
19472
19473/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19474///
19475/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ror_epi64&expand=4728)
19476#[inline]
19477#[target_feature(enable = "avx512f")]
19478#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19479#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19480#[rustc_legacy_const_generics(3)]
19481pub fn _mm512_mask_ror_epi64<const IMM8: i32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
19482    unsafe {
19483        static_assert_uimm_bits!(IMM8, 8);
19484        let a = a.as_i64x8();
19485        let r = vprorq(a, IMM8);
19486        transmute(simd_select_bitmask(k, r, src.as_i64x8()))
19487    }
19488}
19489
19490/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19491///
19492/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ror_epi64&expand=4729)
19493#[inline]
19494#[target_feature(enable = "avx512f")]
19495#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19496#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19497#[rustc_legacy_const_generics(2)]
19498pub fn _mm512_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m512i {
19499    unsafe {
19500        static_assert_uimm_bits!(IMM8, 8);
19501        let a = a.as_i64x8();
19502        let r = vprorq(a, IMM8);
19503        transmute(simd_select_bitmask(k, r, i64x8::ZERO))
19504    }
19505}
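
// Editorial note (not part of the upstream source): rotating right by `n` bits
// is equivalent to rotating left by `64 - n`, so `_mm512_ror_epi64::<8>(a)`
// and `_mm512_rol_epi64::<56>(a)` produce identical results in every lane.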
19506
19507/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19508///
19509/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ror_epi64&expand=4727)
19510#[inline]
19511#[target_feature(enable = "avx512f,avx512vl")]
19512#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19513#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19514#[rustc_legacy_const_generics(1)]
19515pub fn _mm256_ror_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
19516    unsafe {
19517        static_assert_uimm_bits!(IMM8, 8);
19518        let a = a.as_i64x4();
19519        let r = vprorq256(a, IMM8);
19520        transmute(r)
19521    }
19522}
19523
19524/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19525///
19526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ror_epi64&expand=4725)
19527#[inline]
19528#[target_feature(enable = "avx512f,avx512vl")]
19529#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19530#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19531#[rustc_legacy_const_generics(3)]
19532pub fn _mm256_mask_ror_epi64<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19533    unsafe {
19534        static_assert_uimm_bits!(IMM8, 8);
19535        let a = a.as_i64x4();
19536        let r = vprorq256(a, IMM8);
19537        transmute(simd_select_bitmask(k, r, src.as_i64x4()))
19538    }
19539}
19540
19541/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19542///
19543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ror_epi64&expand=4726)
19544#[inline]
19545#[target_feature(enable = "avx512f,avx512vl")]
19546#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19547#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19548#[rustc_legacy_const_generics(2)]
19549pub fn _mm256_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19550    unsafe {
19551        static_assert_uimm_bits!(IMM8, 8);
19552        let a = a.as_i64x4();
19553        let r = vprorq256(a, IMM8);
19554        transmute(simd_select_bitmask(k, r, i64x4::ZERO))
19555    }
19556}
19557
19558/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19559///
19560/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ror_epi64&expand=4724)
19561#[inline]
19562#[target_feature(enable = "avx512f,avx512vl")]
19563#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19564#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19565#[rustc_legacy_const_generics(1)]
19566pub fn _mm_ror_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
19567    unsafe {
19568        static_assert_uimm_bits!(IMM8, 8);
19569        let a = a.as_i64x2();
19570        let r = vprorq128(a, IMM8);
19571        transmute(r)
19572    }
19573}
19574
19575/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19576///
19577/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ror_epi64&expand=4722)
19578#[inline]
19579#[target_feature(enable = "avx512f,avx512vl")]
19580#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19581#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19582#[rustc_legacy_const_generics(3)]
19583pub fn _mm_mask_ror_epi64<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19584    unsafe {
19585        static_assert_uimm_bits!(IMM8, 8);
19586        let a = a.as_i64x2();
19587        let r = vprorq128(a, IMM8);
19588        transmute(simd_select_bitmask(k, r, src.as_i64x2()))
19589    }
19590}
19591
19592/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19593///
19594/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ror_epi64&expand=4723)
19595#[inline]
19596#[target_feature(enable = "avx512f,avx512vl")]
19597#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19598#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19599#[rustc_legacy_const_generics(2)]
19600pub fn _mm_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19601    unsafe {
19602        static_assert_uimm_bits!(IMM8, 8);
19603        let a = a.as_i64x2();
19604        let r = vprorq128(a, IMM8);
19605        transmute(simd_select_bitmask(k, r, i64x2::ZERO))
19606    }
19607}
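
// Editorial example (not in the upstream source): for rotate counts below the
// lane width, each lane of the intrinsics above behaves like the scalar
// rotate methods; the hypothetical helper below models a single 32-bit lane.
//
//     fn rotate_lane_model(x: u32, imm8: u32) -> (u32, u32) {
//         // (rotate-left result, rotate-right result) for one lane.
//         (x.rotate_left(imm8), x.rotate_right(imm8))
//     }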
19608
19609/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
19610///
19611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi32&expand=5310)
19612#[inline]
19613#[target_feature(enable = "avx512f")]
19614#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19615#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19616#[rustc_legacy_const_generics(1)]
19617pub fn _mm512_slli_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
19618    unsafe {
19619        static_assert_uimm_bits!(IMM8, 8);
19620        if IMM8 >= 32 {
19621            _mm512_setzero_si512()
19622        } else {
19623            transmute(simd_shl(a.as_u32x16(), u32x16::splat(IMM8)))
19624        }
19625    }
19626}
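
// Editorial note (not in the upstream source): unlike the rotates above, a
// shift count does not wrap; as the branch above shows, a count of 32 or more
// produces an all-zero vector. For smaller counts each lane is shifted
// independently, e.g. `_mm512_slli_epi32::<4>` turns a lane holding
// 0x1234_5678 into 0x2345_6780.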
19627
19628/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19629///
19630/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi32&expand=5308)
19631#[inline]
19632#[target_feature(enable = "avx512f")]
19633#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19634#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19635#[rustc_legacy_const_generics(3)]
19636pub fn _mm512_mask_slli_epi32<const IMM8: u32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
19637    unsafe {
19638        static_assert_uimm_bits!(IMM8, 8);
19639        let shf = if IMM8 >= 32 {
19640            u32x16::ZERO
19641        } else {
19642            simd_shl(a.as_u32x16(), u32x16::splat(IMM8))
19643        };
19644        transmute(simd_select_bitmask(k, shf, src.as_u32x16()))
19645    }
19646}
19647
19648/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19649///
19650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi32&expand=5309)
19651#[inline]
19652#[target_feature(enable = "avx512f")]
19653#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19654#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19655#[rustc_legacy_const_generics(2)]
19656pub fn _mm512_maskz_slli_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
19657    unsafe {
19658        static_assert_uimm_bits!(IMM8, 8);
19659        if IMM8 >= 32 {
19660            _mm512_setzero_si512()
19661        } else {
19662            let shf = simd_shl(a.as_u32x16(), u32x16::splat(IMM8));
19663            transmute(simd_select_bitmask(k, shf, u32x16::ZERO))
19664        }
19665    }
19666}
19667
19668/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19669///
19670/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi32&expand=5305)
19671#[inline]
19672#[target_feature(enable = "avx512f,avx512vl")]
19673#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19674#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19675#[rustc_legacy_const_generics(3)]
19676pub fn _mm256_mask_slli_epi32<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19677    unsafe {
19678        static_assert_uimm_bits!(IMM8, 8);
19679        let r = if IMM8 >= 32 {
19680            u32x8::ZERO
19681        } else {
19682            simd_shl(a.as_u32x8(), u32x8::splat(IMM8))
19683        };
19684        transmute(simd_select_bitmask(k, r, src.as_u32x8()))
19685    }
19686}
19687
19688/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19689///
19690/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi32&expand=5306)
19691#[inline]
19692#[target_feature(enable = "avx512f,avx512vl")]
19693#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19694#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19695#[rustc_legacy_const_generics(2)]
19696pub fn _mm256_maskz_slli_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
19697    unsafe {
19698        static_assert_uimm_bits!(IMM8, 8);
19699        if IMM8 >= 32 {
19700            _mm256_setzero_si256()
19701        } else {
19702            let r = simd_shl(a.as_u32x8(), u32x8::splat(IMM8));
19703            transmute(simd_select_bitmask(k, r, u32x8::ZERO))
19704        }
19705    }
19706}
19707
19708/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19709///
19710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi32&expand=5302)
19711#[inline]
19712#[target_feature(enable = "avx512f,avx512vl")]
19713#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19714#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19715#[rustc_legacy_const_generics(3)]
19716pub fn _mm_mask_slli_epi32<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19717    unsafe {
19718        static_assert_uimm_bits!(IMM8, 8);
19719        let r = if IMM8 >= 32 {
19720            u32x4::ZERO
19721        } else {
19722            simd_shl(a.as_u32x4(), u32x4::splat(IMM8))
19723        };
19724        transmute(simd_select_bitmask(k, r, src.as_u32x4()))
19725    }
19726}
19727
19728/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19729///
19730/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi32&expand=5303)
19731#[inline]
19732#[target_feature(enable = "avx512f,avx512vl")]
19733#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19734#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19735#[rustc_legacy_const_generics(2)]
19736pub fn _mm_maskz_slli_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
19737    unsafe {
19738        static_assert_uimm_bits!(IMM8, 8);
19739        if IMM8 >= 32 {
19740            _mm_setzero_si128()
19741        } else {
19742            let r = simd_shl(a.as_u32x4(), u32x4::splat(IMM8));
19743            transmute(simd_select_bitmask(k, r, u32x4::ZERO))
19744        }
19745    }
19746}
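
// Editorial example (not part of the upstream source): a hypothetical helper
// for the masked shift-left forms, assuming `avx512f,avx512vl` support.
//
//     #[target_feature(enable = "avx512f,avx512vl")]
//     unsafe fn shift_selected(src: __m256i, a: __m256i) -> __m256i {
//         // Lanes 0..=3 hold `a` shifted left by 3; lanes 4..=7 copy `src`.
//         _mm256_mask_slli_epi32::<3>(src, 0b0000_1111, a)
//     }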
19747
19748/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
19749///
19750/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi32&expand=5522)
19751#[inline]
19752#[target_feature(enable = "avx512f")]
19753#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19754#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19755#[rustc_legacy_const_generics(1)]
19756pub fn _mm512_srli_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
19757    unsafe {
19758        static_assert_uimm_bits!(IMM8, 8);
19759        if IMM8 >= 32 {
19760            _mm512_setzero_si512()
19761        } else {
19762            transmute(simd_shr(a.as_u32x16(), u32x16::splat(IMM8)))
19763        }
19764    }
19765}
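
// Editorial note (not in the upstream source): `srli` is a logical right
// shift, so zeros are shifted in regardless of the sign bit. A lane holding
// 0xF000_0001 becomes 0x0F00_0000 under `_mm512_srli_epi32::<4>`, whereas the
// arithmetic `srai` family would produce 0xFF00_0000 for the same input.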
19766
19767/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19768///
19769/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi32&expand=5520)
19770#[inline]
19771#[target_feature(enable = "avx512f")]
19772#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19773#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19774#[rustc_legacy_const_generics(3)]
19775pub fn _mm512_mask_srli_epi32<const IMM8: u32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
19776    unsafe {
19777        static_assert_uimm_bits!(IMM8, 8);
19778        let shf = if IMM8 >= 32 {
19779            u32x16::ZERO
19780        } else {
19781            simd_shr(a.as_u32x16(), u32x16::splat(IMM8))
19782        };
19783        transmute(simd_select_bitmask(k, shf, src.as_u32x16()))
19784    }
19785}
19786
19787/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19788///
19789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi32&expand=5521)
19790#[inline]
19791#[target_feature(enable = "avx512f")]
19792#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19793#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19794#[rustc_legacy_const_generics(2)]
19795pub fn _mm512_maskz_srli_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
19796    unsafe {
19797        static_assert_uimm_bits!(IMM8, 8);
19798        if IMM8 >= 32 {
19799            _mm512_setzero_si512()
19800        } else {
19801            let shf = simd_shr(a.as_u32x16(), u32x16::splat(IMM8));
19802            transmute(simd_select_bitmask(k, shf, u32x16::ZERO))
19803        }
19804    }
19805}
19806
19807/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19808///
19809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi32&expand=5517)
19810#[inline]
19811#[target_feature(enable = "avx512f,avx512vl")]
19812#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19813#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19814#[rustc_legacy_const_generics(3)]
19815pub fn _mm256_mask_srli_epi32<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19816    unsafe {
19817        static_assert_uimm_bits!(IMM8, 8);
19818        let r = if IMM8 >= 32 {
19819            u32x8::ZERO
19820        } else {
19821            simd_shr(a.as_u32x8(), u32x8::splat(IMM8))
19822        };
19823        transmute(simd_select_bitmask(k, r, src.as_u32x8()))
19824    }
19825}
19826
19827/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19828///
19829/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi32&expand=5518)
19830#[inline]
19831#[target_feature(enable = "avx512f,avx512vl")]
19832#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19833#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19834#[rustc_legacy_const_generics(2)]
19835pub fn _mm256_maskz_srli_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
19836    unsafe {
19837        static_assert_uimm_bits!(IMM8, 8);
19838        if IMM8 >= 32 {
19839            _mm256_setzero_si256()
19840        } else {
19841            let r = simd_shr(a.as_u32x8(), u32x8::splat(IMM8));
19842            transmute(simd_select_bitmask(k, r, u32x8::ZERO))
19843        }
19844    }
19845}
19846
19847/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19848///
19849/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi32&expand=5514)
19850#[inline]
19851#[target_feature(enable = "avx512f,avx512vl")]
19852#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19853#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19854#[rustc_legacy_const_generics(3)]
19855pub fn _mm_mask_srli_epi32<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19856    unsafe {
19857        static_assert_uimm_bits!(IMM8, 8);
19858        let r = if IMM8 >= 32 {
19859            u32x4::ZERO
19860        } else {
19861            simd_shr(a.as_u32x4(), u32x4::splat(IMM8))
19862        };
19863        transmute(simd_select_bitmask(k, r, src.as_u32x4()))
19864    }
19865}
19866
19867/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19868///
19869/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi32&expand=5515)
19870#[inline]
19871#[target_feature(enable = "avx512f,avx512vl")]
19872#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19873#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19874#[rustc_legacy_const_generics(2)]
19875pub fn _mm_maskz_srli_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
19876    unsafe {
19877        static_assert_uimm_bits!(IMM8, 8);
19878        if IMM8 >= 32 {
19879            _mm_setzero_si128()
19880        } else {
19881            let r = simd_shr(a.as_u32x4(), u32x4::splat(IMM8));
19882            transmute(simd_select_bitmask(k, r, u32x4::ZERO))
19883        }
19884    }
19885}
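
// Editorial example (not part of the upstream source): the masked logical
// shifts are handy for extracting a bit field per lane; the hypothetical
// helper below assumes `avx512f` support.
//
//     #[target_feature(enable = "avx512f")]
//     unsafe fn high_byte_of_selected(k: __mmask16, a: __m512i) -> __m512i {
//         // Selected lanes hold their own top byte (bits 31..=24); others are 0.
//         _mm512_maskz_srli_epi32::<24>(k, a)
//     }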
19886
19887/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
19888///
19889/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi64&expand=5319)
19890#[inline]
19891#[target_feature(enable = "avx512f")]
19892#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19893#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
19894#[rustc_legacy_const_generics(1)]
19895pub fn _mm512_slli_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
19896    unsafe {
19897        static_assert_uimm_bits!(IMM8, 8);
19898        if IMM8 >= 64 {
19899            _mm512_setzero_si512()
19900        } else {
19901            transmute(simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64)))
19902        }
19903    }
19904}
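
// Editorial note (illustrative, not in the upstream source): the 64-bit forms
// mirror the 32-bit ones with the cutoff at 64: `_mm512_slli_epi64::<8>` turns
// a lane holding 0x0000_0000_0000_00FF into 0x0000_0000_0000_FF00, and any
// count of 64 or more yields an all-zero vector.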
19905
19906/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19907///
19908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi64&expand=5317)
19909#[inline]
19910#[target_feature(enable = "avx512f")]
19911#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19912#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
19913#[rustc_legacy_const_generics(3)]
19914pub fn _mm512_mask_slli_epi64<const IMM8: u32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
19915    unsafe {
19916        static_assert_uimm_bits!(IMM8, 8);
19917        let shf = if IMM8 >= 64 {
19918            u64x8::ZERO
19919        } else {
19920            simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64))
19921        };
19922        transmute(simd_select_bitmask(k, shf, src.as_u64x8()))
19923    }
19924}
19925
19926/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19927///
19928/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi64&expand=5318)
19929#[inline]
19930#[target_feature(enable = "avx512f")]
19931#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19932#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
19933#[rustc_legacy_const_generics(2)]
19934pub fn _mm512_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
19935    unsafe {
19936        static_assert_uimm_bits!(IMM8, 8);
19937        if IMM8 >= 64 {
19938            _mm512_setzero_si512()
19939        } else {
19940            let shf = simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64));
19941            transmute(simd_select_bitmask(k, shf, u64x8::ZERO))
19942        }
19943    }
19944}
19945
19946/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19947///
19948/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi64&expand=5314)
19949#[inline]
19950#[target_feature(enable = "avx512f,avx512vl")]
19951#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19952#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
19953#[rustc_legacy_const_generics(3)]
19954pub fn _mm256_mask_slli_epi64<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19955    unsafe {
19956        static_assert_uimm_bits!(IMM8, 8);
19957        let r = if IMM8 >= 64 {
19958            u64x4::ZERO
19959        } else {
19960            simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64))
19961        };
19962        transmute(simd_select_bitmask(k, r, src.as_u64x4()))
19963    }
19964}
19965
19966/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19967///
19968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi64&expand=5315)
19969#[inline]
19970#[target_feature(enable = "avx512f,avx512vl")]
19971#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19972#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
19973#[rustc_legacy_const_generics(2)]
19974pub fn _mm256_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
19975    unsafe {
19976        static_assert_uimm_bits!(IMM8, 8);
19977        if IMM8 >= 64 {
19978            _mm256_setzero_si256()
19979        } else {
19980            let r = simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64));
19981            transmute(simd_select_bitmask(k, r, u64x4::ZERO))
19982        }
19983    }
19984}
19985
19986/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19987///
19988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi64&expand=5311)
19989#[inline]
19990#[target_feature(enable = "avx512f,avx512vl")]
19991#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19992#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
19993#[rustc_legacy_const_generics(3)]
19994pub fn _mm_mask_slli_epi64<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19995    unsafe {
19996        static_assert_uimm_bits!(IMM8, 8);
19997        let r = if IMM8 >= 64 {
19998            u64x2::ZERO
19999        } else {
20000            simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64))
20001        };
20002        transmute(simd_select_bitmask(k, r, src.as_u64x2()))
20003    }
20004}
20005
20006/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20007///
20008/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi64&expand=5312)
20009#[inline]
20010#[target_feature(enable = "avx512f,avx512vl")]
20011#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20012#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20013#[rustc_legacy_const_generics(2)]
20014pub fn _mm_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20015    unsafe {
20016        static_assert_uimm_bits!(IMM8, 8);
20017        if IMM8 >= 64 {
20018            _mm_setzero_si128()
20019        } else {
20020            let r = simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64));
20021            transmute(simd_select_bitmask(k, r, u64x2::ZERO))
20022        }
20023    }
20024}
20025
20026/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
20027///
20028/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi64&expand=5531)
20029#[inline]
20030#[target_feature(enable = "avx512f")]
20031#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20032#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20033#[rustc_legacy_const_generics(1)]
20034pub fn _mm512_srli_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
20035    unsafe {
20036        static_assert_uimm_bits!(IMM8, 8);
20037        if IMM8 >= 64 {
20038            _mm512_setzero_si512()
20039        } else {
20040            transmute(simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64)))
20041        }
20042    }
20043}
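
// Editorial example (not part of the upstream source): a hypothetical helper
// combining the 64-bit immediate shifts to clear the upper half of every lane,
// assuming `avx512f` support.
//
//     #[target_feature(enable = "avx512f")]
//     unsafe fn low_halves(a: __m512i) -> __m512i {
//         // Shift left then logically right by 32 to zero bits 63..=32 per lane.
//         _mm512_srli_epi64::<32>(_mm512_slli_epi64::<32>(a))
//     }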
20044
20045/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20046///
20047/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi64&expand=5529)
20048#[inline]
20049#[target_feature(enable = "avx512f")]
20050#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20051#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20052#[rustc_legacy_const_generics(3)]
20053pub fn _mm512_mask_srli_epi64<const IMM8: u32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
20054    unsafe {
20055        static_assert_uimm_bits!(IMM8, 8);
20056        let shf = if IMM8 >= 64 {
20057            u64x8::ZERO
20058        } else {
20059            simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64))
20060        };
20061        transmute(simd_select_bitmask(k, shf, src.as_u64x8()))
20062    }
20063}
20064
20065/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20066///
20067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi64&expand=5530)
20068#[inline]
20069#[target_feature(enable = "avx512f")]
20070#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20071#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20072#[rustc_legacy_const_generics(2)]
20073pub fn _mm512_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
20074    unsafe {
20075        static_assert_uimm_bits!(IMM8, 8);
20076        if IMM8 >= 64 {
20077            _mm512_setzero_si512()
20078        } else {
20079            let shf = simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64));
20080            transmute(simd_select_bitmask(k, shf, u64x8::ZERO))
20081        }
20082    }
20083}
20084
20085/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20086///
20087/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi64&expand=5526)
20088#[inline]
20089#[target_feature(enable = "avx512f,avx512vl")]
20090#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20091#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20092#[rustc_legacy_const_generics(3)]
20093pub fn _mm256_mask_srli_epi64<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
20094    unsafe {
20095        static_assert_uimm_bits!(IMM8, 8);
20096        let r = if IMM8 >= 64 {
20097            u64x4::ZERO
20098        } else {
20099            simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64))
20100        };
20101        transmute(simd_select_bitmask(k, r, src.as_u64x4()))
20102    }
20103}
20104
20105/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20106///
20107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi64&expand=5527)
20108#[inline]
20109#[target_feature(enable = "avx512f,avx512vl")]
20110#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20111#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20112#[rustc_legacy_const_generics(2)]
20113pub fn _mm256_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
20114    unsafe {
20115        static_assert_uimm_bits!(IMM8, 8);
20116        if IMM8 >= 64 {
20117            _mm256_setzero_si256()
20118        } else {
20119            let r = simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64));
20120            transmute(simd_select_bitmask(k, r, u64x4::ZERO))
20121        }
20122    }
20123}
20124
20125/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20126///
20127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi64&expand=5523)
20128#[inline]
20129#[target_feature(enable = "avx512f,avx512vl")]
20130#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20131#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20132#[rustc_legacy_const_generics(3)]
20133pub fn _mm_mask_srli_epi64<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
20134    unsafe {
20135        static_assert_uimm_bits!(IMM8, 8);
20136        let r = if IMM8 >= 64 {
20137            u64x2::ZERO
20138        } else {
20139            simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64))
20140        };
20141        transmute(simd_select_bitmask(k, r, src.as_u64x2()))
20142    }
20143}
20144
20145/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20146///
20147/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi64&expand=5524)
20148#[inline]
20149#[target_feature(enable = "avx512f,avx512vl")]
20150#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20151#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20152#[rustc_legacy_const_generics(2)]
20153pub fn _mm_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20154    unsafe {
20155        static_assert_uimm_bits!(IMM8, 8);
20156        if IMM8 >= 64 {
20157            _mm_setzero_si128()
20158        } else {
20159            let r = simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64));
20160            transmute(simd_select_bitmask(k, r, u64x2::ZERO))
20161        }
20162    }
20163}
20164
20165/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst.
20166///
20167/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi32&expand=5280)
20168#[inline]
20169#[target_feature(enable = "avx512f")]
20170#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20171#[cfg_attr(test, assert_instr(vpslld))]
20172pub fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i {
20173    unsafe { transmute(vpslld(a.as_i32x16(), count.as_i32x4())) }
20174}
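
// Editorial note (not in the upstream source): unlike the `slli` forms, `sll`
// takes its shift amount at run time from the low 64 bits of `count`, applies
// the same amount to every lane, and clears the result for counts of 32 or
// more. A minimal sketch with a hypothetical helper, assuming `avx512f`:
//
//     #[target_feature(enable = "avx512f")]
//     unsafe fn shift_all_lanes_left(a: __m512i, n: i32) -> __m512i {
//         // `n` is expected to be in 0..32; it is placed in the low bits of an
//         // XMM register with the upper bits zeroed.
//         _mm512_sll_epi32(a, _mm_cvtsi32_si128(n))
//     }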
20175
20176/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20177///
20178/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi32&expand=5278)
20179#[inline]
20180#[target_feature(enable = "avx512f")]
20181#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20182#[cfg_attr(test, assert_instr(vpslld))]
20183pub fn _mm512_mask_sll_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20184    unsafe {
20185        let shf = _mm512_sll_epi32(a, count).as_i32x16();
20186        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
20187    }
20188}
20189
20190/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20191///
20192/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi32&expand=5279)
20193#[inline]
20194#[target_feature(enable = "avx512f")]
20195#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20196#[cfg_attr(test, assert_instr(vpslld))]
20197pub fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20198    unsafe {
20199        let shf = _mm512_sll_epi32(a, count).as_i32x16();
20200        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
20201    }
20202}
20203
20204/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20205///
20206/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi32&expand=5275)
20207#[inline]
20208#[target_feature(enable = "avx512f,avx512vl")]
20209#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20210#[cfg_attr(test, assert_instr(vpslld))]
20211pub fn _mm256_mask_sll_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20212    unsafe {
20213        let shf = _mm256_sll_epi32(a, count).as_i32x8();
20214        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
20215    }
20216}
20217
20218/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20219///
20220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi32&expand=5276)
20221#[inline]
20222#[target_feature(enable = "avx512f,avx512vl")]
20223#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20224#[cfg_attr(test, assert_instr(vpslld))]
20225pub fn _mm256_maskz_sll_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20226    unsafe {
20227        let shf = _mm256_sll_epi32(a, count).as_i32x8();
20228        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
20229    }
20230}
20231
20232/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20233///
20234/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi32&expand=5272)
20235#[inline]
20236#[target_feature(enable = "avx512f,avx512vl")]
20237#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20238#[cfg_attr(test, assert_instr(vpslld))]
20239pub fn _mm_mask_sll_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20240    unsafe {
20241        let shf = _mm_sll_epi32(a, count).as_i32x4();
20242        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
20243    }
20244}
20245
20246/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20247///
20248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi32&expand=5273)
20249#[inline]
20250#[target_feature(enable = "avx512f,avx512vl")]
20251#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20252#[cfg_attr(test, assert_instr(vpslld))]
20253pub fn _mm_maskz_sll_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20254    unsafe {
20255        let shf = _mm_sll_epi32(a, count).as_i32x4();
20256        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
20257    }
20258}
20259
20260/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst.
20261///
20262/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi32&expand=5492)
20263#[inline]
20264#[target_feature(enable = "avx512f")]
20265#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20266#[cfg_attr(test, assert_instr(vpsrld))]
20267pub fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i {
20268    unsafe { transmute(vpsrld(a.as_i32x16(), count.as_i32x4())) }
20269}
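
// Editorial note (not in the upstream source): `srl` is the run-time-count
// counterpart of `srli`; the count comes from the low 64 bits of `count`, is
// shared by all lanes, and values of 32 or more clear the result. For example,
// a lane holding 0x8000_0000 shifted right by a run-time count of 31 becomes
// 0x0000_0001.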
20270
20271/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20272///
20273/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi32&expand=5490)
20274#[inline]
20275#[target_feature(enable = "avx512f")]
20276#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20277#[cfg_attr(test, assert_instr(vpsrld))]
20278pub fn _mm512_mask_srl_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20279    unsafe {
20280        let shf = _mm512_srl_epi32(a, count).as_i32x16();
20281        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
20282    }
20283}
20284
20285/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20286///
20287/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi32&expand=5491)
20288#[inline]
20289#[target_feature(enable = "avx512f")]
20290#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20291#[cfg_attr(test, assert_instr(vpsrld))]
20292pub fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20293    unsafe {
20294        let shf = _mm512_srl_epi32(a, count).as_i32x16();
20295        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
20296    }
20297}
20298
20299/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20300///
20301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi32&expand=5487)
20302#[inline]
20303#[target_feature(enable = "avx512f,avx512vl")]
20304#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20305#[cfg_attr(test, assert_instr(vpsrld))]
20306pub fn _mm256_mask_srl_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20307    unsafe {
20308        let shf = _mm256_srl_epi32(a, count).as_i32x8();
20309        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
20310    }
20311}
20312
20313/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20314///
20315/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi32&expand=5488)
20316#[inline]
20317#[target_feature(enable = "avx512f,avx512vl")]
20318#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20319#[cfg_attr(test, assert_instr(vpsrld))]
20320pub fn _mm256_maskz_srl_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20321    unsafe {
20322        let shf = _mm256_srl_epi32(a, count).as_i32x8();
20323        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
20324    }
20325}
20326
20327/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20328///
20329/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi32&expand=5484)
20330#[inline]
20331#[target_feature(enable = "avx512f,avx512vl")]
20332#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20333#[cfg_attr(test, assert_instr(vpsrld))]
20334pub fn _mm_mask_srl_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20335    unsafe {
20336        let shf = _mm_srl_epi32(a, count).as_i32x4();
20337        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
20338    }
20339}
20340
20341/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20342///
20343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi32&expand=5485)
20344#[inline]
20345#[target_feature(enable = "avx512f,avx512vl")]
20346#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20347#[cfg_attr(test, assert_instr(vpsrld))]
20348pub fn _mm_maskz_srl_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20349    unsafe {
20350        let shf = _mm_srl_epi32(a, count).as_i32x4();
20351        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
20352    }
20353}
20354
20355/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst.
20356///
20357/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi64&expand=5289)
20358#[inline]
20359#[target_feature(enable = "avx512f")]
20360#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20361#[cfg_attr(test, assert_instr(vpsllq))]
20362pub fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i {
20363    unsafe { transmute(vpsllq(a.as_i64x8(), count.as_i64x2())) }
20364}
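
// Illustrative sketch (not part of the upstream source): the 64-bit shifts take the same
// kind of scalar count as the 32-bit forms, again read from the low 64 bits of `count`;
// any count of 64 or more produces all-zero lanes. Hypothetical helper, AVX-512F assumed.
#[target_feature(enable = "avx512f")]
fn example_sll_epi64(a: __m512i) -> __m512i {
    // Multiply every unsigned 64-bit lane by 2^10.
    let count = _mm_set_epi64x(0, 10);
    _mm512_sll_epi64(a, count)
}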
20365
20366/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20367///
20368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi64&expand=5287)
20369#[inline]
20370#[target_feature(enable = "avx512f")]
20371#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20372#[cfg_attr(test, assert_instr(vpsllq))]
20373pub fn _mm512_mask_sll_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20374    unsafe {
20375        let shf = _mm512_sll_epi64(a, count).as_i64x8();
20376        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
20377    }
20378}
20379
20380/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20381///
20382/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi64&expand=5288)
20383#[inline]
20384#[target_feature(enable = "avx512f")]
20385#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20386#[cfg_attr(test, assert_instr(vpsllq))]
20387pub fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20388    unsafe {
20389        let shf = _mm512_sll_epi64(a, count).as_i64x8();
20390        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
20391    }
20392}
20393
20394/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20395///
20396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi64&expand=5284)
20397#[inline]
20398#[target_feature(enable = "avx512f,avx512vl")]
20399#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20400#[cfg_attr(test, assert_instr(vpsllq))]
20401pub fn _mm256_mask_sll_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20402    unsafe {
20403        let shf = _mm256_sll_epi64(a, count).as_i64x4();
20404        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
20405    }
20406}
20407
20408/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20409///
20410/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi64&expand=5285)
20411#[inline]
20412#[target_feature(enable = "avx512f,avx512vl")]
20413#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20414#[cfg_attr(test, assert_instr(vpsllq))]
20415pub fn _mm256_maskz_sll_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20416    unsafe {
20417        let shf = _mm256_sll_epi64(a, count).as_i64x4();
20418        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
20419    }
20420}
20421
20422/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20423///
20424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi64&expand=5281)
20425#[inline]
20426#[target_feature(enable = "avx512f,avx512vl")]
20427#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20428#[cfg_attr(test, assert_instr(vpsllq))]
20429pub fn _mm_mask_sll_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20430    unsafe {
20431        let shf = _mm_sll_epi64(a, count).as_i64x2();
20432        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
20433    }
20434}
20435
20436/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20437///
20438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi64&expand=5282)
20439#[inline]
20440#[target_feature(enable = "avx512f,avx512vl")]
20441#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20442#[cfg_attr(test, assert_instr(vpsllq))]
20443pub fn _mm_maskz_sll_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20444    unsafe {
20445        let shf = _mm_sll_epi64(a, count).as_i64x2();
20446        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
20447    }
20448}
20449
20450/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst.
20451///
20452/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi64&expand=5501)
20453#[inline]
20454#[target_feature(enable = "avx512f")]
20455#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20456#[cfg_attr(test, assert_instr(vpsrlq))]
20457pub fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i {
20458    unsafe { transmute(vpsrlq(a.as_i64x8(), count.as_i64x2())) }
20459}
20460
20461/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20462///
20463/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi64&expand=5499)
20464#[inline]
20465#[target_feature(enable = "avx512f")]
20466#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20467#[cfg_attr(test, assert_instr(vpsrlq))]
20468pub fn _mm512_mask_srl_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20469    unsafe {
20470        let shf = _mm512_srl_epi64(a, count).as_i64x8();
20471        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
20472    }
20473}
20474
20475/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20476///
20477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi64&expand=5500)
20478#[inline]
20479#[target_feature(enable = "avx512f")]
20480#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20481#[cfg_attr(test, assert_instr(vpsrlq))]
20482pub fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20483    unsafe {
20484        let shf = _mm512_srl_epi64(a, count).as_i64x8();
20485        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
20486    }
20487}
20488
20489/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20490///
20491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi64&expand=5496)
20492#[inline]
20493#[target_feature(enable = "avx512f,avx512vl")]
20494#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20495#[cfg_attr(test, assert_instr(vpsrlq))]
20496pub fn _mm256_mask_srl_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20497    unsafe {
20498        let shf = _mm256_srl_epi64(a, count).as_i64x4();
20499        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
20500    }
20501}
20502
20503/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20504///
20505/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi64&expand=5497)
20506#[inline]
20507#[target_feature(enable = "avx512f,avx512vl")]
20508#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20509#[cfg_attr(test, assert_instr(vpsrlq))]
20510pub fn _mm256_maskz_srl_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20511    unsafe {
20512        let shf = _mm256_srl_epi64(a, count).as_i64x4();
20513        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
20514    }
20515}
20516
20517/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20518///
20519/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi64&expand=5493)
20520#[inline]
20521#[target_feature(enable = "avx512f,avx512vl")]
20522#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20523#[cfg_attr(test, assert_instr(vpsrlq))]
20524pub fn _mm_mask_srl_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20525    unsafe {
20526        let shf = _mm_srl_epi64(a, count).as_i64x2();
20527        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
20528    }
20529}
20530
20531/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20532///
20533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi64&expand=5494)
20534#[inline]
20535#[target_feature(enable = "avx512f,avx512vl")]
20536#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20537#[cfg_attr(test, assert_instr(vpsrlq))]
20538pub fn _mm_maskz_srl_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20539    unsafe {
20540        let shf = _mm_srl_epi64(a, count).as_i64x2();
20541        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
20542    }
20543}
20544
20545/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst.
20546///
20547/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi32&expand=5407)
20548#[inline]
20549#[target_feature(enable = "avx512f")]
20550#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20551#[cfg_attr(test, assert_instr(vpsrad))]
20552pub fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i {
20553    unsafe { transmute(vpsrad(a.as_i32x16(), count.as_i32x4())) }
20554}
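
// Illustrative sketch (not part of the upstream source): unlike the `srl` forms, the
// `sra` forms replicate the sign bit, so negative lanes stay negative (an arithmetic
// shift). Hypothetical helper, AVX-512F assumed.
#[target_feature(enable = "avx512f")]
fn example_sra_epi32() -> __m512i {
    let a = _mm512_set1_epi32(-64);
    let count = _mm_set_epi64x(0, 4);
    // Every lane becomes -4 (sign-filled -64 >> 4), not the logical result 0x0FFFFFFC.
    _mm512_sra_epi32(a, count)
}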
20555
20556/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20557///
20558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi32&expand=5405)
20559#[inline]
20560#[target_feature(enable = "avx512f")]
20561#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20562#[cfg_attr(test, assert_instr(vpsrad))]
20563pub fn _mm512_mask_sra_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20564    unsafe {
20565        let shf = _mm512_sra_epi32(a, count).as_i32x16();
20566        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
20567    }
20568}
20569
20570/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20571///
20572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi32&expand=5406)
20573#[inline]
20574#[target_feature(enable = "avx512f")]
20575#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20576#[cfg_attr(test, assert_instr(vpsrad))]
20577pub fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20578    unsafe {
20579        let shf = _mm512_sra_epi32(a, count).as_i32x16();
20580        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
20581    }
20582}
20583
20584/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20585///
20586/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi32&expand=5402)
20587#[inline]
20588#[target_feature(enable = "avx512f,avx512vl")]
20589#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20590#[cfg_attr(test, assert_instr(vpsrad))]
20591pub fn _mm256_mask_sra_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20592    unsafe {
20593        let shf = _mm256_sra_epi32(a, count).as_i32x8();
20594        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
20595    }
20596}
20597
20598/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20599///
20600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi32&expand=5403)
20601#[inline]
20602#[target_feature(enable = "avx512f,avx512vl")]
20603#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20604#[cfg_attr(test, assert_instr(vpsrad))]
20605pub fn _mm256_maskz_sra_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20606    unsafe {
20607        let shf = _mm256_sra_epi32(a, count).as_i32x8();
20608        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
20609    }
20610}
20611
20612/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20613///
20614/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi32&expand=5399)
20615#[inline]
20616#[target_feature(enable = "avx512f,avx512vl")]
20617#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20618#[cfg_attr(test, assert_instr(vpsrad))]
20619pub fn _mm_mask_sra_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20620    unsafe {
20621        let shf = _mm_sra_epi32(a, count).as_i32x4();
20622        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
20623    }
20624}
20625
20626/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20627///
20628/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi32&expand=5400)
20629#[inline]
20630#[target_feature(enable = "avx512f,avx512vl")]
20631#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20632#[cfg_attr(test, assert_instr(vpsrad))]
20633pub fn _mm_maskz_sra_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20634    unsafe {
20635        let shf = _mm_sra_epi32(a, count).as_i32x4();
20636        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
20637    }
20638}
20639
20640/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
20641///
20642/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi64&expand=5416)
20643#[inline]
20644#[target_feature(enable = "avx512f")]
20645#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20646#[cfg_attr(test, assert_instr(vpsraq))]
20647pub fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i {
20648    unsafe { transmute(vpsraq(a.as_i64x8(), count.as_i64x2())) }
20649}
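
// Illustrative sketch (not part of the upstream source): a 64-bit arithmetic right shift
// has no SSE2/AVX2 counterpart; AVX-512F adds the 512-bit form and AVX-512VL extends it
// to the 128/256-bit variants below. Hypothetical helper, AVX-512F assumed.
#[target_feature(enable = "avx512f")]
fn example_sra_epi64() -> __m512i {
    let a = _mm512_set1_epi64(-1024);
    let count = _mm_set_epi64x(0, 3);
    // Every lane becomes -128, with the sign bit propagated into the upper bits.
    _mm512_sra_epi64(a, count)
}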
20650
20651/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20652///
20653/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi64&expand=5414)
20654#[inline]
20655#[target_feature(enable = "avx512f")]
20656#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20657#[cfg_attr(test, assert_instr(vpsraq))]
20658pub fn _mm512_mask_sra_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20659    unsafe {
20660        let shf = _mm512_sra_epi64(a, count).as_i64x8();
20661        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
20662    }
20663}
20664
20665/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20666///
20667/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi64&expand=5415)
20668#[inline]
20669#[target_feature(enable = "avx512f")]
20670#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20671#[cfg_attr(test, assert_instr(vpsraq))]
20672pub fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20673    unsafe {
20674        let shf = _mm512_sra_epi64(a, count).as_i64x8();
20675        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
20676    }
20677}
20678
20679/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
20680///
20681/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sra_epi64&expand=5413)
20682#[inline]
20683#[target_feature(enable = "avx512f,avx512vl")]
20684#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20685#[cfg_attr(test, assert_instr(vpsraq))]
20686pub fn _mm256_sra_epi64(a: __m256i, count: __m128i) -> __m256i {
20687    unsafe { transmute(vpsraq256(a.as_i64x4(), count.as_i64x2())) }
20688}
20689
20690/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20691///
20692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi64&expand=5411)
20693#[inline]
20694#[target_feature(enable = "avx512f,avx512vl")]
20695#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20696#[cfg_attr(test, assert_instr(vpsraq))]
20697pub fn _mm256_mask_sra_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20698    unsafe {
20699        let shf = _mm256_sra_epi64(a, count).as_i64x4();
20700        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
20701    }
20702}
20703
20704/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20705///
20706/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi64&expand=5412)
20707#[inline]
20708#[target_feature(enable = "avx512f,avx512vl")]
20709#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20710#[cfg_attr(test, assert_instr(vpsraq))]
20711pub fn _mm256_maskz_sra_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20712    unsafe {
20713        let shf = _mm256_sra_epi64(a, count).as_i64x4();
20714        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
20715    }
20716}
20717
20718/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
20719///
20720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi64&expand=5410)
20721#[inline]
20722#[target_feature(enable = "avx512f,avx512vl")]
20723#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20724#[cfg_attr(test, assert_instr(vpsraq))]
20725pub fn _mm_sra_epi64(a: __m128i, count: __m128i) -> __m128i {
20726    unsafe { transmute(vpsraq128(a.as_i64x2(), count.as_i64x2())) }
20727}
20728
20729/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20730///
20731/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi64&expand=5408)
20732#[inline]
20733#[target_feature(enable = "avx512f,avx512vl")]
20734#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20735#[cfg_attr(test, assert_instr(vpsraq))]
20736pub fn _mm_mask_sra_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20737    unsafe {
20738        let shf = _mm_sra_epi64(a, count).as_i64x2();
20739        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
20740    }
20741}
20742
20743/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20744///
20745/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi64&expand=5409)
20746#[inline]
20747#[target_feature(enable = "avx512f,avx512vl")]
20748#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20749#[cfg_attr(test, assert_instr(vpsraq))]
20750pub fn _mm_maskz_sra_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20751    unsafe {
20752        let shf = _mm_sra_epi64(a, count).as_i64x2();
20753        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
20754    }
20755}
20756
20757/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
20758///
20759/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi32&expand=5436)
20760#[inline]
20761#[target_feature(enable = "avx512f")]
20762#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20763#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20764#[rustc_legacy_const_generics(1)]
20765pub fn _mm512_srai_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
20766    unsafe {
20767        static_assert_uimm_bits!(IMM8, 8);
20768        transmute(simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32)))
20769    }
20770}
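
// Illustrative sketch (not part of the upstream source): the `srai` forms take the shift
// amount as a const generic (Intel's `imm8`). Amounts above 31 are clamped, which matches
// the hardware behaviour of filling the whole lane with the sign bit. Hypothetical
// helper, AVX-512F assumed.
#[target_feature(enable = "avx512f")]
fn example_srai_epi32(a: __m512i) -> __m512i {
    // Shift every lane right by 5 with sign fill; the amount must be a compile-time constant.
    _mm512_srai_epi32::<5>(a)
}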
20771
20772/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20773///
20774/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi32&expand=5434)
20775#[inline]
20776#[target_feature(enable = "avx512f")]
20777#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20778#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20779#[rustc_legacy_const_generics(3)]
20780pub fn _mm512_mask_srai_epi32<const IMM8: u32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
20781    unsafe {
20782        static_assert_uimm_bits!(IMM8, 8);
20783        let r = simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32));
20784        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
20785    }
20786}
20787
20788/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20789///
20790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi32&expand=5435)
20791#[inline]
20792#[target_feature(enable = "avx512f")]
20793#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20794#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20795#[rustc_legacy_const_generics(2)]
20796pub fn _mm512_maskz_srai_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
20797    unsafe {
20798        static_assert_uimm_bits!(IMM8, 8);
20799        let r = simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32));
20800        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
20801    }
20802}
20803
20804/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20805///
20806/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi32&expand=5431)
20807#[inline]
20808#[target_feature(enable = "avx512f,avx512vl")]
20809#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20810#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20811#[rustc_legacy_const_generics(3)]
20812pub fn _mm256_mask_srai_epi32<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
20813    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
20814        let r = simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31) as i32));
20815        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
20816    }
20817}
20818
20819/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20820///
20821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi32&expand=5432)
20822#[inline]
20823#[target_feature(enable = "avx512f,avx512vl")]
20824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20825#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20826#[rustc_legacy_const_generics(2)]
20827pub fn _mm256_maskz_srai_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
20828    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
20829        let r = simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31) as i32));
20830        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
20831    }
20832}
20833
20834/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20835///
20836/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi32&expand=5428)
20837#[inline]
20838#[target_feature(enable = "avx512f,avx512vl")]
20839#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20840#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20841#[rustc_legacy_const_generics(3)]
20842pub fn _mm_mask_srai_epi32<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
20843    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
20844        let r = simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31) as i32));
20845        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
20846    }
20847}
20848
20849/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20850///
20851/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi32&expand=5429)
20852#[inline]
20853#[target_feature(enable = "avx512f,avx512vl")]
20854#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20855#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20856#[rustc_legacy_const_generics(2)]
20857pub fn _mm_maskz_srai_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20858    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
20859        let r = simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31) as i32));
20860        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
20861    }
20862}
20863
20864/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
20865///
20866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi64&expand=5445)
20867#[inline]
20868#[target_feature(enable = "avx512f")]
20869#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20870#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20871#[rustc_legacy_const_generics(1)]
20872pub fn _mm512_srai_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
20873    unsafe {
20874        static_assert_uimm_bits!(IMM8, 8);
20875        transmute(simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64)))
20876    }
20877}
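
// Illustrative sketch (not part of the upstream source): the 64-bit immediate form works
// the same way, with amounts above 63 clamped so each lane collapses to 0 or -1 depending
// on its sign. Hypothetical helper, AVX-512F assumed.
#[target_feature(enable = "avx512f")]
fn example_srai_epi64(a: __m512i) -> __m512i {
    // Keep only the sign of each 64-bit lane: 0 for non-negative values, -1 for negative ones.
    _mm512_srai_epi64::<63>(a)
}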
20878
20879/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20880///
20881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi64&expand=5443)
20882#[inline]
20883#[target_feature(enable = "avx512f")]
20884#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20885#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20886#[rustc_legacy_const_generics(3)]
20887pub fn _mm512_mask_srai_epi64<const IMM8: u32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
20888    unsafe {
20889        static_assert_uimm_bits!(IMM8, 8);
20890        let shf = simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64));
20891        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
20892    }
20893}
20894
20895/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20896///
20897/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi64&expand=5444)
20898#[inline]
20899#[target_feature(enable = "avx512f")]
20900#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20901#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20902#[rustc_legacy_const_generics(2)]
20903pub fn _mm512_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
20904    unsafe {
20905        static_assert_uimm_bits!(IMM8, 8);
20906        let shf = simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64));
20907        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
20908    }
20909}
20910
20911/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
20912///
20913/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srai_epi64&expand=5442)
20914#[inline]
20915#[target_feature(enable = "avx512f,avx512vl")]
20916#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20917#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20918#[rustc_legacy_const_generics(1)]
20919pub fn _mm256_srai_epi64<const IMM8: u32>(a: __m256i) -> __m256i {
20920    unsafe {
20921        static_assert_uimm_bits!(IMM8, 8);
20922        transmute(simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64)))
20923    }
20924}
20925
20926/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20927///
20928/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi64&expand=5440)
20929#[inline]
20930#[target_feature(enable = "avx512f,avx512vl")]
20931#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20932#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20933#[rustc_legacy_const_generics(3)]
20934pub fn _mm256_mask_srai_epi64<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
20935    unsafe {
20936        static_assert_uimm_bits!(IMM8, 8);
20937        let shf = simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64));
20938        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
20939    }
20940}
20941
20942/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20943///
20944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi64&expand=5441)
20945#[inline]
20946#[target_feature(enable = "avx512f,avx512vl")]
20947#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20948#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20949#[rustc_legacy_const_generics(2)]
20950pub fn _mm256_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
20951    unsafe {
20952        static_assert_uimm_bits!(IMM8, 8);
20953        let shf = simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64));
20954        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
20955    }
20956}
20957
20958/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
20959///
20960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi64&expand=5439)
20961#[inline]
20962#[target_feature(enable = "avx512f,avx512vl")]
20963#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20964#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20965#[rustc_legacy_const_generics(1)]
20966pub fn _mm_srai_epi64<const IMM8: u32>(a: __m128i) -> __m128i {
20967    unsafe {
20968        static_assert_uimm_bits!(IMM8, 8);
20969        transmute(simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64)))
20970    }
20971}
20972
20973/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20974///
20975/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi64&expand=5437)
20976#[inline]
20977#[target_feature(enable = "avx512f,avx512vl")]
20978#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20979#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20980#[rustc_legacy_const_generics(3)]
20981pub fn _mm_mask_srai_epi64<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
20982    unsafe {
20983        static_assert_uimm_bits!(IMM8, 8);
20984        let shf = simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64));
20985        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
20986    }
20987}
20988
20989/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20990///
20991/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi64&expand=5438)
20992#[inline]
20993#[target_feature(enable = "avx512f,avx512vl")]
20994#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20995#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20996#[rustc_legacy_const_generics(2)]
20997pub fn _mm_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20998    unsafe {
20999        static_assert_uimm_bits!(IMM8, 8);
21000        let shf = simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64));
21001        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
21002    }
21003}
21004
21005/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21006///
21007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi32&expand=5465)
21008#[inline]
21009#[target_feature(enable = "avx512f")]
21010#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21011#[cfg_attr(test, assert_instr(vpsravd))]
21012pub fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i {
21013    unsafe { transmute(vpsravd(a.as_i32x16(), count.as_i32x16())) }
21014}
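
// Illustrative sketch (not part of the upstream source): the `srav` forms take a full
// vector of per-lane shift amounts instead of a single scalar count. Hypothetical helper
// and values, AVX-512F assumed.
#[target_feature(enable = "avx512f")]
fn example_srav_epi32(a: __m512i) -> __m512i {
    // Lane i is shifted right (arithmetically) by i bits.
    let counts = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    _mm512_srav_epi32(a, counts)
}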
21015
21016/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21017///
21018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi32&expand=5463)
21019#[inline]
21020#[target_feature(enable = "avx512f")]
21021#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21022#[cfg_attr(test, assert_instr(vpsravd))]
21023pub fn _mm512_mask_srav_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21024    unsafe {
21025        let shf = _mm512_srav_epi32(a, count).as_i32x16();
21026        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
21027    }
21028}
21029
21030/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21031///
21032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi32&expand=5464)
21033#[inline]
21034#[target_feature(enable = "avx512f")]
21035#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21036#[cfg_attr(test, assert_instr(vpsravd))]
21037pub fn _mm512_maskz_srav_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21038    unsafe {
21039        let shf = _mm512_srav_epi32(a, count).as_i32x16();
21040        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
21041    }
21042}
21043
21044/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21045///
21046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi32&expand=5460)
21047#[inline]
21048#[target_feature(enable = "avx512f,avx512vl")]
21049#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21050#[cfg_attr(test, assert_instr(vpsravd))]
21051pub fn _mm256_mask_srav_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21052    unsafe {
21053        let shf = _mm256_srav_epi32(a, count).as_i32x8();
21054        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
21055    }
21056}
21057
21058/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21059///
21060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi32&expand=5461)
21061#[inline]
21062#[target_feature(enable = "avx512f,avx512vl")]
21063#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21064#[cfg_attr(test, assert_instr(vpsravd))]
21065pub fn _mm256_maskz_srav_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21066    unsafe {
21067        let shf = _mm256_srav_epi32(a, count).as_i32x8();
21068        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
21069    }
21070}
21071
21072/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21073///
21074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi32&expand=5457)
21075#[inline]
21076#[target_feature(enable = "avx512f,avx512vl")]
21077#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21078#[cfg_attr(test, assert_instr(vpsravd))]
21079pub fn _mm_mask_srav_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21080    unsafe {
21081        let shf = _mm_srav_epi32(a, count).as_i32x4();
21082        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
21083    }
21084}
21085
21086/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21087///
21088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi32&expand=5458)
21089#[inline]
21090#[target_feature(enable = "avx512f,avx512vl")]
21091#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21092#[cfg_attr(test, assert_instr(vpsravd))]
21093pub fn _mm_maskz_srav_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21094    unsafe {
21095        let shf = _mm_srav_epi32(a, count).as_i32x4();
21096        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
21097    }
21098}
21099
21100/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21101///
21102/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi64&expand=5474)
21103#[inline]
21104#[target_feature(enable = "avx512f")]
21105#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21106#[cfg_attr(test, assert_instr(vpsravq))]
21107pub fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i {
21108    unsafe { transmute(vpsravq(a.as_i64x8(), count.as_i64x8())) }
21109}
21110
21111/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21112///
21113/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi64&expand=5472)
21114#[inline]
21115#[target_feature(enable = "avx512f")]
21116#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21117#[cfg_attr(test, assert_instr(vpsravq))]
21118pub fn _mm512_mask_srav_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
21119    unsafe {
21120        let shf = _mm512_srav_epi64(a, count).as_i64x8();
21121        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
21122    }
21123}
21124
21125/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21126///
21127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi64&expand=5473)
21128#[inline]
21129#[target_feature(enable = "avx512f")]
21130#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21131#[cfg_attr(test, assert_instr(vpsravq))]
21132pub fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
21133    unsafe {
21134        let shf = _mm512_srav_epi64(a, count).as_i64x8();
21135        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
21136    }
21137}
21138
21139/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21140///
21141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srav_epi64&expand=5471)
21142#[inline]
21143#[target_feature(enable = "avx512f,avx512vl")]
21144#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21145#[cfg_attr(test, assert_instr(vpsravq))]
21146pub fn _mm256_srav_epi64(a: __m256i, count: __m256i) -> __m256i {
21147    unsafe { transmute(vpsravq256(a.as_i64x4(), count.as_i64x4())) }
21148}
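
// Illustrative sketch (not part of the upstream source): with AVX-512VL the per-lane
// 64-bit arithmetic shift is also available on 256-bit vectors. Hypothetical helper,
// AVX-512F plus AVX-512VL assumed.
#[target_feature(enable = "avx512f,avx512vl")]
fn example_srav_epi64_256(a: __m256i) -> __m256i {
    // Shift the four lanes right by 0, 8, 16 and 24 bits respectively, with sign fill.
    let counts = _mm256_setr_epi64x(0, 8, 16, 24);
    _mm256_srav_epi64(a, counts)
}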
21149
21150/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21151///
21152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi64&expand=5469)
21153#[inline]
21154#[target_feature(enable = "avx512f,avx512vl")]
21155#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21156#[cfg_attr(test, assert_instr(vpsravq))]
21157pub fn _mm256_mask_srav_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21158    unsafe {
21159        let shf = _mm256_srav_epi64(a, count).as_i64x4();
21160        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
21161    }
21162}
21163
21164/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21165///
21166/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi64&expand=5470)
21167#[inline]
21168#[target_feature(enable = "avx512f,avx512vl")]
21169#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21170#[cfg_attr(test, assert_instr(vpsravq))]
21171pub fn _mm256_maskz_srav_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21172    unsafe {
21173        let shf = _mm256_srav_epi64(a, count).as_i64x4();
21174        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
21175    }
21176}
21177
21178/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21179///
21180/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srav_epi64&expand=5468)
21181#[inline]
21182#[target_feature(enable = "avx512f,avx512vl")]
21183#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21184#[cfg_attr(test, assert_instr(vpsravq))]
21185pub fn _mm_srav_epi64(a: __m128i, count: __m128i) -> __m128i {
21186    unsafe { transmute(vpsravq128(a.as_i64x2(), count.as_i64x2())) }
21187}
21188
21189/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21190///
21191/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi64&expand=5466)
21192#[inline]
21193#[target_feature(enable = "avx512f,avx512vl")]
21194#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21195#[cfg_attr(test, assert_instr(vpsravq))]
21196pub fn _mm_mask_srav_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21197    unsafe {
21198        let shf = _mm_srav_epi64(a, count).as_i64x2();
21199        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
21200    }
21201}
21202
21203/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21204///
21205/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi64&expand=5467)
21206#[inline]
21207#[target_feature(enable = "avx512f,avx512vl")]
21208#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21209#[cfg_attr(test, assert_instr(vpsravq))]
21210pub fn _mm_maskz_srav_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21211    unsafe {
21212        let shf = _mm_srav_epi64(a, count).as_i64x2();
21213        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
21214    }
21215}
21216
21217/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21218///
21219/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rolv_epi32&expand=4703)
21220#[inline]
21221#[target_feature(enable = "avx512f")]
21222#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21223#[cfg_attr(test, assert_instr(vprolvd))]
21224pub fn _mm512_rolv_epi32(a: __m512i, b: __m512i) -> __m512i {
21225    unsafe { transmute(vprolvd(a.as_i32x16(), b.as_i32x16())) }
21226}
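
// Illustrative sketch (not part of the upstream source): `rolv` rotates rather than
// shifts, so bits pushed out on the left re-enter on the right and no information is
// lost; only the low five bits of each count lane matter for a 32-bit rotate.
// Hypothetical helper, AVX-512F assumed.
#[target_feature(enable = "avx512f")]
fn example_rolv_epi32(a: __m512i) -> __m512i {
    // Rotate every lane left by 8 bits: each byte moves one position towards the most
    // significant end, with the top byte wrapping around to the bottom.
    let counts = _mm512_set1_epi32(8);
    _mm512_rolv_epi32(a, counts)
}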
21227
21228/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21229///
21230/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rolv_epi32&expand=4701)
21231#[inline]
21232#[target_feature(enable = "avx512f")]
21233#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21234#[cfg_attr(test, assert_instr(vprolvd))]
21235pub fn _mm512_mask_rolv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21236    unsafe {
21237        let rol = _mm512_rolv_epi32(a, b).as_i32x16();
21238        transmute(simd_select_bitmask(k, rol, src.as_i32x16()))
21239    }
21240}
21241
21242/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21243///
21244/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rolv_epi32&expand=4702)
21245#[inline]
21246#[target_feature(enable = "avx512f")]
21247#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21248#[cfg_attr(test, assert_instr(vprolvd))]
21249pub fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21250    unsafe {
21251        let rol = _mm512_rolv_epi32(a, b).as_i32x16();
21252        transmute(simd_select_bitmask(k, rol, i32x16::ZERO))
21253    }
21254}
21255
21256/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21257///
21258/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rolv_epi32&expand=4700)
21259#[inline]
21260#[target_feature(enable = "avx512f,avx512vl")]
21261#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21262#[cfg_attr(test, assert_instr(vprolvd))]
21263pub fn _mm256_rolv_epi32(a: __m256i, b: __m256i) -> __m256i {
21264    unsafe { transmute(vprolvd256(a.as_i32x8(), b.as_i32x8())) }
21265}
21266
21267/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21268///
21269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rolv_epi32&expand=4698)
21270#[inline]
21271#[target_feature(enable = "avx512f,avx512vl")]
21272#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21273#[cfg_attr(test, assert_instr(vprolvd))]
21274pub fn _mm256_mask_rolv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21275    unsafe {
21276        let rol = _mm256_rolv_epi32(a, b).as_i32x8();
21277        transmute(simd_select_bitmask(k, rol, src.as_i32x8()))
21278    }
21279}
21280
21281/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21282///
21283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rolv_epi32&expand=4699)
21284#[inline]
21285#[target_feature(enable = "avx512f,avx512vl")]
21286#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21287#[cfg_attr(test, assert_instr(vprolvd))]
21288pub fn _mm256_maskz_rolv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21289    unsafe {
21290        let rol = _mm256_rolv_epi32(a, b).as_i32x8();
21291        transmute(simd_select_bitmask(k, rol, i32x8::ZERO))
21292    }
21293}
21294
21295/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21296///
21297/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rolv_epi32&expand=4697)
21298#[inline]
21299#[target_feature(enable = "avx512f,avx512vl")]
21300#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21301#[cfg_attr(test, assert_instr(vprolvd))]
21302pub fn _mm_rolv_epi32(a: __m128i, b: __m128i) -> __m128i {
21303    unsafe { transmute(vprolvd128(a.as_i32x4(), b.as_i32x4())) }
21304}
21305
21306/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21307///
21308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rolv_epi32&expand=4695)
21309#[inline]
21310#[target_feature(enable = "avx512f,avx512vl")]
21311#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21312#[cfg_attr(test, assert_instr(vprolvd))]
21313pub fn _mm_mask_rolv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21314    unsafe {
21315        let rol = _mm_rolv_epi32(a, b).as_i32x4();
21316        transmute(simd_select_bitmask(k, rol, src.as_i32x4()))
21317    }
21318}
21319
21320/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21321///
21322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rolv_epi32&expand=4696)
21323#[inline]
21324#[target_feature(enable = "avx512f,avx512vl")]
21325#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21326#[cfg_attr(test, assert_instr(vprolvd))]
21327pub fn _mm_maskz_rolv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21328    unsafe {
21329        let rol = _mm_rolv_epi32(a, b).as_i32x4();
21330        transmute(simd_select_bitmask(k, rol, i32x4::ZERO))
21331    }
21332}
21333
21334/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21335///
21336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rorv_epi32&expand=4739)
21337#[inline]
21338#[target_feature(enable = "avx512f")]
21339#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21340#[cfg_attr(test, assert_instr(vprorvd))]
21341pub fn _mm512_rorv_epi32(a: __m512i, b: __m512i) -> __m512i {
21342    unsafe { transmute(vprorvd(a.as_i32x16(), b.as_i32x16())) }
21343}
21344
21345/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21346///
21347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rorv_epi32&expand=4737)
21348#[inline]
21349#[target_feature(enable = "avx512f")]
21350#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21351#[cfg_attr(test, assert_instr(vprorvd))]
21352pub fn _mm512_mask_rorv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21353    unsafe {
21354        let ror = _mm512_rorv_epi32(a, b).as_i32x16();
21355        transmute(simd_select_bitmask(k, ror, src.as_i32x16()))
21356    }
21357}
21358
21359/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21360///
21361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rorv_epi32&expand=4738)
21362#[inline]
21363#[target_feature(enable = "avx512f")]
21364#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21365#[cfg_attr(test, assert_instr(vprorvd))]
21366pub fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21367    unsafe {
21368        let ror = _mm512_rorv_epi32(a, b).as_i32x16();
21369        transmute(simd_select_bitmask(k, ror, i32x16::ZERO))
21370    }
21371}
21372
21373/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21374///
21375/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rorv_epi32&expand=4736)
21376#[inline]
21377#[target_feature(enable = "avx512f,avx512vl")]
21378#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21379#[cfg_attr(test, assert_instr(vprorvd))]
21380pub fn _mm256_rorv_epi32(a: __m256i, b: __m256i) -> __m256i {
21381    unsafe { transmute(vprorvd256(a.as_i32x8(), b.as_i32x8())) }
21382}
21383
21384/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21385///
21386/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rorv_epi32&expand=4734)
21387#[inline]
21388#[target_feature(enable = "avx512f,avx512vl")]
21389#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21390#[cfg_attr(test, assert_instr(vprorvd))]
21391pub fn _mm256_mask_rorv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21392    unsafe {
21393        let ror = _mm256_rorv_epi32(a, b).as_i32x8();
21394        transmute(simd_select_bitmask(k, ror, src.as_i32x8()))
21395    }
21396}
21397
21398/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21399///
21400/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rorv_epi32&expand=4735)
21401#[inline]
21402#[target_feature(enable = "avx512f,avx512vl")]
21403#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21404#[cfg_attr(test, assert_instr(vprorvd))]
21405pub fn _mm256_maskz_rorv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21406    unsafe {
21407        let ror = _mm256_rorv_epi32(a, b).as_i32x8();
21408        transmute(simd_select_bitmask(k, ror, i32x8::ZERO))
21409    }
21410}
21411
21412/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21413///
21414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rorv_epi32&expand=4733)
21415#[inline]
21416#[target_feature(enable = "avx512f,avx512vl")]
21417#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21418#[cfg_attr(test, assert_instr(vprorvd))]
21419pub fn _mm_rorv_epi32(a: __m128i, b: __m128i) -> __m128i {
21420    unsafe { transmute(vprorvd128(a.as_i32x4(), b.as_i32x4())) }
21421}
21422
21423/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21424///
21425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rorv_epi32&expand=4731)
21426#[inline]
21427#[target_feature(enable = "avx512f,avx512vl")]
21428#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21429#[cfg_attr(test, assert_instr(vprorvd))]
21430pub fn _mm_mask_rorv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21431    unsafe {
21432        let ror = _mm_rorv_epi32(a, b).as_i32x4();
21433        transmute(simd_select_bitmask(k, ror, src.as_i32x4()))
21434    }
21435}
21436
21437/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21438///
21439/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rorv_epi32&expand=4732)
21440#[inline]
21441#[target_feature(enable = "avx512f,avx512vl")]
21442#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21443#[cfg_attr(test, assert_instr(vprorvd))]
21444pub fn _mm_maskz_rorv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21445    unsafe {
21446        let ror = _mm_rorv_epi32(a, b).as_i32x4();
21447        transmute(simd_select_bitmask(k, ror, i32x4::ZERO))
21448    }
21449}
21450
21451/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21452///
21453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rolv_epi64&expand=4712)
21454#[inline]
21455#[target_feature(enable = "avx512f")]
21456#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21457#[cfg_attr(test, assert_instr(vprolvq))]
21458pub fn _mm512_rolv_epi64(a: __m512i, b: __m512i) -> __m512i {
21459    unsafe { transmute(vprolvq(a.as_i64x8(), b.as_i64x8())) }
21460}
21461
21462/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21463///
21464/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rolv_epi64&expand=4710)
21465#[inline]
21466#[target_feature(enable = "avx512f")]
21467#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21468#[cfg_attr(test, assert_instr(vprolvq))]
21469pub fn _mm512_mask_rolv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
21470    unsafe {
21471        let rol = _mm512_rolv_epi64(a, b).as_i64x8();
21472        transmute(simd_select_bitmask(k, rol, src.as_i64x8()))
21473    }
21474}
21475
21476/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21477///
21478/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rolv_epi64&expand=4711)
21479#[inline]
21480#[target_feature(enable = "avx512f")]
21481#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21482#[cfg_attr(test, assert_instr(vprolvq))]
21483pub fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
21484    unsafe {
21485        let rol = _mm512_rolv_epi64(a, b).as_i64x8();
21486        transmute(simd_select_bitmask(k, rol, i64x8::ZERO))
21487    }
21488}
21489
21490/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21491///
21492/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rolv_epi64&expand=4709)
21493#[inline]
21494#[target_feature(enable = "avx512f,avx512vl")]
21495#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21496#[cfg_attr(test, assert_instr(vprolvq))]
21497pub fn _mm256_rolv_epi64(a: __m256i, b: __m256i) -> __m256i {
21498    unsafe { transmute(vprolvq256(a.as_i64x4(), b.as_i64x4())) }
21499}
21500
21501/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21502///
21503/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rolv_epi64&expand=4707)
21504#[inline]
21505#[target_feature(enable = "avx512f,avx512vl")]
21506#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21507#[cfg_attr(test, assert_instr(vprolvq))]
21508pub fn _mm256_mask_rolv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21509    unsafe {
21510        let rol = _mm256_rolv_epi64(a, b).as_i64x4();
21511        transmute(simd_select_bitmask(k, rol, src.as_i64x4()))
21512    }
21513}
21514
21515/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21516///
21517/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rolv_epi64&expand=4708)
21518#[inline]
21519#[target_feature(enable = "avx512f,avx512vl")]
21520#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21521#[cfg_attr(test, assert_instr(vprolvq))]
21522pub fn _mm256_maskz_rolv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21523    unsafe {
21524        let rol = _mm256_rolv_epi64(a, b).as_i64x4();
21525        transmute(simd_select_bitmask(k, rol, i64x4::ZERO))
21526    }
21527}
21528
21529/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21530///
21531/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rolv_epi64&expand=4706)
21532#[inline]
21533#[target_feature(enable = "avx512f,avx512vl")]
21534#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21535#[cfg_attr(test, assert_instr(vprolvq))]
21536pub fn _mm_rolv_epi64(a: __m128i, b: __m128i) -> __m128i {
21537    unsafe { transmute(vprolvq128(a.as_i64x2(), b.as_i64x2())) }
21538}
21539
21540/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21541///
21542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rolv_epi64&expand=4704)
21543#[inline]
21544#[target_feature(enable = "avx512f,avx512vl")]
21545#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21546#[cfg_attr(test, assert_instr(vprolvq))]
21547pub fn _mm_mask_rolv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21548    unsafe {
21549        let rol = _mm_rolv_epi64(a, b).as_i64x2();
21550        transmute(simd_select_bitmask(k, rol, src.as_i64x2()))
21551    }
21552}
21553
21554/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21555///
21556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rolv_epi64&expand=4705)
21557#[inline]
21558#[target_feature(enable = "avx512f,avx512vl")]
21559#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21560#[cfg_attr(test, assert_instr(vprolvq))]
21561pub fn _mm_maskz_rolv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21562    unsafe {
21563        let rol = _mm_rolv_epi64(a, b).as_i64x2();
21564        transmute(simd_select_bitmask(k, rol, i64x2::ZERO))
21565    }
21566}
21567
21568/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21569///
21570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rorv_epi64&expand=4748)
21571#[inline]
21572#[target_feature(enable = "avx512f")]
21573#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21574#[cfg_attr(test, assert_instr(vprorvq))]
21575pub fn _mm512_rorv_epi64(a: __m512i, b: __m512i) -> __m512i {
21576    unsafe { transmute(vprorvq(a.as_i64x8(), b.as_i64x8())) }
21577}
21578
21579/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21580///
21581/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rorv_epi64&expand=4746)
21582#[inline]
21583#[target_feature(enable = "avx512f")]
21584#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21585#[cfg_attr(test, assert_instr(vprorvq))]
21586pub fn _mm512_mask_rorv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
21587    unsafe {
21588        let ror = _mm512_rorv_epi64(a, b).as_i64x8();
21589        transmute(simd_select_bitmask(k, ror, src.as_i64x8()))
21590    }
21591}
21592
21593/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21594///
21595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rorv_epi64&expand=4747)
21596#[inline]
21597#[target_feature(enable = "avx512f")]
21598#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21599#[cfg_attr(test, assert_instr(vprorvq))]
21600pub fn _mm512_maskz_rorv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
21601    unsafe {
21602        let ror = _mm512_rorv_epi64(a, b).as_i64x8();
21603        transmute(simd_select_bitmask(k, ror, i64x8::ZERO))
21604    }
21605}
21606
21607/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21608///
21609/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rorv_epi64&expand=4745)
21610#[inline]
21611#[target_feature(enable = "avx512f,avx512vl")]
21612#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21613#[cfg_attr(test, assert_instr(vprorvq))]
21614pub fn _mm256_rorv_epi64(a: __m256i, b: __m256i) -> __m256i {
21615    unsafe { transmute(vprorvq256(a.as_i64x4(), b.as_i64x4())) }
21616}
21617
21618/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21619///
21620/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rorv_epi64&expand=4743)
21621#[inline]
21622#[target_feature(enable = "avx512f,avx512vl")]
21623#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21624#[cfg_attr(test, assert_instr(vprorvq))]
21625pub fn _mm256_mask_rorv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21626    unsafe {
21627        let ror = _mm256_rorv_epi64(a, b).as_i64x4();
21628        transmute(simd_select_bitmask(k, ror, src.as_i64x4()))
21629    }
21630}
21631
21632/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21633///
21634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rorv_epi64&expand=4744)
21635#[inline]
21636#[target_feature(enable = "avx512f,avx512vl")]
21637#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21638#[cfg_attr(test, assert_instr(vprorvq))]
21639pub fn _mm256_maskz_rorv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21640    unsafe {
21641        let ror = _mm256_rorv_epi64(a, b).as_i64x4();
21642        transmute(simd_select_bitmask(k, ror, i64x4::ZERO))
21643    }
21644}
21645
21646/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21647///
21648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rorv_epi64&expand=4742)
21649#[inline]
21650#[target_feature(enable = "avx512f,avx512vl")]
21651#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21652#[cfg_attr(test, assert_instr(vprorvq))]
21653pub fn _mm_rorv_epi64(a: __m128i, b: __m128i) -> __m128i {
21654    unsafe { transmute(vprorvq128(a.as_i64x2(), b.as_i64x2())) }
21655}
21656
21657/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21658///
21659/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rorv_epi64&expand=4740)
21660#[inline]
21661#[target_feature(enable = "avx512f,avx512vl")]
21662#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21663#[cfg_attr(test, assert_instr(vprorvq))]
21664pub fn _mm_mask_rorv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21665    unsafe {
21666        let ror = _mm_rorv_epi64(a, b).as_i64x2();
21667        transmute(simd_select_bitmask(k, ror, src.as_i64x2()))
21668    }
21669}
21670
21671/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21672///
21673/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rorv_epi64&expand=4741)
21674#[inline]
21675#[target_feature(enable = "avx512f,avx512vl")]
21676#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21677#[cfg_attr(test, assert_instr(vprorvq))]
21678pub fn _mm_maskz_rorv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21679    unsafe {
21680        let ror = _mm_rorv_epi64(a, b).as_i64x2();
21681        transmute(simd_select_bitmask(k, ror, i64x2::ZERO))
21682    }
21683}
21684
21685/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
21686///
21687/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi32&expand=5342)
21688#[inline]
21689#[target_feature(enable = "avx512f")]
21690#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21691#[cfg_attr(test, assert_instr(vpsllvd))]
21692pub fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i {
21693    unsafe { transmute(vpsllvd(a.as_i32x16(), count.as_i32x16())) }
21694}
21695
21696/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21697///
21698/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi32&expand=5340)
21699#[inline]
21700#[target_feature(enable = "avx512f")]
21701#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21702#[cfg_attr(test, assert_instr(vpsllvd))]
21703pub fn _mm512_mask_sllv_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21704    unsafe {
21705        let shf = _mm512_sllv_epi32(a, count).as_i32x16();
21706        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
21707    }
21708}
21709
21710/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21711///
21712/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi32&expand=5341)
21713#[inline]
21714#[target_feature(enable = "avx512f")]
21715#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21716#[cfg_attr(test, assert_instr(vpsllvd))]
21717pub fn _mm512_maskz_sllv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21718    unsafe {
21719        let shf = _mm512_sllv_epi32(a, count).as_i32x16();
21720        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
21721    }
21722}
21723
21724/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21725///
21726/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi32&expand=5337)
21727#[inline]
21728#[target_feature(enable = "avx512f,avx512vl")]
21729#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21730#[cfg_attr(test, assert_instr(vpsllvd))]
21731pub fn _mm256_mask_sllv_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21732    unsafe {
21733        let shf = _mm256_sllv_epi32(a, count).as_i32x8();
21734        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
21735    }
21736}
21737
21738/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21739///
21740/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi32&expand=5338)
21741#[inline]
21742#[target_feature(enable = "avx512f,avx512vl")]
21743#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21744#[cfg_attr(test, assert_instr(vpsllvd))]
21745pub fn _mm256_maskz_sllv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21746    unsafe {
21747        let shf = _mm256_sllv_epi32(a, count).as_i32x8();
21748        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
21749    }
21750}
21751
21752/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21753///
21754/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi32&expand=5334)
21755#[inline]
21756#[target_feature(enable = "avx512f,avx512vl")]
21757#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21758#[cfg_attr(test, assert_instr(vpsllvd))]
21759pub fn _mm_mask_sllv_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21760    unsafe {
21761        let shf = _mm_sllv_epi32(a, count).as_i32x4();
21762        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
21763    }
21764}
21765
21766/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21767///
21768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi32&expand=5335)
21769#[inline]
21770#[target_feature(enable = "avx512f,avx512vl")]
21771#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21772#[cfg_attr(test, assert_instr(vpsllvd))]
21773pub fn _mm_maskz_sllv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21774    unsafe {
21775        let shf = _mm_sllv_epi32(a, count).as_i32x4();
21776        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
21777    }
21778}
21779
21780/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
21781///
21782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi32&expand=5554)
21783#[inline]
21784#[target_feature(enable = "avx512f")]
21785#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21786#[cfg_attr(test, assert_instr(vpsrlvd))]
21787pub fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i {
21788    unsafe { transmute(vpsrlvd(a.as_i32x16(), count.as_i32x16())) }
21789}
21790
21791/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21792///
21793/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi32&expand=5552)
21794#[inline]
21795#[target_feature(enable = "avx512f")]
21796#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21797#[cfg_attr(test, assert_instr(vpsrlvd))]
21798pub fn _mm512_mask_srlv_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21799    unsafe {
21800        let shf = _mm512_srlv_epi32(a, count).as_i32x16();
21801        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
21802    }
21803}
21804
21805/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21806///
21807/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi32&expand=5553)
21808#[inline]
21809#[target_feature(enable = "avx512f")]
21810#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21811#[cfg_attr(test, assert_instr(vpsrlvd))]
21812pub fn _mm512_maskz_srlv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21813    unsafe {
21814        let shf = _mm512_srlv_epi32(a, count).as_i32x16();
21815        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
21816    }
21817}
21818
21819/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21820///
21821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi32&expand=5549)
21822#[inline]
21823#[target_feature(enable = "avx512f,avx512vl")]
21824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21825#[cfg_attr(test, assert_instr(vpsrlvd))]
21826pub fn _mm256_mask_srlv_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21827    unsafe {
21828        let shf = _mm256_srlv_epi32(a, count).as_i32x8();
21829        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
21830    }
21831}
21832
21833/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21834///
21835/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi32&expand=5550)
21836#[inline]
21837#[target_feature(enable = "avx512f,avx512vl")]
21838#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21839#[cfg_attr(test, assert_instr(vpsrlvd))]
21840pub fn _mm256_maskz_srlv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21841    unsafe {
21842        let shf = _mm256_srlv_epi32(a, count).as_i32x8();
21843        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
21844    }
21845}
21846
21847/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21848///
21849/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi32&expand=5546)
21850#[inline]
21851#[target_feature(enable = "avx512f,avx512vl")]
21852#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21853#[cfg_attr(test, assert_instr(vpsrlvd))]
21854pub fn _mm_mask_srlv_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21855    unsafe {
21856        let shf = _mm_srlv_epi32(a, count).as_i32x4();
21857        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
21858    }
21859}
21860
21861/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21862///
21863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi32&expand=5547)
21864#[inline]
21865#[target_feature(enable = "avx512f,avx512vl")]
21866#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21867#[cfg_attr(test, assert_instr(vpsrlvd))]
21868pub fn _mm_maskz_srlv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21869    unsafe {
21870        let shf = _mm_srlv_epi32(a, count).as_i32x4();
21871        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
21872    }
21873}
21874
21875/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
21876///
21877/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi64&expand=5351)
21878#[inline]
21879#[target_feature(enable = "avx512f")]
21880#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21881#[cfg_attr(test, assert_instr(vpsllvq))]
21882pub fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i {
21883    unsafe { transmute(vpsllvq(a.as_i64x8(), count.as_i64x8())) }
21884}
21885
21886/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21887///
21888/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi64&expand=5349)
21889#[inline]
21890#[target_feature(enable = "avx512f")]
21891#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21892#[cfg_attr(test, assert_instr(vpsllvq))]
21893pub fn _mm512_mask_sllv_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
21894    unsafe {
21895        let shf = _mm512_sllv_epi64(a, count).as_i64x8();
21896        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
21897    }
21898}
21899
21900/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21901///
21902/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi64&expand=5350)
21903#[inline]
21904#[target_feature(enable = "avx512f")]
21905#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21906#[cfg_attr(test, assert_instr(vpsllvq))]
21907pub fn _mm512_maskz_sllv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
21908    unsafe {
21909        let shf = _mm512_sllv_epi64(a, count).as_i64x8();
21910        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
21911    }
21912}
21913
21914/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21915///
21916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi64&expand=5346)
21917#[inline]
21918#[target_feature(enable = "avx512f,avx512vl")]
21919#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21920#[cfg_attr(test, assert_instr(vpsllvq))]
21921pub fn _mm256_mask_sllv_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21922    unsafe {
21923        let shf = _mm256_sllv_epi64(a, count).as_i64x4();
21924        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
21925    }
21926}
21927
21928/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21929///
21930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi64&expand=5347)
21931#[inline]
21932#[target_feature(enable = "avx512f,avx512vl")]
21933#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21934#[cfg_attr(test, assert_instr(vpsllvq))]
21935pub fn _mm256_maskz_sllv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21936    unsafe {
21937        let shf = _mm256_sllv_epi64(a, count).as_i64x4();
21938        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
21939    }
21940}
21941
21942/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21943///
21944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi64&expand=5343)
21945#[inline]
21946#[target_feature(enable = "avx512f,avx512vl")]
21947#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21948#[cfg_attr(test, assert_instr(vpsllvq))]
21949pub fn _mm_mask_sllv_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21950    unsafe {
21951        let shf = _mm_sllv_epi64(a, count).as_i64x2();
21952        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
21953    }
21954}
21955
21956/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21957///
21958/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi64&expand=5344)
21959#[inline]
21960#[target_feature(enable = "avx512f,avx512vl")]
21961#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21962#[cfg_attr(test, assert_instr(vpsllvq))]
21963pub fn _mm_maskz_sllv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21964    unsafe {
21965        let shf = _mm_sllv_epi64(a, count).as_i64x2();
21966        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
21967    }
21968}
21969
21970/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
21971///
21972/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi64&expand=5563)
21973#[inline]
21974#[target_feature(enable = "avx512f")]
21975#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21976#[cfg_attr(test, assert_instr(vpsrlvq))]
21977pub fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i {
21978    unsafe { transmute(vpsrlvq(a.as_i64x8(), count.as_i64x8())) }
21979}
21980
21981/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21982///
21983/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi64&expand=5561)
21984#[inline]
21985#[target_feature(enable = "avx512f")]
21986#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21987#[cfg_attr(test, assert_instr(vpsrlvq))]
21988pub fn _mm512_mask_srlv_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
21989    unsafe {
21990        let shf = _mm512_srlv_epi64(a, count).as_i64x8();
21991        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
21992    }
21993}
21994
21995/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21996///
21997/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi64&expand=5562)
21998#[inline]
21999#[target_feature(enable = "avx512f")]
22000#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22001#[cfg_attr(test, assert_instr(vpsrlvq))]
22002pub fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
22003    unsafe {
22004        let shf = _mm512_srlv_epi64(a, count).as_i64x8();
22005        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
22006    }
22007}
22008
22009/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22010///
22011/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi64&expand=5558)
22012#[inline]
22013#[target_feature(enable = "avx512f,avx512vl")]
22014#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22015#[cfg_attr(test, assert_instr(vpsrlvq))]
22016pub fn _mm256_mask_srlv_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
22017    unsafe {
22018        let shf = _mm256_srlv_epi64(a, count).as_i64x4();
22019        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
22020    }
22021}
22022
22023/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22024///
22025/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi64&expand=5559)
22026#[inline]
22027#[target_feature(enable = "avx512f,avx512vl")]
22028#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22029#[cfg_attr(test, assert_instr(vpsrlvq))]
22030pub fn _mm256_maskz_srlv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
22031    unsafe {
22032        let shf = _mm256_srlv_epi64(a, count).as_i64x4();
22033        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
22034    }
22035}
22036
22037/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22038///
22039/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi64&expand=5555)
22040#[inline]
22041#[target_feature(enable = "avx512f,avx512vl")]
22042#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22043#[cfg_attr(test, assert_instr(vpsrlvq))]
22044pub fn _mm_mask_srlv_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22045    unsafe {
22046        let shf = _mm_srlv_epi64(a, count).as_i64x2();
22047        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
22048    }
22049}
22050
22051/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22052///
22053/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi64&expand=5556)
22054#[inline]
22055#[target_feature(enable = "avx512f,avx512vl")]
22056#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22057#[cfg_attr(test, assert_instr(vpsrlvq))]
22058pub fn _mm_maskz_srlv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22059    unsafe {
22060        let shf = _mm_srlv_epi64(a, count).as_i64x2();
22061        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
22062    }
22063}
22064
22065/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
22066///
22067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permute_ps&expand=4170)
22068#[inline]
22069#[target_feature(enable = "avx512f")]
22070#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22071#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22072#[rustc_legacy_const_generics(1)]
22073pub fn _mm512_permute_ps<const MASK: i32>(a: __m512) -> __m512 {
22074    unsafe {
22075        static_assert_uimm_bits!(MASK, 8);
22076        simd_shuffle!(
22077            a,
22078            a,
22079            [
22080                MASK as u32 & 0b11,
22081                (MASK as u32 >> 2) & 0b11,
22082                ((MASK as u32 >> 4) & 0b11),
22083                ((MASK as u32 >> 6) & 0b11),
22084                (MASK as u32 & 0b11) + 4,
22085                ((MASK as u32 >> 2) & 0b11) + 4,
22086                ((MASK as u32 >> 4) & 0b11) + 4,
22087                ((MASK as u32 >> 6) & 0b11) + 4,
22088                (MASK as u32 & 0b11) + 8,
22089                ((MASK as u32 >> 2) & 0b11) + 8,
22090                ((MASK as u32 >> 4) & 0b11) + 8,
22091                ((MASK as u32 >> 6) & 0b11) + 8,
22092                (MASK as u32 & 0b11) + 12,
22093                ((MASK as u32 >> 2) & 0b11) + 12,
22094                ((MASK as u32 >> 4) & 0b11) + 12,
22095                ((MASK as u32 >> 6) & 0b11) + 12,
22096            ],
22097        )
22098    }
22099}
22100
22101/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22102///
22103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permute_ps&expand=4168)
22104#[inline]
22105#[target_feature(enable = "avx512f")]
22106#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22107#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22108#[rustc_legacy_const_generics(3)]
22109pub fn _mm512_mask_permute_ps<const MASK: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
22110    unsafe {
22111        static_assert_uimm_bits!(MASK, 8);
22112        let r = _mm512_permute_ps::<MASK>(a);
22113        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
22114    }
22115}
22116
22117/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22118///
22119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permute_ps&expand=4169)
22120#[inline]
22121#[target_feature(enable = "avx512f")]
22122#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22123#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22124#[rustc_legacy_const_generics(2)]
22125pub fn _mm512_maskz_permute_ps<const MASK: i32>(k: __mmask16, a: __m512) -> __m512 {
22126    unsafe {
22127        static_assert_uimm_bits!(MASK, 8);
22128        let r = _mm512_permute_ps::<MASK>(a);
22129        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
22130    }
22131}
22132
22133/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22134///
22135/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permute_ps&expand=4165)
22136#[inline]
22137#[target_feature(enable = "avx512f,avx512vl")]
22138#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22139#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22140#[rustc_legacy_const_generics(3)]
22141pub fn _mm256_mask_permute_ps<const MASK: i32>(src: __m256, k: __mmask8, a: __m256) -> __m256 {
22142    unsafe {
22143        let r = _mm256_permute_ps::<MASK>(a);
22144        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
22145    }
22146}
22147
22148/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22149///
22150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permute_ps&expand=4166)
22151#[inline]
22152#[target_feature(enable = "avx512f,avx512vl")]
22153#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22154#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22155#[rustc_legacy_const_generics(2)]
22156pub fn _mm256_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m256) -> __m256 {
22157    unsafe {
22158        let r = _mm256_permute_ps::<MASK>(a);
22159        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
22160    }
22161}
22162
22163/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22164///
22165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permute_ps&expand=4162)
22166#[inline]
22167#[target_feature(enable = "avx512f,avx512vl")]
22168#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22169#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22170#[rustc_legacy_const_generics(3)]
22171pub fn _mm_mask_permute_ps<const MASK: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 {
22172    unsafe {
22173        let r = _mm_permute_ps::<MASK>(a);
22174        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
22175    }
22176}
22177
22178/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22179///
22180/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permute_ps&expand=4163)
22181#[inline]
22182#[target_feature(enable = "avx512f,avx512vl")]
22183#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22184#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22185#[rustc_legacy_const_generics(2)]
22186pub fn _mm_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m128) -> __m128 {
22187    unsafe {
22188        let r = _mm_permute_ps::<MASK>(a);
22189        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
22190    }
22191}
22192
22193/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
22194///
22195/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permute_pd&expand=4161)
22196#[inline]
22197#[target_feature(enable = "avx512f")]
22198#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22199#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
22200#[rustc_legacy_const_generics(1)]
22201pub fn _mm512_permute_pd<const MASK: i32>(a: __m512d) -> __m512d {
22202    unsafe {
22203        static_assert_uimm_bits!(MASK, 8);
22204        simd_shuffle!(
22205            a,
22206            a,
22207            [
22208                MASK as u32 & 0b1,
22209                ((MASK as u32 >> 1) & 0b1),
22210                ((MASK as u32 >> 2) & 0b1) + 2,
22211                ((MASK as u32 >> 3) & 0b1) + 2,
22212                ((MASK as u32 >> 4) & 0b1) + 4,
22213                ((MASK as u32 >> 5) & 0b1) + 4,
22214                ((MASK as u32 >> 6) & 0b1) + 6,
22215                ((MASK as u32 >> 7) & 0b1) + 6,
22216            ],
22217        )
22218    }
22219}
22220
22221/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22222///
22223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permute_pd&expand=4159)
22224#[inline]
22225#[target_feature(enable = "avx512f")]
22226#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22227#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
22228#[rustc_legacy_const_generics(3)]
22229pub fn _mm512_mask_permute_pd<const MASK: i32>(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
22230    unsafe {
22231        static_assert_uimm_bits!(MASK, 8);
22232        let r = _mm512_permute_pd::<MASK>(a);
22233        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
22234    }
22235}
22236
22237/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22238///
22239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permute_pd&expand=4160)
22240#[inline]
22241#[target_feature(enable = "avx512f")]
22242#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22243#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
22244#[rustc_legacy_const_generics(2)]
22245pub fn _mm512_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m512d {
22246    unsafe {
22247        static_assert_uimm_bits!(MASK, 8);
22248        let r = _mm512_permute_pd::<MASK>(a);
22249        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
22250    }
22251}
22252
22253/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22254///
22255/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permute_pd&expand=4156)
22256#[inline]
22257#[target_feature(enable = "avx512f,avx512vl")]
22258#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22259#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01))]
22260#[rustc_legacy_const_generics(3)]
22261pub fn _mm256_mask_permute_pd<const MASK: i32>(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
22262    unsafe {
22263        static_assert_uimm_bits!(MASK, 4);
22264        let r = _mm256_permute_pd::<MASK>(a);
22265        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
22266    }
22267}
22268
22269/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22270///
22271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permute_pd&expand=4157)
22272#[inline]
22273#[target_feature(enable = "avx512f,avx512vl")]
22274#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22275#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01))]
22276#[rustc_legacy_const_generics(2)]
22277pub fn _mm256_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m256d) -> __m256d {
22278    unsafe {
22279        static_assert_uimm_bits!(MASK, 4);
22280        let r = _mm256_permute_pd::<MASK>(a);
22281        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
22282    }
22283}
22284
22285/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22286///
22287/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permute_pd&expand=4153)
22288#[inline]
22289#[target_feature(enable = "avx512f,avx512vl")]
22290#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22291#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0b01))]
22292#[rustc_legacy_const_generics(3)]
22293pub fn _mm_mask_permute_pd<const IMM2: i32>(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
22294    unsafe {
22295        static_assert_uimm_bits!(IMM2, 2);
22296        let r = _mm_permute_pd::<IMM2>(a);
22297        transmute(simd_select_bitmask(k, r.as_f64x2(), src.as_f64x2()))
22298    }
22299}
22300
22301/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22302///
22303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permute_pd&expand=4154)
22304#[inline]
22305#[target_feature(enable = "avx512f,avx512vl")]
22306#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22307#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0b01))]
22308#[rustc_legacy_const_generics(2)]
22309pub fn _mm_maskz_permute_pd<const IMM2: i32>(k: __mmask8, a: __m128d) -> __m128d {
22310    unsafe {
22311        static_assert_uimm_bits!(IMM2, 2);
22312        let r = _mm_permute_pd::<IMM2>(a);
22313        transmute(simd_select_bitmask(k, r.as_f64x2(), f64x2::ZERO))
22314    }
22315}
22316
22317/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst.
22318///
22319/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex_epi64&expand=4208)
22320#[inline]
22321#[target_feature(enable = "avx512f")]
22322#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22323#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22324#[rustc_legacy_const_generics(1)]
22325pub fn _mm512_permutex_epi64<const MASK: i32>(a: __m512i) -> __m512i {
22326    unsafe {
22327        static_assert_uimm_bits!(MASK, 8);
22328        simd_shuffle!(
22329            a,
22330            a,
22331            [
22332                MASK as u32 & 0b11,
22333                (MASK as u32 >> 2) & 0b11,
22334                ((MASK as u32 >> 4) & 0b11),
22335                ((MASK as u32 >> 6) & 0b11),
22336                (MASK as u32 & 0b11) + 4,
22337                ((MASK as u32 >> 2) & 0b11) + 4,
22338                ((MASK as u32 >> 4) & 0b11) + 4,
22339                ((MASK as u32 >> 6) & 0b11) + 4,
22340            ],
22341        )
22342    }
22343}
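
// Illustrative sketch (not part of the original source): MASK holds four
// 2-bit lane selectors that are applied to each 256-bit half independently.
//
//     let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
//     // 0b00_01_10_11 selects lanes [3, 2, 1, 0] of each half, i.e. reverses them:
//     let r = _mm512_permutex_epi64::<0b00_01_10_11>(a);
//     // r == [3, 2, 1, 0, 7, 6, 5, 4]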
22344
22345/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22346///
22347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex_epi64&expand=4206)
22348#[inline]
22349#[target_feature(enable = "avx512f")]
22350#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22351#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22352#[rustc_legacy_const_generics(3)]
22353pub fn _mm512_mask_permutex_epi64<const MASK: i32>(
22354    src: __m512i,
22355    k: __mmask8,
22356    a: __m512i,
22357) -> __m512i {
22358    unsafe {
22359        static_assert_uimm_bits!(MASK, 8);
22360        let r = _mm512_permutex_epi64::<MASK>(a);
22361        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
22362    }
22363}
22364
22365/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22366///
22367/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex_epi64&expand=4207)
22368#[inline]
22369#[target_feature(enable = "avx512f")]
22370#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22371#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22372#[rustc_legacy_const_generics(2)]
22373pub fn _mm512_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m512i) -> __m512i {
22374    unsafe {
22375        static_assert_uimm_bits!(MASK, 8);
22376        let r = _mm512_permutex_epi64::<MASK>(a);
22377        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
22378    }
22379}
22380
22381/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst.
22382///
22383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex_epi64&expand=4205)
22384#[inline]
22385#[target_feature(enable = "avx512f,avx512vl")]
22386#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22387#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22388#[rustc_legacy_const_generics(1)]
22389pub fn _mm256_permutex_epi64<const MASK: i32>(a: __m256i) -> __m256i {
22390    unsafe {
22391        static_assert_uimm_bits!(MASK, 8);
22392        simd_shuffle!(
22393            a,
22394            a,
22395            [
22396                MASK as u32 & 0b11,
22397                (MASK as u32 >> 2) & 0b11,
22398                ((MASK as u32 >> 4) & 0b11),
22399                ((MASK as u32 >> 6) & 0b11),
22400            ],
22401        )
22402    }
22403}
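
// Illustrative sketch (not part of the original source), assuming AVX-512F and
// AVX-512VL support; the 256-bit form has a single 256-bit lane, so the four
// 2-bit selectors cover the whole vector:
//
//     let a = _mm256_setr_epi64x(10, 11, 12, 13);
//     let r = _mm256_permutex_epi64::<0b00_00_00_00>(a);
//     // r == [10, 10, 10, 10]  (lane 0 broadcast)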
22404
22405/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22406///
22407/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex_epi64&expand=4203)
22408#[inline]
22409#[target_feature(enable = "avx512f,avx512vl")]
22410#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22411#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22412#[rustc_legacy_const_generics(3)]
22413pub fn _mm256_mask_permutex_epi64<const MASK: i32>(
22414    src: __m256i,
22415    k: __mmask8,
22416    a: __m256i,
22417) -> __m256i {
22418    unsafe {
22419        static_assert_uimm_bits!(MASK, 8);
22420        let r = _mm256_permutex_epi64::<MASK>(a);
22421        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
22422    }
22423}
22424
22425/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22426///
22427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex_epi64&expand=4204)
22428#[inline]
22429#[target_feature(enable = "avx512f,avx512vl")]
22430#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22431#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22432#[rustc_legacy_const_generics(2)]
22433pub fn _mm256_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m256i) -> __m256i {
22434    unsafe {
22435        static_assert_uimm_bits!(MASK, 8);
22436        let r = _mm256_permutex_epi64::<MASK>(a);
22437        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
22438    }
22439}
22440
22441/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst.
22442///
22443/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex_pd&expand=4214)
22444#[inline]
22445#[target_feature(enable = "avx512f")]
22446#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22447#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22448#[rustc_legacy_const_generics(1)]
22449pub fn _mm512_permutex_pd<const MASK: i32>(a: __m512d) -> __m512d {
22450    unsafe {
22451        static_assert_uimm_bits!(MASK, 8);
22452        simd_shuffle!(
22453            a,
22454            a,
22455            [
22456                MASK as u32 & 0b11,
22457                (MASK as u32 >> 2) & 0b11,
22458                ((MASK as u32 >> 4) & 0b11),
22459                ((MASK as u32 >> 6) & 0b11),
22460                (MASK as u32 & 0b11) + 4,
22461                ((MASK as u32 >> 2) & 0b11) + 4,
22462                ((MASK as u32 >> 4) & 0b11) + 4,
22463                ((MASK as u32 >> 6) & 0b11) + 4,
22464            ],
22465        )
22466    }
22467}
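
// Illustrative sketch (not part of the original source), analogous to the
// integer version above but operating on f64 lanes:
//
//     let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
//     let r = _mm512_permutex_pd::<0b00_01_10_11>(a);
//     // r == [3.0, 2.0, 1.0, 0.0, 7.0, 6.0, 5.0, 4.0]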
22468
22469/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22470///
22471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex_pd&expand=4212)
22472#[inline]
22473#[target_feature(enable = "avx512f")]
22474#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22475#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22476#[rustc_legacy_const_generics(3)]
22477pub fn _mm512_mask_permutex_pd<const MASK: i32>(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
22478    unsafe {
22479        let r = _mm512_permutex_pd::<MASK>(a);
22480        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
22481    }
22482}
22483
22484/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22485///
22486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex_pd&expand=4213)
22487#[inline]
22488#[target_feature(enable = "avx512f")]
22489#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22490#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22491#[rustc_legacy_const_generics(2)]
22492pub fn _mm512_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m512d {
22493    unsafe {
22494        let r = _mm512_permutex_pd::<MASK>(a);
22495        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
22496    }
22497}
22498
22499/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst.
22500///
22501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex_pd&expand=4211)
22502#[inline]
22503#[target_feature(enable = "avx512f,avx512vl")]
22504#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22505#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22506#[rustc_legacy_const_generics(1)]
22507pub fn _mm256_permutex_pd<const MASK: i32>(a: __m256d) -> __m256d {
22508    unsafe {
22509        static_assert_uimm_bits!(MASK, 8);
22510        simd_shuffle!(
22511            a,
22512            a,
22513            [
22514                MASK as u32 & 0b11,
22515                (MASK as u32 >> 2) & 0b11,
22516                ((MASK as u32 >> 4) & 0b11),
22517                ((MASK as u32 >> 6) & 0b11),
22518            ],
22519        )
22520    }
22521}
22522
22523/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22524///
22525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex_pd&expand=4209)
22526#[inline]
22527#[target_feature(enable = "avx512f,avx512vl")]
22528#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22529#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22530#[rustc_legacy_const_generics(3)]
22531pub fn _mm256_mask_permutex_pd<const MASK: i32>(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
22532    unsafe {
22533        static_assert_uimm_bits!(MASK, 8);
22534        let r = _mm256_permutex_pd::<MASK>(a);
22535        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
22536    }
22537}
22538
22539/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22540///
22541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex_pd&expand=4210)
22542#[inline]
22543#[target_feature(enable = "avx512f,avx512vl")]
22544#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22545#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22546#[rustc_legacy_const_generics(2)]
22547pub fn _mm256_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m256d) -> __m256d {
22548    unsafe {
22549        static_assert_uimm_bits!(MASK, 8);
22550        let r = _mm256_permutex_pd::<MASK>(a);
22551        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
22552    }
22553}
22554
22555/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_permutexvar_epi32, and it is recommended that you use that intrinsic name.
22556///
22557/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_epi32&expand=4182)
22558#[inline]
22559#[target_feature(enable = "avx512f")]
22560#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22561#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
22562pub fn _mm512_permutevar_epi32(idx: __m512i, a: __m512i) -> __m512i {
22563    unsafe { transmute(vpermd(a.as_i32x16(), idx.as_i32x16())) }
22564}
22565
22566/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_mask_permutexvar_epi32, and it is recommended that you use that intrinsic name.
22567///
22568/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_epi32&expand=4181)
22569#[inline]
22570#[target_feature(enable = "avx512f")]
22571#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22572#[cfg_attr(test, assert_instr(vpermd))]
22573pub fn _mm512_mask_permutevar_epi32(
22574    src: __m512i,
22575    k: __mmask16,
22576    idx: __m512i,
22577    a: __m512i,
22578) -> __m512i {
22579    unsafe {
22580        let permute = _mm512_permutevar_epi32(idx, a).as_i32x16();
22581        transmute(simd_select_bitmask(k, permute, src.as_i32x16()))
22582    }
22583}
22584
22585/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst.
22586///
22587/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_ps&expand=4200)
22588#[inline]
22589#[target_feature(enable = "avx512f")]
22590#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22591#[cfg_attr(test, assert_instr(vpermilps))]
22592pub fn _mm512_permutevar_ps(a: __m512, b: __m512i) -> __m512 {
22593    unsafe { transmute(vpermilps(a.as_f32x16(), b.as_i32x16())) }
22594}
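
// Illustrative sketch (not part of the original source): the two low bits of
// each 32-bit element of `b` select one of the four floats in the same
// 128-bit lane of `a`.
//
//     let a = _mm512_setr_ps(
//         0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
//         8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
//     );
//     let b = _mm512_setr_epi32(3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0);
//     let r = _mm512_permutevar_ps(a, b);
//     // r reverses every 128-bit lane: [3.0, 2.0, 1.0, 0.0, 7.0, 6.0, 5.0, 4.0, ...]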
22595
22596/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22597///
22598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_ps&expand=4198)
22599#[inline]
22600#[target_feature(enable = "avx512f")]
22601#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22602#[cfg_attr(test, assert_instr(vpermilps))]
22603pub fn _mm512_mask_permutevar_ps(src: __m512, k: __mmask16, a: __m512, b: __m512i) -> __m512 {
22604    unsafe {
22605        let permute = _mm512_permutevar_ps(a, b).as_f32x16();
22606        transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
22607    }
22608}
22609
22610/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22611///
22612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutevar_ps&expand=4199)
22613#[inline]
22614#[target_feature(enable = "avx512f")]
22615#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22616#[cfg_attr(test, assert_instr(vpermilps))]
22617pub fn _mm512_maskz_permutevar_ps(k: __mmask16, a: __m512, b: __m512i) -> __m512 {
22618    unsafe {
22619        let permute = _mm512_permutevar_ps(a, b).as_f32x16();
22620        transmute(simd_select_bitmask(k, permute, f32x16::ZERO))
22621    }
22622}
22623
22624/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22625///
22626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutevar_ps&expand=4195)
22627#[inline]
22628#[target_feature(enable = "avx512f,avx512vl")]
22629#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22630#[cfg_attr(test, assert_instr(vpermilps))]
22631pub fn _mm256_mask_permutevar_ps(src: __m256, k: __mmask8, a: __m256, b: __m256i) -> __m256 {
22632    unsafe {
22633        let permute = _mm256_permutevar_ps(a, b).as_f32x8();
22634        transmute(simd_select_bitmask(k, permute, src.as_f32x8()))
22635    }
22636}
22637
22638/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22639///
22640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutevar_ps&expand=4196)
22641#[inline]
22642#[target_feature(enable = "avx512f,avx512vl")]
22643#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22644#[cfg_attr(test, assert_instr(vpermilps))]
22645pub fn _mm256_maskz_permutevar_ps(k: __mmask8, a: __m256, b: __m256i) -> __m256 {
22646    unsafe {
22647        let permute = _mm256_permutevar_ps(a, b).as_f32x8();
22648        transmute(simd_select_bitmask(k, permute, f32x8::ZERO))
22649    }
22650}
22651
22652/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22653///
22654/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutevar_ps&expand=4192)
22655#[inline]
22656#[target_feature(enable = "avx512f,avx512vl")]
22657#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22658#[cfg_attr(test, assert_instr(vpermilps))]
22659pub fn _mm_mask_permutevar_ps(src: __m128, k: __mmask8, a: __m128, b: __m128i) -> __m128 {
22660    unsafe {
22661        let permute = _mm_permutevar_ps(a, b).as_f32x4();
22662        transmute(simd_select_bitmask(k, permute, src.as_f32x4()))
22663    }
22664}
22665
22666/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22667///
22668/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutevar_ps&expand=4193)
22669#[inline]
22670#[target_feature(enable = "avx512f,avx512vl")]
22671#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22672#[cfg_attr(test, assert_instr(vpermilps))]
22673pub fn _mm_maskz_permutevar_ps(k: __mmask8, a: __m128, b: __m128i) -> __m128 {
22674    unsafe {
22675        let permute = _mm_permutevar_ps(a, b).as_f32x4();
22676        transmute(simd_select_bitmask(k, permute, f32x4::ZERO))
22677    }
22678}
22679
22680/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst.
22681///
22682/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_pd&expand=4191)
22683#[inline]
22684#[target_feature(enable = "avx512f")]
22685#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22686#[cfg_attr(test, assert_instr(vpermilpd))]
22687pub fn _mm512_permutevar_pd(a: __m512d, b: __m512i) -> __m512d {
22688    unsafe { transmute(vpermilpd(a.as_f64x8(), b.as_i64x8())) }
22689}
22690
22691/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22692///
22693/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_pd&expand=4189)
22694#[inline]
22695#[target_feature(enable = "avx512f")]
22696#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22697#[cfg_attr(test, assert_instr(vpermilpd))]
22698pub fn _mm512_mask_permutevar_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512i) -> __m512d {
22699    unsafe {
22700        let permute = _mm512_permutevar_pd(a, b).as_f64x8();
22701        transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
22702    }
22703}
22704
22705/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22706///
22707/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutevar_pd&expand=4190)
22708#[inline]
22709#[target_feature(enable = "avx512f")]
22710#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22711#[cfg_attr(test, assert_instr(vpermilpd))]
22712pub fn _mm512_maskz_permutevar_pd(k: __mmask8, a: __m512d, b: __m512i) -> __m512d {
22713    unsafe {
22714        let permute = _mm512_permutevar_pd(a, b).as_f64x8();
22715        transmute(simd_select_bitmask(k, permute, f64x8::ZERO))
22716    }
22717}
22718
22719/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22720///
22721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutevar_pd&expand=4186)
22722#[inline]
22723#[target_feature(enable = "avx512f,avx512vl")]
22724#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22725#[cfg_attr(test, assert_instr(vpermilpd))]
22726pub fn _mm256_mask_permutevar_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256i) -> __m256d {
22727    unsafe {
22728        let permute = _mm256_permutevar_pd(a, b).as_f64x4();
22729        transmute(simd_select_bitmask(k, permute, src.as_f64x4()))
22730    }
22731}
22732
22733/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22734///
22735/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutevar_pd&expand=4187)
22736#[inline]
22737#[target_feature(enable = "avx512f,avx512vl")]
22738#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22739#[cfg_attr(test, assert_instr(vpermilpd))]
22740pub fn _mm256_maskz_permutevar_pd(k: __mmask8, a: __m256d, b: __m256i) -> __m256d {
22741    unsafe {
22742        let permute = _mm256_permutevar_pd(a, b).as_f64x4();
22743        transmute(simd_select_bitmask(k, permute, f64x4::ZERO))
22744    }
22745}
22746
22747/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22748///
22749/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutevar_pd&expand=4183)
22750#[inline]
22751#[target_feature(enable = "avx512f,avx512vl")]
22752#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22753#[cfg_attr(test, assert_instr(vpermilpd))]
22754pub fn _mm_mask_permutevar_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128i) -> __m128d {
22755    unsafe {
22756        let permute = _mm_permutevar_pd(a, b).as_f64x2();
22757        transmute(simd_select_bitmask(k, permute, src.as_f64x2()))
22758    }
22759}
22760
22761/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22762///
22763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutevar_pd&expand=4184)
22764#[inline]
22765#[target_feature(enable = "avx512f,avx512vl")]
22766#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22767#[cfg_attr(test, assert_instr(vpermilpd))]
22768pub fn _mm_maskz_permutevar_pd(k: __mmask8, a: __m128d, b: __m128i) -> __m128d {
22769    unsafe {
22770        let permute = _mm_permutevar_pd(a, b).as_f64x2();
22771        transmute(simd_select_bitmask(k, permute, f64x2::ZERO))
22772    }
22773}
22774
22775/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
22776///
22777/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi32&expand=4301)
22778#[inline]
22779#[target_feature(enable = "avx512f")]
22780#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22781#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
22782pub fn _mm512_permutexvar_epi32(idx: __m512i, a: __m512i) -> __m512i {
22783    unsafe { transmute(vpermd(a.as_i32x16(), idx.as_i32x16())) }
22784}
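
// Illustrative sketch (not part of the original source): unlike the
// `permutevar` family, the indices here address all sixteen elements, so the
// shuffle crosses 128-bit lanes. Note that `idx` is the first argument.
//
//     let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
//     let idx = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
//     let r = _mm512_permutexvar_epi32(idx, a);
//     // r == a reversed: [15, 14, ..., 1, 0]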
22785
22786/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22787///
22788/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi32&expand=4299)
22789#[inline]
22790#[target_feature(enable = "avx512f")]
22791#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22792#[cfg_attr(test, assert_instr(vpermd))]
22793pub fn _mm512_mask_permutexvar_epi32(
22794    src: __m512i,
22795    k: __mmask16,
22796    idx: __m512i,
22797    a: __m512i,
22798) -> __m512i {
22799    unsafe {
22800        let permute = _mm512_permutexvar_epi32(idx, a).as_i32x16();
22801        transmute(simd_select_bitmask(k, permute, src.as_i32x16()))
22802    }
22803}
22804
22805/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22806///
22807/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi32&expand=4300)
22808#[inline]
22809#[target_feature(enable = "avx512f")]
22810#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22811#[cfg_attr(test, assert_instr(vpermd))]
22812pub fn _mm512_maskz_permutexvar_epi32(k: __mmask16, idx: __m512i, a: __m512i) -> __m512i {
22813    unsafe {
22814        let permute = _mm512_permutexvar_epi32(idx, a).as_i32x16();
22815        transmute(simd_select_bitmask(k, permute, i32x16::ZERO))
22816    }
22817}
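
// Illustrative zeromask sketch (not part of the original source): lanes whose
// mask bit is clear are zeroed rather than copied from a source vector.
//
//     let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
//     let idx = _mm512_set1_epi32(5);
//     let r = _mm512_maskz_permutexvar_epi32(0x00FF, idx, a);
//     // r == [5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0]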
22818
22819/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
22820///
22821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi32&expand=4298)
22822#[inline]
22823#[target_feature(enable = "avx512f,avx512vl")]
22824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22825#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
22826pub fn _mm256_permutexvar_epi32(idx: __m256i, a: __m256i) -> __m256i {
22827    _mm256_permutevar8x32_epi32(a, idx) // LLVM lowers this to llvm.x86.avx2.permd
22828}
22829
22830/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22831///
22832/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi32&expand=4296)
22833#[inline]
22834#[target_feature(enable = "avx512f,avx512vl")]
22835#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22836#[cfg_attr(test, assert_instr(vpermd))]
22837pub fn _mm256_mask_permutexvar_epi32(
22838    src: __m256i,
22839    k: __mmask8,
22840    idx: __m256i,
22841    a: __m256i,
22842) -> __m256i {
22843    unsafe {
22844        let permute = _mm256_permutexvar_epi32(idx, a).as_i32x8();
22845        transmute(simd_select_bitmask(k, permute, src.as_i32x8()))
22846    }
22847}
22848
22849/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22850///
22851/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi32&expand=4297)
22852#[inline]
22853#[target_feature(enable = "avx512f,avx512vl")]
22854#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22855#[cfg_attr(test, assert_instr(vpermd))]
22856pub fn _mm256_maskz_permutexvar_epi32(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i {
22857    unsafe {
22858        let permute = _mm256_permutexvar_epi32(idx, a).as_i32x8();
22859        transmute(simd_select_bitmask(k, permute, i32x8::ZERO))
22860    }
22861}
22862
22863/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
22864///
22865/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi64&expand=4307)
22866#[inline]
22867#[target_feature(enable = "avx512f")]
22868#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22869#[cfg_attr(test, assert_instr(vperm))] //should be vpermq
22870pub fn _mm512_permutexvar_epi64(idx: __m512i, a: __m512i) -> __m512i {
22871    unsafe { transmute(vpermq(a.as_i64x8(), idx.as_i64x8())) }
22872}
22873
22874/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22875///
22876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi64&expand=4305)
22877#[inline]
22878#[target_feature(enable = "avx512f")]
22879#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22880#[cfg_attr(test, assert_instr(vpermq))]
22881pub fn _mm512_mask_permutexvar_epi64(
22882    src: __m512i,
22883    k: __mmask8,
22884    idx: __m512i,
22885    a: __m512i,
22886) -> __m512i {
22887    unsafe {
22888        let permute = _mm512_permutexvar_epi64(idx, a).as_i64x8();
22889        transmute(simd_select_bitmask(k, permute, src.as_i64x8()))
22890    }
22891}
22892
22893/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22894///
22895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi64&expand=4306)
22896#[inline]
22897#[target_feature(enable = "avx512f")]
22898#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22899#[cfg_attr(test, assert_instr(vpermq))]
22900pub fn _mm512_maskz_permutexvar_epi64(k: __mmask8, idx: __m512i, a: __m512i) -> __m512i {
22901    unsafe {
22902        let permute = _mm512_permutexvar_epi64(idx, a).as_i64x8();
22903        transmute(simd_select_bitmask(k, permute, i64x8::ZERO))
22904    }
22905}
22906
22907/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
22908///
22909/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi64&expand=4304)
22910#[inline]
22911#[target_feature(enable = "avx512f,avx512vl")]
22912#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22913#[cfg_attr(test, assert_instr(vperm))] //should be vpermq
22914pub fn _mm256_permutexvar_epi64(idx: __m256i, a: __m256i) -> __m256i {
22915    unsafe { transmute(vpermq256(a.as_i64x4(), idx.as_i64x4())) }
22916}
22917
22918/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22919///
22920/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi64&expand=4302)
22921#[inline]
22922#[target_feature(enable = "avx512f,avx512vl")]
22923#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22924#[cfg_attr(test, assert_instr(vpermq))]
22925pub fn _mm256_mask_permutexvar_epi64(
22926    src: __m256i,
22927    k: __mmask8,
22928    idx: __m256i,
22929    a: __m256i,
22930) -> __m256i {
22931    unsafe {
22932        let permute = _mm256_permutexvar_epi64(idx, a).as_i64x4();
22933        transmute(simd_select_bitmask(k, permute, src.as_i64x4()))
22934    }
22935}
22936
22937/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22938///
22939/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi64&expand=4303)
22940#[inline]
22941#[target_feature(enable = "avx512f,avx512vl")]
22942#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22943#[cfg_attr(test, assert_instr(vpermq))]
22944pub fn _mm256_maskz_permutexvar_epi64(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i {
22945    unsafe {
22946        let permute = _mm256_permutexvar_epi64(idx, a).as_i64x4();
22947        transmute(simd_select_bitmask(k, permute, i64x4::ZERO))
22948    }
22949}
22950
22951/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
22952///
22953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_ps&expand=4200)
22954#[inline]
22955#[target_feature(enable = "avx512f")]
22956#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22957#[cfg_attr(test, assert_instr(vpermps))]
22958pub fn _mm512_permutexvar_ps(idx: __m512i, a: __m512) -> __m512 {
22959    unsafe { transmute(vpermps(a.as_f32x16(), idx.as_i32x16())) }
22960}
22961
22962/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22963///
22964/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_ps&expand=4326)
22965#[inline]
22966#[target_feature(enable = "avx512f")]
22967#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22968#[cfg_attr(test, assert_instr(vpermps))]
22969pub fn _mm512_mask_permutexvar_ps(src: __m512, k: __mmask16, idx: __m512i, a: __m512) -> __m512 {
22970    unsafe {
22971        let permute = _mm512_permutexvar_ps(idx, a).as_f32x16();
22972        transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
22973    }
22974}
22975
22976/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22977///
22978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_ps&expand=4327)
22979#[inline]
22980#[target_feature(enable = "avx512f")]
22981#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22982#[cfg_attr(test, assert_instr(vpermps))]
22983pub fn _mm512_maskz_permutexvar_ps(k: __mmask16, idx: __m512i, a: __m512) -> __m512 {
22984    unsafe {
22985        let permute = _mm512_permutexvar_ps(idx, a).as_f32x16();
22986        transmute(simd_select_bitmask(k, permute, f32x16::ZERO))
22987    }
22988}
22989
22990/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
22991///
22992/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_ps&expand=4325)
22993#[inline]
22994#[target_feature(enable = "avx512f,avx512vl")]
22995#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22996#[cfg_attr(test, assert_instr(vpermps))]
22997pub fn _mm256_permutexvar_ps(idx: __m256i, a: __m256) -> __m256 {
22998    _mm256_permutevar8x32_ps(a, idx) // LLVM lowers this to llvm.x86.avx2.permps
22999}
23000
23001/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23002///
23003/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_ps&expand=4323)
23004#[inline]
23005#[target_feature(enable = "avx512f,avx512vl")]
23006#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23007#[cfg_attr(test, assert_instr(vpermps))]
23008pub fn _mm256_mask_permutexvar_ps(src: __m256, k: __mmask8, idx: __m256i, a: __m256) -> __m256 {
23009    unsafe {
23010        let permute = _mm256_permutexvar_ps(idx, a).as_f32x8();
23011        transmute(simd_select_bitmask(k, permute, src.as_f32x8()))
23012    }
23013}
23014
23015/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23016///
23017/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_ps&expand=4324)
23018#[inline]
23019#[target_feature(enable = "avx512f,avx512vl")]
23020#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23021#[cfg_attr(test, assert_instr(vpermps))]
23022pub fn _mm256_maskz_permutexvar_ps(k: __mmask8, idx: __m256i, a: __m256) -> __m256 {
23023    unsafe {
23024        let permute = _mm256_permutexvar_ps(idx, a).as_f32x8();
23025        transmute(simd_select_bitmask(k, permute, f32x8::ZERO))
23026    }
23027}
23028
23029/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
23030///
23031/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_pd&expand=4322)
23032#[inline]
23033#[target_feature(enable = "avx512f")]
23034#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23035#[cfg_attr(test, assert_instr(vpermpd))]
23036pub fn _mm512_permutexvar_pd(idx: __m512i, a: __m512d) -> __m512d {
23037    unsafe { transmute(vpermpd(a.as_f64x8(), idx.as_i64x8())) }
23038}
23039
23040/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23041///
23042/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_pd&expand=4320)
23043#[inline]
23044#[target_feature(enable = "avx512f")]
23045#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23046#[cfg_attr(test, assert_instr(vpermpd))]
23047pub fn _mm512_mask_permutexvar_pd(src: __m512d, k: __mmask8, idx: __m512i, a: __m512d) -> __m512d {
23048    unsafe {
23049        let permute = _mm512_permutexvar_pd(idx, a).as_f64x8();
23050        transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
23051    }
23052}
23053
23054/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23055///
23056/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_pd&expand=4321)
23057#[inline]
23058#[target_feature(enable = "avx512f")]
23059#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23060#[cfg_attr(test, assert_instr(vpermpd))]
23061pub fn _mm512_maskz_permutexvar_pd(k: __mmask8, idx: __m512i, a: __m512d) -> __m512d {
23062    unsafe {
23063        let permute = _mm512_permutexvar_pd(idx, a).as_f64x8();
23064        transmute(simd_select_bitmask(k, permute, f64x8::ZERO))
23065    }
23066}
23067
23068/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
23069///
23070/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_pd&expand=4319)
23071#[inline]
23072#[target_feature(enable = "avx512f,avx512vl")]
23073#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23074#[cfg_attr(test, assert_instr(vpermpd))]
23075pub fn _mm256_permutexvar_pd(idx: __m256i, a: __m256d) -> __m256d {
23076    unsafe { transmute(vpermpd256(a.as_f64x4(), idx.as_i64x4())) }
23077}
23078
23079/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23080///
23081/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_pd&expand=4317)
23082#[inline]
23083#[target_feature(enable = "avx512f,avx512vl")]
23084#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23085#[cfg_attr(test, assert_instr(vpermpd))]
23086pub fn _mm256_mask_permutexvar_pd(src: __m256d, k: __mmask8, idx: __m256i, a: __m256d) -> __m256d {
23087    unsafe {
23088        let permute = _mm256_permutexvar_pd(idx, a).as_f64x4();
23089        transmute(simd_select_bitmask(k, permute, src.as_f64x4()))
23090    }
23091}
23092
23093/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23094///
23095/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_pd&expand=4318)
23096#[inline]
23097#[target_feature(enable = "avx512f,avx512vl")]
23098#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23099#[cfg_attr(test, assert_instr(vpermpd))]
23100pub fn _mm256_maskz_permutexvar_pd(k: __mmask8, idx: __m256i, a: __m256d) -> __m256d {
23101    unsafe {
23102        let permute = _mm256_permutexvar_pd(idx, a).as_f64x4();
23103        transmute(simd_select_bitmask(k, permute, f64x4::ZERO))
23104    }
23105}
23106
23107/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23108///
23109/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi32&expand=4238)
23110#[inline]
23111#[target_feature(enable = "avx512f")]
23112#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23113#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23114pub fn _mm512_permutex2var_epi32(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
23115    unsafe { transmute(vpermi2d(a.as_i32x16(), idx.as_i32x16(), b.as_i32x16())) }
23116}
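
// Illustrative sketch (not part of the original source): with sixteen 32-bit
// elements per operand, bits 3:0 of each index pick an element and bit 4
// selects the table (`a` when clear, `b` when set).
//
//     let a = _mm512_set1_epi32(10);
//     let b = _mm512_set1_epi32(20);
//     let idx = _mm512_setr_epi32(0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23);
//     let r = _mm512_permutex2var_epi32(a, idx, b);
//     // r alternates elements of a and b: [10, 20, 10, 20, ...]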
23117
23118/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23119///
23120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi32&expand=4235)
23121#[inline]
23122#[target_feature(enable = "avx512f")]
23123#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23124#[cfg_attr(test, assert_instr(vpermt2d))]
23125pub fn _mm512_mask_permutex2var_epi32(
23126    a: __m512i,
23127    k: __mmask16,
23128    idx: __m512i,
23129    b: __m512i,
23130) -> __m512i {
23131    unsafe {
23132        let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
23133        transmute(simd_select_bitmask(k, permute, a.as_i32x16()))
23134    }
23135}
23136
23137/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23138///
23139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi32&expand=4237)
23140#[inline]
23141#[target_feature(enable = "avx512f")]
23142#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23143#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23144pub fn _mm512_maskz_permutex2var_epi32(
23145    k: __mmask16,
23146    a: __m512i,
23147    idx: __m512i,
23148    b: __m512i,
23149) -> __m512i {
23150    unsafe {
23151        let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
23152        transmute(simd_select_bitmask(k, permute, i32x16::ZERO))
23153    }
23154}
23155
23156/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23157///
23158/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi32&expand=4236)
23159#[inline]
23160#[target_feature(enable = "avx512f")]
23161#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23162#[cfg_attr(test, assert_instr(vpermi2d))]
23163pub fn _mm512_mask2_permutex2var_epi32(
23164    a: __m512i,
23165    idx: __m512i,
23166    k: __mmask16,
23167    b: __m512i,
23168) -> __m512i {
23169    unsafe {
23170        let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
23171        transmute(simd_select_bitmask(k, permute, idx.as_i32x16()))
23172    }
23173}
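
// Illustrative sketch (not part of the original source): the `mask2` variant
// differs from the plain masked form in its fallback, copying from `idx`
// (reinterpreted as 32-bit integers) where the mask bit is clear.
//
//     let a = _mm512_set1_epi32(10);
//     let b = _mm512_set1_epi32(20);
//     let idx = _mm512_set1_epi32(16);
//     let r = _mm512_mask2_permutex2var_epi32(a, idx, 0x00FF, b);
//     // low 8 lanes == 20 (element 0 of b), high 8 lanes == 16 (copied from idx)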
23174
23175/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23176///
23177/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi32&expand=4234)
23178#[inline]
23179#[target_feature(enable = "avx512f,avx512vl")]
23180#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23181#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23182pub fn _mm256_permutex2var_epi32(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
23183    unsafe { transmute(vpermi2d256(a.as_i32x8(), idx.as_i32x8(), b.as_i32x8())) }
23184}
23185
23186/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23187///
23188/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi32&expand=4231)
23189#[inline]
23190#[target_feature(enable = "avx512f,avx512vl")]
23191#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23192#[cfg_attr(test, assert_instr(vpermt2d))]
23193pub fn _mm256_mask_permutex2var_epi32(
23194    a: __m256i,
23195    k: __mmask8,
23196    idx: __m256i,
23197    b: __m256i,
23198) -> __m256i {
23199    unsafe {
23200        let permute = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
23201        transmute(simd_select_bitmask(k, permute, a.as_i32x8()))
23202    }
23203}
23204
23205/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23206///
23207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi32&expand=4233)
23208#[inline]
23209#[target_feature(enable = "avx512f,avx512vl")]
23210#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23211#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23212pub fn _mm256_maskz_permutex2var_epi32(
23213    k: __mmask8,
23214    a: __m256i,
23215    idx: __m256i,
23216    b: __m256i,
23217) -> __m256i {
23218    unsafe {
23219        let permute = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
23220        transmute(simd_select_bitmask(k, permute, i32x8::ZERO))
23221    }
23222}
23223
23224/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23225///
23226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi32&expand=4232)
23227#[inline]
23228#[target_feature(enable = "avx512f,avx512vl")]
23229#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23230#[cfg_attr(test, assert_instr(vpermi2d))]
23231pub fn _mm256_mask2_permutex2var_epi32(
23232    a: __m256i,
23233    idx: __m256i,
23234    k: __mmask8,
23235    b: __m256i,
23236) -> __m256i {
23237    unsafe {
23238        let permute = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
23239        transmute(simd_select_bitmask(k, permute, idx.as_i32x8()))
23240    }
23241}
23242
23243/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23244///
23245/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi32&expand=4230)
23246#[inline]
23247#[target_feature(enable = "avx512f,avx512vl")]
23248#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23249#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23250pub fn _mm_permutex2var_epi32(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
23251    unsafe { transmute(vpermi2d128(a.as_i32x4(), idx.as_i32x4(), b.as_i32x4())) }
23252}
23253
23254/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23255///
23256/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi32&expand=4227)
23257#[inline]
23258#[target_feature(enable = "avx512f,avx512vl")]
23259#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23260#[cfg_attr(test, assert_instr(vpermt2d))]
23261pub fn _mm_mask_permutex2var_epi32(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i {
23262    unsafe {
23263        let permute = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
23264        transmute(simd_select_bitmask(k, permute, a.as_i32x4()))
23265    }
23266}
23267
23268/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23269///
23270/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi32&expand=4229)
23271#[inline]
23272#[target_feature(enable = "avx512f,avx512vl")]
23273#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23274#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23275pub fn _mm_maskz_permutex2var_epi32(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
23276    unsafe {
23277        let permute = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
23278        transmute(simd_select_bitmask(k, permute, i32x4::ZERO))
23279    }
23280}
23281
23282/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23283///
23284/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi32&expand=4228)
23285#[inline]
23286#[target_feature(enable = "avx512f,avx512vl")]
23287#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23288#[cfg_attr(test, assert_instr(vpermi2d))]
23289pub fn _mm_mask2_permutex2var_epi32(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i {
23290    unsafe {
23291        let permute = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
23292        transmute(simd_select_bitmask(k, permute, idx.as_i32x4()))
23293    }
23294}
23295
23296/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23297///
23298/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi64&expand=4250)
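///
/// # Examples
///
/// A minimal, nightly-only sketch (guarded by run-time AVX-512F detection; the input
/// values are purely illustrative). Index values 0..=7 select an element of `a` and
/// 8..=15 select an element of `b`:
///
/// ```
/// #![feature(stdarch_x86_avx512)]
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
///             let b = _mm512_setr_epi64(10, 11, 12, 13, 14, 15, 16, 17);
///             // Interleave the first four elements of `a` with the first four of `b`.
///             let idx = _mm512_setr_epi64(0, 8, 1, 9, 2, 10, 3, 11);
///             let r = _mm512_permutex2var_epi64(a, idx, b);
///             let expected = _mm512_setr_epi64(0, 10, 1, 11, 2, 12, 3, 13);
///             assert_eq!(_mm512_cmpeq_epi64_mask(r, expected), 0xff);
///         }
///     }
/// }
/// ```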
23299#[inline]
23300#[target_feature(enable = "avx512f")]
23301#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23302#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23303pub fn _mm512_permutex2var_epi64(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
23304    unsafe { transmute(vpermi2q(a.as_i64x8(), idx.as_i64x8(), b.as_i64x8())) }
23305}
23306
23307/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23308///
23309/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi64&expand=4247)
23310#[inline]
23311#[target_feature(enable = "avx512f")]
23312#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23313#[cfg_attr(test, assert_instr(vpermt2q))]
23314pub fn _mm512_mask_permutex2var_epi64(
23315    a: __m512i,
23316    k: __mmask8,
23317    idx: __m512i,
23318    b: __m512i,
23319) -> __m512i {
23320    unsafe {
23321        let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
23322        transmute(simd_select_bitmask(k, permute, a.as_i64x8()))
23323    }
23324}
23325
23326/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23327///
23328/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi64&expand=4249)
23329#[inline]
23330#[target_feature(enable = "avx512f")]
23331#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23332#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23333pub fn _mm512_maskz_permutex2var_epi64(
23334    k: __mmask8,
23335    a: __m512i,
23336    idx: __m512i,
23337    b: __m512i,
23338) -> __m512i {
23339    unsafe {
23340        let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
23341        transmute(simd_select_bitmask(k, permute, i64x8::ZERO))
23342    }
23343}
23344
23345/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23346///
23347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi64&expand=4248)
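///
/// # Examples
///
/// A minimal, nightly-only sketch (guarded by run-time AVX-512F detection; values are
/// illustrative) showing that lanes whose mask bit is clear receive the corresponding
/// element of `idx` itself rather than a permuted value:
///
/// ```
/// #![feature(stdarch_x86_avx512)]
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let a = _mm512_set1_epi64(100);
///             let b = _mm512_set1_epi64(200);
///             let idx = _mm512_setr_epi64(0, 8, 0, 8, 0, 8, 0, 8);
///             // Only the low four lanes are permuted; the high four keep `idx`.
///             let r = _mm512_mask2_permutex2var_epi64(a, idx, 0b0000_1111, b);
///             let expected = _mm512_setr_epi64(100, 200, 100, 200, 0, 8, 0, 8);
///             assert_eq!(_mm512_cmpeq_epi64_mask(r, expected), 0xff);
///         }
///     }
/// }
/// ```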
23348#[inline]
23349#[target_feature(enable = "avx512f")]
23350#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23351#[cfg_attr(test, assert_instr(vpermi2q))]
23352pub fn _mm512_mask2_permutex2var_epi64(
23353    a: __m512i,
23354    idx: __m512i,
23355    k: __mmask8,
23356    b: __m512i,
23357) -> __m512i {
23358    unsafe {
23359        let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
23360        transmute(simd_select_bitmask(k, permute, idx.as_i64x8()))
23361    }
23362}
23363
23364/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23365///
23366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi64&expand=4246)
23367#[inline]
23368#[target_feature(enable = "avx512f,avx512vl")]
23369#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23370#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23371pub fn _mm256_permutex2var_epi64(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
23372    unsafe { transmute(vpermi2q256(a.as_i64x4(), idx.as_i64x4(), b.as_i64x4())) }
23373}
23374
23375/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23376///
23377/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi64&expand=4243)
23378#[inline]
23379#[target_feature(enable = "avx512f,avx512vl")]
23380#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23381#[cfg_attr(test, assert_instr(vpermt2q))]
23382pub fn _mm256_mask_permutex2var_epi64(
23383    a: __m256i,
23384    k: __mmask8,
23385    idx: __m256i,
23386    b: __m256i,
23387) -> __m256i {
23388    unsafe {
23389        let permute = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
23390        transmute(simd_select_bitmask(k, permute, a.as_i64x4()))
23391    }
23392}
23393
23394/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23395///
23396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi64&expand=4245)
23397#[inline]
23398#[target_feature(enable = "avx512f,avx512vl")]
23399#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23400#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23401pub fn _mm256_maskz_permutex2var_epi64(
23402    k: __mmask8,
23403    a: __m256i,
23404    idx: __m256i,
23405    b: __m256i,
23406) -> __m256i {
23407    unsafe {
23408        let permute = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
23409        transmute(simd_select_bitmask(k, permute, i64x4::ZERO))
23410    }
23411}
23412
23413/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23414///
23415/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi64&expand=4244)
23416#[inline]
23417#[target_feature(enable = "avx512f,avx512vl")]
23418#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23419#[cfg_attr(test, assert_instr(vpermi2q))]
23420pub fn _mm256_mask2_permutex2var_epi64(
23421    a: __m256i,
23422    idx: __m256i,
23423    k: __mmask8,
23424    b: __m256i,
23425) -> __m256i {
23426    unsafe {
23427        let permute = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
23428        transmute(simd_select_bitmask(k, permute, idx.as_i64x4()))
23429    }
23430}
23431
23432/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23433///
23434/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi64&expand=4242)
23435#[inline]
23436#[target_feature(enable = "avx512f,avx512vl")]
23437#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23438#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23439pub fn _mm_permutex2var_epi64(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
23440    unsafe { transmute(vpermi2q128(a.as_i64x2(), idx.as_i64x2(), b.as_i64x2())) }
23441}
23442
23443/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23444///
23445/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi64&expand=4239)
23446#[inline]
23447#[target_feature(enable = "avx512f,avx512vl")]
23448#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23449#[cfg_attr(test, assert_instr(vpermt2q))]
23450pub fn _mm_mask_permutex2var_epi64(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i {
23451    unsafe {
23452        let permute = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
23453        transmute(simd_select_bitmask(k, permute, a.as_i64x2()))
23454    }
23455}
23456
23457/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23458///
23459/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi64&expand=4241)
23460#[inline]
23461#[target_feature(enable = "avx512f,avx512vl")]
23462#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23463#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23464pub fn _mm_maskz_permutex2var_epi64(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
23465    unsafe {
23466        let permute = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
23467        transmute(simd_select_bitmask(k, permute, i64x2::ZERO))
23468    }
23469}
23470
23471/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23472///
23473/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi64&expand=4240)
23474#[inline]
23475#[target_feature(enable = "avx512f,avx512vl")]
23476#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23477#[cfg_attr(test, assert_instr(vpermi2q))]
23478pub fn _mm_mask2_permutex2var_epi64(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i {
23479    unsafe {
23480        let permute = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
23481        transmute(simd_select_bitmask(k, permute, idx.as_i64x2()))
23482    }
23483}
23484
23485/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23486///
23487/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_ps&expand=4286)
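///
/// # Examples
///
/// A minimal, nightly-only sketch (guarded by run-time AVX-512F detection; values are
/// illustrative). Index values 0..=15 select an element of `a` and 16..=31 select an
/// element of `b`:
///
/// ```
/// #![feature(stdarch_x86_avx512)]
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let a = _mm512_set1_ps(1.0);
///             let b = _mm512_set1_ps(2.0);
///             // Even lanes take a[0], odd lanes take b[0].
///             let idx = _mm512_setr_epi32(0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16);
///             let r = _mm512_permutex2var_ps(a, idx, b);
///             let mut out = [0.0f32; 16];
///             _mm512_storeu_ps(out.as_mut_ptr(), r);
///             assert_eq!(out, [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
///                              1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0]);
///         }
///     }
/// }
/// ```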
23488#[inline]
23489#[target_feature(enable = "avx512f")]
23490#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23491#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23492pub fn _mm512_permutex2var_ps(a: __m512, idx: __m512i, b: __m512) -> __m512 {
23493    unsafe { transmute(vpermi2ps(a.as_f32x16(), idx.as_i32x16(), b.as_f32x16())) }
23494}
23495
23496/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23497///
23498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_ps&expand=4283)
23499#[inline]
23500#[target_feature(enable = "avx512f")]
23501#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23502#[cfg_attr(test, assert_instr(vpermt2ps))]
23503pub fn _mm512_mask_permutex2var_ps(a: __m512, k: __mmask16, idx: __m512i, b: __m512) -> __m512 {
23504    unsafe {
23505        let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
23506        transmute(simd_select_bitmask(k, permute, a.as_f32x16()))
23507    }
23508}
23509
23510/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23511///
23512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_ps&expand=4285)
23513#[inline]
23514#[target_feature(enable = "avx512f")]
23515#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23516#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23517pub fn _mm512_maskz_permutex2var_ps(k: __mmask16, a: __m512, idx: __m512i, b: __m512) -> __m512 {
23518    unsafe {
23519        let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
23520        transmute(simd_select_bitmask(k, permute, f32x16::ZERO))
23521    }
23522}
23523
23524/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23525///
23526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_ps&expand=4284)
23527#[inline]
23528#[target_feature(enable = "avx512f")]
23529#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23530#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
23531pub fn _mm512_mask2_permutex2var_ps(a: __m512, idx: __m512i, k: __mmask16, b: __m512) -> __m512 {
23532    unsafe {
23533        let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
23534        let idx = _mm512_castsi512_ps(idx).as_f32x16();
23535        transmute(simd_select_bitmask(k, permute, idx))
23536    }
23537}
23538
23539/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23540///
23541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_ps&expand=4282)
23542#[inline]
23543#[target_feature(enable = "avx512f,avx512vl")]
23544#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23545#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23546pub fn _mm256_permutex2var_ps(a: __m256, idx: __m256i, b: __m256) -> __m256 {
23547    unsafe { transmute(vpermi2ps256(a.as_f32x8(), idx.as_i32x8(), b.as_f32x8())) }
23548}
23549
23550/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23551///
23552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_ps&expand=4279)
23553#[inline]
23554#[target_feature(enable = "avx512f,avx512vl")]
23555#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23556#[cfg_attr(test, assert_instr(vpermt2ps))]
23557pub fn _mm256_mask_permutex2var_ps(a: __m256, k: __mmask8, idx: __m256i, b: __m256) -> __m256 {
23558    unsafe {
23559        let permute = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
23560        transmute(simd_select_bitmask(k, permute, a.as_f32x8()))
23561    }
23562}
23563
23564/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23565///
23566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_ps&expand=4281)
23567#[inline]
23568#[target_feature(enable = "avx512f,avx512vl")]
23569#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23570#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23571pub fn _mm256_maskz_permutex2var_ps(k: __mmask8, a: __m256, idx: __m256i, b: __m256) -> __m256 {
23572    unsafe {
23573        let permute = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
23574        transmute(simd_select_bitmask(k, permute, f32x8::ZERO))
23575    }
23576}
23577
23578/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23579///
23580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_ps&expand=4280)
23581#[inline]
23582#[target_feature(enable = "avx512f,avx512vl")]
23583#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23584#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
23585pub fn _mm256_mask2_permutex2var_ps(a: __m256, idx: __m256i, k: __mmask8, b: __m256) -> __m256 {
23586    unsafe {
23587        let permute = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
23588        let idx = _mm256_castsi256_ps(idx).as_f32x8();
23589        transmute(simd_select_bitmask(k, permute, idx))
23590    }
23591}
23592
23593/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23594///
23595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_ps&expand=4278)
23596#[inline]
23597#[target_feature(enable = "avx512f,avx512vl")]
23598#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23599#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23600pub fn _mm_permutex2var_ps(a: __m128, idx: __m128i, b: __m128) -> __m128 {
23601    unsafe { transmute(vpermi2ps128(a.as_f32x4(), idx.as_i32x4(), b.as_f32x4())) }
23602}
23603
23604/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23605///
23606/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_ps&expand=4275)
23607#[inline]
23608#[target_feature(enable = "avx512f,avx512vl")]
23609#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23610#[cfg_attr(test, assert_instr(vpermt2ps))]
23611pub fn _mm_mask_permutex2var_ps(a: __m128, k: __mmask8, idx: __m128i, b: __m128) -> __m128 {
23612    unsafe {
23613        let permute = _mm_permutex2var_ps(a, idx, b).as_f32x4();
23614        transmute(simd_select_bitmask(k, permute, a.as_f32x4()))
23615    }
23616}
23617
23618/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23619///
23620/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_ps&expand=4277)
23621#[inline]
23622#[target_feature(enable = "avx512f,avx512vl")]
23623#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23624#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23625pub fn _mm_maskz_permutex2var_ps(k: __mmask8, a: __m128, idx: __m128i, b: __m128) -> __m128 {
23626    unsafe {
23627        let permute = _mm_permutex2var_ps(a, idx, b).as_f32x4();
23628        transmute(simd_select_bitmask(k, permute, f32x4::ZERO))
23629    }
23630}
23631
23632/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23633///
23634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_ps&expand=4276)
23635#[inline]
23636#[target_feature(enable = "avx512f,avx512vl")]
23637#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23638#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
23639pub fn _mm_mask2_permutex2var_ps(a: __m128, idx: __m128i, k: __mmask8, b: __m128) -> __m128 {
23640    unsafe {
23641        let permute = _mm_permutex2var_ps(a, idx, b).as_f32x4();
23642        let idx = _mm_castsi128_ps(idx).as_f32x4();
23643        transmute(simd_select_bitmask(k, permute, idx))
23644    }
23645}
23646
23647/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23648///
23649/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_pd&expand=4274)
23650#[inline]
23651#[target_feature(enable = "avx512f")]
23652#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23653#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23654pub fn _mm512_permutex2var_pd(a: __m512d, idx: __m512i, b: __m512d) -> __m512d {
23655    unsafe { transmute(vpermi2pd(a.as_f64x8(), idx.as_i64x8(), b.as_f64x8())) }
23656}
23657
23658/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23659///
23660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_pd&expand=4271)
23661#[inline]
23662#[target_feature(enable = "avx512f")]
23663#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23664#[cfg_attr(test, assert_instr(vpermt2pd))]
23665pub fn _mm512_mask_permutex2var_pd(a: __m512d, k: __mmask8, idx: __m512i, b: __m512d) -> __m512d {
23666    unsafe {
23667        let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
23668        transmute(simd_select_bitmask(k, permute, a.as_f64x8()))
23669    }
23670}
23671
23672/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23673///
23674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_pd&expand=4273)
23675#[inline]
23676#[target_feature(enable = "avx512f")]
23677#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23678#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23679pub fn _mm512_maskz_permutex2var_pd(k: __mmask8, a: __m512d, idx: __m512i, b: __m512d) -> __m512d {
23680    unsafe {
23681        let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
23682        transmute(simd_select_bitmask(k, permute, f64x8::ZERO))
23683    }
23684}
23685
23686/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23687///
23688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_pd&expand=4272)
23689#[inline]
23690#[target_feature(enable = "avx512f")]
23691#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23692#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
23693pub fn _mm512_mask2_permutex2var_pd(a: __m512d, idx: __m512i, k: __mmask8, b: __m512d) -> __m512d {
23694    unsafe {
23695        let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
23696        let idx = _mm512_castsi512_pd(idx).as_f64x8();
23697        transmute(simd_select_bitmask(k, permute, idx))
23698    }
23699}
23700
23701/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23702///
23703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_pd&expand=4270)
23704#[inline]
23705#[target_feature(enable = "avx512f,avx512vl")]
23706#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23707#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23708pub fn _mm256_permutex2var_pd(a: __m256d, idx: __m256i, b: __m256d) -> __m256d {
23709    unsafe { transmute(vpermi2pd256(a.as_f64x4(), idx.as_i64x4(), b.as_f64x4())) }
23710}
23711
23712/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23713///
23714/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_pd&expand=4267)
23715#[inline]
23716#[target_feature(enable = "avx512f,avx512vl")]
23717#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23718#[cfg_attr(test, assert_instr(vpermt2pd))]
23719pub fn _mm256_mask_permutex2var_pd(a: __m256d, k: __mmask8, idx: __m256i, b: __m256d) -> __m256d {
23720    unsafe {
23721        let permute = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
23722        transmute(simd_select_bitmask(k, permute, a.as_f64x4()))
23723    }
23724}
23725
23726/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23727///
23728/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_pd&expand=4269)
23729#[inline]
23730#[target_feature(enable = "avx512f,avx512vl")]
23731#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23732#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23733pub fn _mm256_maskz_permutex2var_pd(k: __mmask8, a: __m256d, idx: __m256i, b: __m256d) -> __m256d {
23734    unsafe {
23735        let permute = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
23736        transmute(simd_select_bitmask(k, permute, f64x4::ZERO))
23737    }
23738}
23739
23740/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23741///
23742/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_pd&expand=4268)
23743#[inline]
23744#[target_feature(enable = "avx512f,avx512vl")]
23745#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23746#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
23747pub fn _mm256_mask2_permutex2var_pd(a: __m256d, idx: __m256i, k: __mmask8, b: __m256d) -> __m256d {
23748    unsafe {
23749        let permute = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
23750        let idx = _mm256_castsi256_pd(idx).as_f64x4();
23751        transmute(simd_select_bitmask(k, permute, idx))
23752    }
23753}
23754
23755/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23756///
23757/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_pd&expand=4266)
23758#[inline]
23759#[target_feature(enable = "avx512f,avx512vl")]
23760#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23761#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23762pub fn _mm_permutex2var_pd(a: __m128d, idx: __m128i, b: __m128d) -> __m128d {
23763    unsafe { transmute(vpermi2pd128(a.as_f64x2(), idx.as_i64x2(), b.as_f64x2())) }
23764}
23765
23766/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23767///
23768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_pd&expand=4263)
23769#[inline]
23770#[target_feature(enable = "avx512f,avx512vl")]
23771#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23772#[cfg_attr(test, assert_instr(vpermt2pd))]
23773pub fn _mm_mask_permutex2var_pd(a: __m128d, k: __mmask8, idx: __m128i, b: __m128d) -> __m128d {
23774    unsafe {
23775        let permute = _mm_permutex2var_pd(a, idx, b).as_f64x2();
23776        transmute(simd_select_bitmask(k, permute, a.as_f64x2()))
23777    }
23778}
23779
23780/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23781///
23782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_pd&expand=4265)
23783#[inline]
23784#[target_feature(enable = "avx512f,avx512vl")]
23785#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23786#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23787pub fn _mm_maskz_permutex2var_pd(k: __mmask8, a: __m128d, idx: __m128i, b: __m128d) -> __m128d {
23788    unsafe {
23789        let permute = _mm_permutex2var_pd(a, idx, b).as_f64x2();
23790        transmute(simd_select_bitmask(k, permute, f64x2::ZERO))
23791    }
23792}
23793
23794/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23795///
23796/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_pd&expand=4264)
23797#[inline]
23798#[target_feature(enable = "avx512f,avx512vl")]
23799#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23800#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
23801pub fn _mm_mask2_permutex2var_pd(a: __m128d, idx: __m128i, k: __mmask8, b: __m128d) -> __m128d {
23802    unsafe {
23803        let permute = _mm_permutex2var_pd(a, idx, b).as_f64x2();
23804        let idx = _mm_castsi128_pd(idx).as_f64x2();
23805        transmute(simd_select_bitmask(k, permute, idx))
23806    }
23807}
23808
23809/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst.
23810///
23811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_epi32&expand=5150)
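///
/// # Examples
///
/// A minimal, nightly-only sketch (guarded by run-time AVX-512F detection; values are
/// illustrative). Each 2-bit field of `MASK` selects one of the four elements of the
/// corresponding 128-bit lane; `0b00_01_10_11` reverses every lane:
///
/// ```
/// #![feature(stdarch_x86_avx512)]
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///             let r = _mm512_shuffle_epi32::<0b00_01_10_11>(a);
///             let expected =
///                 _mm512_setr_epi32(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);
///             assert_eq!(_mm512_cmpeq_epi32_mask(r, expected), 0xffff);
///         }
///     }
/// }
/// ```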
23812#[inline]
23813#[target_feature(enable = "avx512f")]
23814#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23815#[cfg_attr(test, assert_instr(vshufps, MASK = 9))] //should be vpshufd
23816#[rustc_legacy_const_generics(1)]
23817pub fn _mm512_shuffle_epi32<const MASK: _MM_PERM_ENUM>(a: __m512i) -> __m512i {
23818    unsafe {
23819        static_assert_uimm_bits!(MASK, 8);
23820        let r: i32x16 = simd_shuffle!(
23821            a.as_i32x16(),
23822            a.as_i32x16(),
23823            [
23824                MASK as u32 & 0b11,
23825                (MASK as u32 >> 2) & 0b11,
23826                (MASK as u32 >> 4) & 0b11,
23827                (MASK as u32 >> 6) & 0b11,
23828                (MASK as u32 & 0b11) + 4,
23829                ((MASK as u32 >> 2) & 0b11) + 4,
23830                ((MASK as u32 >> 4) & 0b11) + 4,
23831                ((MASK as u32 >> 6) & 0b11) + 4,
23832                (MASK as u32 & 0b11) + 8,
23833                ((MASK as u32 >> 2) & 0b11) + 8,
23834                ((MASK as u32 >> 4) & 0b11) + 8,
23835                ((MASK as u32 >> 6) & 0b11) + 8,
23836                (MASK as u32 & 0b11) + 12,
23837                ((MASK as u32 >> 2) & 0b11) + 12,
23838                ((MASK as u32 >> 4) & 0b11) + 12,
23839                ((MASK as u32 >> 6) & 0b11) + 12,
23840            ],
23841        );
23842        transmute(r)
23843    }
23844}
23845
23846/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23847///
23848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_epi32&expand=5148)
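///
/// # Examples
///
/// A minimal, nightly-only sketch (guarded by run-time AVX-512F detection; values are
/// illustrative). Only lanes whose mask bit is set receive the shuffled value; the
/// rest are copied from `src`:
///
/// ```
/// #![feature(stdarch_x86_avx512)]
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let src = _mm512_set1_epi32(-1);
///             let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///             // Shuffle only the low eight elements; keep `src` in the high eight.
///             let r = _mm512_mask_shuffle_epi32::<0b00_01_10_11>(src, 0b0000_0000_1111_1111, a);
///             let expected =
///                 _mm512_setr_epi32(3, 2, 1, 0, 7, 6, 5, 4, -1, -1, -1, -1, -1, -1, -1, -1);
///             assert_eq!(_mm512_cmpeq_epi32_mask(r, expected), 0xffff);
///         }
///     }
/// }
/// ```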
23849#[inline]
23850#[target_feature(enable = "avx512f")]
23851#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23852#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23853#[rustc_legacy_const_generics(3)]
23854pub fn _mm512_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
23855    src: __m512i,
23856    k: __mmask16,
23857    a: __m512i,
23858) -> __m512i {
23859    unsafe {
23860        static_assert_uimm_bits!(MASK, 8);
23861        let r = _mm512_shuffle_epi32::<MASK>(a);
23862        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
23863    }
23864}
23865
23866/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23867///
23868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_epi32&expand=5149)
23869#[inline]
23870#[target_feature(enable = "avx512f")]
23871#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23872#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23873#[rustc_legacy_const_generics(2)]
23874pub fn _mm512_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(k: __mmask16, a: __m512i) -> __m512i {
23875    unsafe {
23876        static_assert_uimm_bits!(MASK, 8);
23877        let r = _mm512_shuffle_epi32::<MASK>(a);
23878        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
23879    }
23880}
23881
23882/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23883///
23884/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_epi32&expand=5145)
23885#[inline]
23886#[target_feature(enable = "avx512f,avx512vl")]
23887#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23888#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23889#[rustc_legacy_const_generics(3)]
23890pub fn _mm256_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
23891    src: __m256i,
23892    k: __mmask8,
23893    a: __m256i,
23894) -> __m256i {
23895    unsafe {
23896        static_assert_uimm_bits!(MASK, 8);
23897        let r = _mm256_shuffle_epi32::<MASK>(a);
23898        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
23899    }
23900}
23901
23902/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23903///
23904/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_epi32&expand=5146)
23905#[inline]
23906#[target_feature(enable = "avx512f,avx512vl")]
23907#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23908#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23909#[rustc_legacy_const_generics(2)]
23910pub fn _mm256_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(k: __mmask8, a: __m256i) -> __m256i {
23911    unsafe {
23912        static_assert_uimm_bits!(MASK, 8);
23913        let r = _mm256_shuffle_epi32::<MASK>(a);
23914        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
23915    }
23916}
23917
23918/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23919///
23920/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_epi32&expand=5142)
23921#[inline]
23922#[target_feature(enable = "avx512f,avx512vl")]
23923#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23924#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23925#[rustc_legacy_const_generics(3)]
23926pub fn _mm_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
23927    src: __m128i,
23928    k: __mmask8,
23929    a: __m128i,
23930) -> __m128i {
23931    unsafe {
23932        static_assert_uimm_bits!(MASK, 8);
23933        let r = _mm_shuffle_epi32::<MASK>(a);
23934        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
23935    }
23936}
23937
23938/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23939///
23940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_epi32&expand=5143)
23941#[inline]
23942#[target_feature(enable = "avx512f,avx512vl")]
23943#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23944#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23945#[rustc_legacy_const_generics(2)]
23946pub fn _mm_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(k: __mmask8, a: __m128i) -> __m128i {
23947    unsafe {
23948        static_assert_uimm_bits!(MASK, 8);
23949        let r = _mm_shuffle_epi32::<MASK>(a);
23950        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
23951    }
23952}
23953
23954/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
23955///
23956/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_ps&expand=5203)
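///
/// # Examples
///
/// A minimal, nightly-only sketch (guarded by run-time AVX-512F detection; values are
/// illustrative). Within each 128-bit lane, the two low destination elements are
/// selected from `a` and the two high ones from `b` by the 2-bit fields of `MASK`:
///
/// ```
/// #![feature(stdarch_x86_avx512)]
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let a = _mm512_setr_ps(0., 1., 2., 3., 4., 5., 6., 7.,
///                                    8., 9., 10., 11., 12., 13., 14., 15.);
///             let b = _mm512_set1_ps(-1.0);
///             // MASK = 0: element 0 of `a` twice, then element 0 of `b` twice, per lane.
///             let r = _mm512_shuffle_ps::<0b00_00_00_00>(a, b);
///             let mut out = [0.0f32; 16];
///             _mm512_storeu_ps(out.as_mut_ptr(), r);
///             assert_eq!(out, [0., 0., -1., -1., 4., 4., -1., -1.,
///                              8., 8., -1., -1., 12., 12., -1., -1.]);
///         }
///     }
/// }
/// ```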
23957#[inline]
23958#[target_feature(enable = "avx512f")]
23959#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23960#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
23961#[rustc_legacy_const_generics(2)]
23962pub fn _mm512_shuffle_ps<const MASK: i32>(a: __m512, b: __m512) -> __m512 {
23963    unsafe {
23964        static_assert_uimm_bits!(MASK, 8);
23965        simd_shuffle!(
23966            a,
23967            b,
23968            [
23969                MASK as u32 & 0b11,
23970                (MASK as u32 >> 2) & 0b11,
23971                ((MASK as u32 >> 4) & 0b11) + 16,
23972                ((MASK as u32 >> 6) & 0b11) + 16,
23973                (MASK as u32 & 0b11) + 4,
23974                ((MASK as u32 >> 2) & 0b11) + 4,
23975                ((MASK as u32 >> 4) & 0b11) + 20,
23976                ((MASK as u32 >> 6) & 0b11) + 20,
23977                (MASK as u32 & 0b11) + 8,
23978                ((MASK as u32 >> 2) & 0b11) + 8,
23979                ((MASK as u32 >> 4) & 0b11) + 24,
23980                ((MASK as u32 >> 6) & 0b11) + 24,
23981                (MASK as u32 & 0b11) + 12,
23982                ((MASK as u32 >> 2) & 0b11) + 12,
23983                ((MASK as u32 >> 4) & 0b11) + 28,
23984                ((MASK as u32 >> 6) & 0b11) + 28,
23985            ],
23986        )
23987    }
23988}
23989
23990/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23991///
23992/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_ps&expand=5201)
23993#[inline]
23994#[target_feature(enable = "avx512f")]
23995#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23996#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
23997#[rustc_legacy_const_generics(4)]
23998pub fn _mm512_mask_shuffle_ps<const MASK: i32>(
23999    src: __m512,
24000    k: __mmask16,
24001    a: __m512,
24002    b: __m512,
24003) -> __m512 {
24004    unsafe {
24005        static_assert_uimm_bits!(MASK, 8);
24006        let r = _mm512_shuffle_ps::<MASK>(a, b);
24007        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
24008    }
24009}
24010
24011/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24012///
24013/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_ps&expand=5202)
24014#[inline]
24015#[target_feature(enable = "avx512f")]
24016#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24017#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24018#[rustc_legacy_const_generics(3)]
24019pub fn _mm512_maskz_shuffle_ps<const MASK: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
24020    unsafe {
24021        static_assert_uimm_bits!(MASK, 8);
24022        let r = _mm512_shuffle_ps::<MASK>(a, b);
24023        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
24024    }
24025}
24026
24027/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24028///
24029/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_ps&expand=5198)
24030#[inline]
24031#[target_feature(enable = "avx512f,avx512vl")]
24032#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24033#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24034#[rustc_legacy_const_generics(4)]
24035pub fn _mm256_mask_shuffle_ps<const MASK: i32>(
24036    src: __m256,
24037    k: __mmask8,
24038    a: __m256,
24039    b: __m256,
24040) -> __m256 {
24041    unsafe {
24042        static_assert_uimm_bits!(MASK, 8);
24043        let r = _mm256_shuffle_ps::<MASK>(a, b);
24044        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
24045    }
24046}
24047
24048/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24049///
24050/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_ps&expand=5199)
24051#[inline]
24052#[target_feature(enable = "avx512f,avx512vl")]
24053#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24054#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24055#[rustc_legacy_const_generics(3)]
24056pub fn _mm256_maskz_shuffle_ps<const MASK: i32>(k: __mmask8, a: __m256, b: __m256) -> __m256 {
24057    unsafe {
24058        static_assert_uimm_bits!(MASK, 8);
24059        let r = _mm256_shuffle_ps::<MASK>(a, b);
24060        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
24061    }
24062}
24063
24064/// Shuffle single-precision (32-bit) floating-point elements in a using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24065///
24066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_ps&expand=5195)
24067#[inline]
24068#[target_feature(enable = "avx512f,avx512vl")]
24069#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24070#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24071#[rustc_legacy_const_generics(4)]
24072pub fn _mm_mask_shuffle_ps<const MASK: i32>(
24073    src: __m128,
24074    k: __mmask8,
24075    a: __m128,
24076    b: __m128,
24077) -> __m128 {
24078    unsafe {
24079        static_assert_uimm_bits!(MASK, 8);
24080        let r = _mm_shuffle_ps::<MASK>(a, b);
24081        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
24082    }
24083}
24084
24085/// Shuffle single-precision (32-bit) floating-point elements in a using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24086///
24087/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_ps&expand=5196)
24088#[inline]
24089#[target_feature(enable = "avx512f,avx512vl")]
24090#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24091#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24092#[rustc_legacy_const_generics(3)]
24093pub fn _mm_maskz_shuffle_ps<const MASK: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
24094    unsafe {
24095        static_assert_uimm_bits!(MASK, 8);
24096        let r = _mm_shuffle_ps::<MASK>(a, b);
24097        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
24098    }
24099}
24100
24101/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst.
24102///
24103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_pd&expand=5192)
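///
/// # Examples
///
/// A minimal, nightly-only sketch (guarded by run-time AVX-512F detection; values are
/// illustrative). Bit `i` of `MASK` picks the lower (0) or upper (1) element of the
/// i-th source pair; even destination elements come from `a`, odd ones from `b`:
///
/// ```
/// #![feature(stdarch_x86_avx512)]
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
///             let b = _mm512_setr_pd(10., 11., 12., 13., 14., 15., 16., 17.);
///             let r = _mm512_shuffle_pd::<0b0000_0011>(a, b);
///             let mut out = [0.0f64; 8];
///             _mm512_storeu_pd(out.as_mut_ptr(), r);
///             assert_eq!(out, [1., 11., 2., 12., 4., 14., 6., 16.]);
///         }
///     }
/// }
/// ```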
24104#[inline]
24105#[target_feature(enable = "avx512f")]
24106#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24107#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24108#[rustc_legacy_const_generics(2)]
24109pub fn _mm512_shuffle_pd<const MASK: i32>(a: __m512d, b: __m512d) -> __m512d {
24110    unsafe {
24111        static_assert_uimm_bits!(MASK, 8);
24112        simd_shuffle!(
24113            a,
24114            b,
24115            [
24116                MASK as u32 & 0b1,
24117                ((MASK as u32 >> 1) & 0b1) + 8,
24118                ((MASK as u32 >> 2) & 0b1) + 2,
24119                ((MASK as u32 >> 3) & 0b1) + 10,
24120                ((MASK as u32 >> 4) & 0b1) + 4,
24121                ((MASK as u32 >> 5) & 0b1) + 12,
24122                ((MASK as u32 >> 6) & 0b1) + 6,
24123                ((MASK as u32 >> 7) & 0b1) + 14,
24124            ],
24125        )
24126    }
24127}
24128
24129/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24130///
24131/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_pd&expand=5190)
24132#[inline]
24133#[target_feature(enable = "avx512f")]
24134#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24135#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24136#[rustc_legacy_const_generics(4)]
24137pub fn _mm512_mask_shuffle_pd<const MASK: i32>(
24138    src: __m512d,
24139    k: __mmask8,
24140    a: __m512d,
24141    b: __m512d,
24142) -> __m512d {
24143    unsafe {
24144        static_assert_uimm_bits!(MASK, 8);
24145        let r = _mm512_shuffle_pd::<MASK>(a, b);
24146        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
24147    }
24148}
24149
24150/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24151///
24152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_pd&expand=5191)
24153#[inline]
24154#[target_feature(enable = "avx512f")]
24155#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24156#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24157#[rustc_legacy_const_generics(3)]
24158pub fn _mm512_maskz_shuffle_pd<const MASK: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
24159    unsafe {
24160        static_assert_uimm_bits!(MASK, 8);
24161        let r = _mm512_shuffle_pd::<MASK>(a, b);
24162        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
24163    }
24164}
24165
24166/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24167///
24168/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_pd&expand=5187)
24169#[inline]
24170#[target_feature(enable = "avx512f,avx512vl")]
24171#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24172#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24173#[rustc_legacy_const_generics(4)]
24174pub fn _mm256_mask_shuffle_pd<const MASK: i32>(
24175    src: __m256d,
24176    k: __mmask8,
24177    a: __m256d,
24178    b: __m256d,
24179) -> __m256d {
24180    unsafe {
24181        static_assert_uimm_bits!(MASK, 8);
24182        let r = _mm256_shuffle_pd::<MASK>(a, b);
24183        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
24184    }
24185}
24186
24187/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24188///
24189/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_pd&expand=5188)
24190#[inline]
24191#[target_feature(enable = "avx512f,avx512vl")]
24192#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24193#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24194#[rustc_legacy_const_generics(3)]
24195pub fn _mm256_maskz_shuffle_pd<const MASK: i32>(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
24196    unsafe {
24197        static_assert_uimm_bits!(MASK, 8);
24198        let r = _mm256_shuffle_pd::<MASK>(a, b);
24199        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
24200    }
24201}
24202
24203/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24204///
24205/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_pd&expand=5184)
24206#[inline]
24207#[target_feature(enable = "avx512f,avx512vl")]
24208#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24209#[cfg_attr(test, assert_instr(vshufpd, MASK = 1))]
24210#[rustc_legacy_const_generics(4)]
24211pub fn _mm_mask_shuffle_pd<const MASK: i32>(
24212    src: __m128d,
24213    k: __mmask8,
24214    a: __m128d,
24215    b: __m128d,
24216) -> __m128d {
24217    unsafe {
24218        static_assert_uimm_bits!(MASK, 8);
24219        let r = _mm_shuffle_pd::<MASK>(a, b);
24220        transmute(simd_select_bitmask(k, r.as_f64x2(), src.as_f64x2()))
24221    }
24222}
24223
24224/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24225///
24226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_pd&expand=5185)
24227#[inline]
24228#[target_feature(enable = "avx512f,avx512vl")]
24229#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24230#[cfg_attr(test, assert_instr(vshufpd, MASK = 1))]
24231#[rustc_legacy_const_generics(3)]
24232pub fn _mm_maskz_shuffle_pd<const MASK: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
24233    unsafe {
24234        static_assert_uimm_bits!(MASK, 8);
24235        let r = _mm_shuffle_pd::<MASK>(a, b);
24236        transmute(simd_select_bitmask(k, r.as_f64x2(), f64x2::ZERO))
24237    }
24238}
24239
24240/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst.
24241///
24242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_i32x4&expand=5177)
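///
/// # Examples
///
/// A minimal, nightly-only sketch (guarded by run-time AVX-512F detection; values are
/// illustrative). Each 2-bit field of `MASK` picks a whole 128-bit lane: destination
/// lanes 0 and 1 come from `a`, lanes 2 and 3 from `b`:
///
/// ```
/// #![feature(stdarch_x86_avx512)]
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///             let b = _mm512_set1_epi32(-1);
///             // Lane 3 of `a`, lane 1 of `a`, then lane 0 of `b` twice.
///             let r = _mm512_shuffle_i32x4::<0b00_00_01_11>(a, b);
///             let expected =
///                 _mm512_setr_epi32(12, 13, 14, 15, 4, 5, 6, 7, -1, -1, -1, -1, -1, -1, -1, -1);
///             assert_eq!(_mm512_cmpeq_epi32_mask(r, expected), 0xffff);
///         }
///     }
/// }
/// ```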
24243#[inline]
24244#[target_feature(enable = "avx512f")]
24245#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24246#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_01_01_01))] //should be vshufi32x4
24247#[rustc_legacy_const_generics(2)]
24248pub fn _mm512_shuffle_i32x4<const MASK: i32>(a: __m512i, b: __m512i) -> __m512i {
24249    unsafe {
24250        static_assert_uimm_bits!(MASK, 8);
24251        let a = a.as_i32x16();
24252        let b = b.as_i32x16();
24253        let r: i32x16 = simd_shuffle!(
24254            a,
24255            b,
24256            [
24257                (MASK as u32 & 0b11) * 4 + 0,
24258                (MASK as u32 & 0b11) * 4 + 1,
24259                (MASK as u32 & 0b11) * 4 + 2,
24260                (MASK as u32 & 0b11) * 4 + 3,
24261                ((MASK as u32 >> 2) & 0b11) * 4 + 0,
24262                ((MASK as u32 >> 2) & 0b11) * 4 + 1,
24263                ((MASK as u32 >> 2) & 0b11) * 4 + 2,
24264                ((MASK as u32 >> 2) & 0b11) * 4 + 3,
24265                ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16,
24266                ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16,
24267                ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16,
24268                ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16,
24269                ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16,
24270                ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16,
24271                ((MASK as u32 >> 6) & 0b11) * 4 + 2 + 16,
24272                ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16,
24273            ],
24274        );
24275        transmute(r)
24276    }
24277}
24278
24279/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24280///
24281/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_i32x4&expand=5175)
24282#[inline]
24283#[target_feature(enable = "avx512f")]
24284#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24285#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))]
24286#[rustc_legacy_const_generics(4)]
24287pub fn _mm512_mask_shuffle_i32x4<const MASK: i32>(
24288    src: __m512i,
24289    k: __mmask16,
24290    a: __m512i,
24291    b: __m512i,
24292) -> __m512i {
24293    unsafe {
24294        static_assert_uimm_bits!(MASK, 8);
24295        let r = _mm512_shuffle_i32x4::<MASK>(a, b);
24296        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
24297    }
24298}
24299
24300/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24301///
24302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_i32x4&expand=5176)
24303#[inline]
24304#[target_feature(enable = "avx512f")]
24305#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24306#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))]
24307#[rustc_legacy_const_generics(3)]
24308pub fn _mm512_maskz_shuffle_i32x4<const MASK: i32>(
24309    k: __mmask16,
24310    a: __m512i,
24311    b: __m512i,
24312) -> __m512i {
24313    unsafe {
24314        static_assert_uimm_bits!(MASK, 8);
24315        let r = _mm512_shuffle_i32x4::<MASK>(a, b);
24316        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
24317    }
24318}
24319
24320/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst.
24321///
24322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_i32x4&expand=5174)
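///
/// A minimal illustrative sketch (not part of Intel's documentation), assuming `avx512f`
/// and `avx512vl` are available:
///
/// ```ignore
/// // Bit 0 of MASK picks the 128-bit lane of `a` that becomes the low half of dst;
/// // bit 1 picks the lane of `b` that becomes the high half.
/// unsafe {
///     let a = _mm256_set_epi32(1, 1, 1, 1, 0, 0, 0, 0);
///     let b = _mm256_set_epi32(3, 3, 3, 3, 2, 2, 2, 2);
///     // MASK = 0b11: dst = [a.lane1, b.lane1] = [1, 1, 1, 1, 3, 3, 3, 3]
///     let r = _mm256_shuffle_i32x4::<0b11>(a, b);
/// }
/// ```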
24323#[inline]
24324#[target_feature(enable = "avx512f,avx512vl")]
24325#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24326#[cfg_attr(test, assert_instr(vperm, MASK = 0b11))] //should be vshufi32x4
24327#[rustc_legacy_const_generics(2)]
24328pub fn _mm256_shuffle_i32x4<const MASK: i32>(a: __m256i, b: __m256i) -> __m256i {
24329    unsafe {
24330        static_assert_uimm_bits!(MASK, 8);
24331        let a = a.as_i32x8();
24332        let b = b.as_i32x8();
24333        let r: i32x8 = simd_shuffle!(
24334            a,
24335            b,
24336            [
24337                (MASK as u32 & 0b1) * 4 + 0,
24338                (MASK as u32 & 0b1) * 4 + 1,
24339                (MASK as u32 & 0b1) * 4 + 2,
24340                (MASK as u32 & 0b1) * 4 + 3,
24341                ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8,
24342                ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8,
24343                ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8,
24344                ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8,
24345            ],
24346        );
24347        transmute(r)
24348    }
24349}
24350
24351/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24352///
24353/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_i32x4&expand=5172)
24354#[inline]
24355#[target_feature(enable = "avx512f,avx512vl")]
24356#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24357#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))]
24358#[rustc_legacy_const_generics(4)]
24359pub fn _mm256_mask_shuffle_i32x4<const MASK: i32>(
24360    src: __m256i,
24361    k: __mmask8,
24362    a: __m256i,
24363    b: __m256i,
24364) -> __m256i {
24365    unsafe {
24366        static_assert_uimm_bits!(MASK, 8);
24367        let r = _mm256_shuffle_i32x4::<MASK>(a, b);
24368        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
24369    }
24370}
24371
24372/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24373///
24374/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_i32x4&expand=5173)
24375#[inline]
24376#[target_feature(enable = "avx512f,avx512vl")]
24377#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24378#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))]
24379#[rustc_legacy_const_generics(3)]
24380pub fn _mm256_maskz_shuffle_i32x4<const MASK: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
24381    unsafe {
24382        static_assert_uimm_bits!(MASK, 8);
24383        let r = _mm256_shuffle_i32x4::<MASK>(a, b);
24384        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
24385    }
24386}
24387
24388/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst.
24389///
24390/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_i64x2&expand=5183)
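///
/// A minimal illustrative sketch (not part of Intel's documentation), assuming `avx512f`
/// is available:
///
/// ```ignore
/// // Same selection scheme as `_mm512_shuffle_i32x4`, but each 128-bit lane holds two
/// // 64-bit integers.
/// unsafe {
///     let a = _mm512_set_epi64(3, 3, 2, 2, 1, 1, 0, 0);
///     let b = _mm512_set_epi64(7, 7, 6, 6, 5, 5, 4, 4);
///     // MASK = 0b00_00_01_10: dst lanes = [a.lane2, a.lane1, b.lane0, b.lane0]
///     let r = _mm512_shuffle_i64x2::<0b00_00_01_10>(a, b);
/// }
/// ```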
24391#[inline]
24392#[target_feature(enable = "avx512f")]
24393#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24394#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
24395#[rustc_legacy_const_generics(2)]
24396pub fn _mm512_shuffle_i64x2<const MASK: i32>(a: __m512i, b: __m512i) -> __m512i {
24397    unsafe {
24398        static_assert_uimm_bits!(MASK, 8);
24399        let a = a.as_i64x8();
24400        let b = b.as_i64x8();
24401        let r: i64x8 = simd_shuffle!(
24402            a,
24403            b,
24404            [
24405                (MASK as u32 & 0b11) * 2 + 0,
24406                (MASK as u32 & 0b11) * 2 + 1,
24407                ((MASK as u32 >> 2) & 0b11) * 2 + 0,
24408                ((MASK as u32 >> 2) & 0b11) * 2 + 1,
24409                ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8,
24410                ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8,
24411                ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8,
24412                ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8,
24413            ],
24414        );
24415        transmute(r)
24416    }
24417}
24418
24419/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24420///
24421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_i64x2&expand=5181)
24422#[inline]
24423#[target_feature(enable = "avx512f")]
24424#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24425#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
24426#[rustc_legacy_const_generics(4)]
24427pub fn _mm512_mask_shuffle_i64x2<const MASK: i32>(
24428    src: __m512i,
24429    k: __mmask8,
24430    a: __m512i,
24431    b: __m512i,
24432) -> __m512i {
24433    unsafe {
24434        static_assert_uimm_bits!(MASK, 8);
24435        let r = _mm512_shuffle_i64x2::<MASK>(a, b);
24436        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
24437    }
24438}
24439
24440/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24441///
24442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_i64x2&expand=5182)
24443#[inline]
24444#[target_feature(enable = "avx512f")]
24445#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24446#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
24447#[rustc_legacy_const_generics(3)]
24448pub fn _mm512_maskz_shuffle_i64x2<const MASK: i32>(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
24449    unsafe {
24450        static_assert_uimm_bits!(MASK, 8);
24451        let r = _mm512_shuffle_i64x2::<MASK>(a, b);
24452        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
24453    }
24454}
24455
24456/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst.
24457///
24458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_i64x2&expand=5180)
24459#[inline]
24460#[target_feature(enable = "avx512f,avx512vl")]
24461#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24462#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshufi64x2
24463#[rustc_legacy_const_generics(2)]
24464pub fn _mm256_shuffle_i64x2<const MASK: i32>(a: __m256i, b: __m256i) -> __m256i {
24465    unsafe {
24466        static_assert_uimm_bits!(MASK, 8);
24467        let a = a.as_i64x4();
24468        let b = b.as_i64x4();
24469        let r: i64x4 = simd_shuffle!(
24470            a,
24471            b,
24472            [
24473                (MASK as u32 & 0b1) * 2 + 0,
24474                (MASK as u32 & 0b1) * 2 + 1,
24475                ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4,
24476                ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4,
24477            ],
24478        );
24479        transmute(r)
24480    }
24481}
24482
24483/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24484///
24485/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_i64x2&expand=5178)
24486#[inline]
24487#[target_feature(enable = "avx512f,avx512vl")]
24488#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24489#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))]
24490#[rustc_legacy_const_generics(4)]
24491pub fn _mm256_mask_shuffle_i64x2<const MASK: i32>(
24492    src: __m256i,
24493    k: __mmask8,
24494    a: __m256i,
24495    b: __m256i,
24496) -> __m256i {
24497    unsafe {
24498        static_assert_uimm_bits!(MASK, 8);
24499        let r = _mm256_shuffle_i64x2::<MASK>(a, b);
24500        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
24501    }
24502}
24503
24504/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24505///
24506/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_i64x2&expand=5179)
24507#[inline]
24508#[target_feature(enable = "avx512f,avx512vl")]
24509#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24510#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))]
24511#[rustc_legacy_const_generics(3)]
24512pub fn _mm256_maskz_shuffle_i64x2<const MASK: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
24513    unsafe {
24514        static_assert_uimm_bits!(MASK, 8);
24515        let r = _mm256_shuffle_i64x2::<MASK>(a, b);
24516        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
24517    }
24518}
24519
24520/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
24521///
24522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_f32x4&expand=5165)
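///
/// A minimal illustrative sketch (not part of Intel's documentation), assuming `avx512f`
/// is available:
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_ps(1.0);
///     let b = _mm512_set1_ps(2.0);
///     // The two low 2-bit fields of MASK pick lanes of `a`, the two high fields pick
///     // lanes of `b`, so the low 8 results are 1.0 and the high 8 are 2.0.
///     let r = _mm512_shuffle_f32x4::<0b11_10_01_00>(a, b);
/// }
/// ```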
24523#[inline]
24524#[target_feature(enable = "avx512f")]
24525#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24526#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b1011))] //should be vshuff32x4, but LLVM generates vshuff64x2
24527#[rustc_legacy_const_generics(2)]
24528pub fn _mm512_shuffle_f32x4<const MASK: i32>(a: __m512, b: __m512) -> __m512 {
24529    unsafe {
24530        static_assert_uimm_bits!(MASK, 8);
24531        let a = a.as_f32x16();
24532        let b = b.as_f32x16();
24533        let r: f32x16 = simd_shuffle!(
24534            a,
24535            b,
24536            [
24537                (MASK as u32 & 0b11) * 4 + 0,
24538                (MASK as u32 & 0b11) * 4 + 1,
24539                (MASK as u32 & 0b11) * 4 + 2,
24540                (MASK as u32 & 0b11) * 4 + 3,
24541                ((MASK as u32 >> 2) & 0b11) * 4 + 0,
24542                ((MASK as u32 >> 2) & 0b11) * 4 + 1,
24543                ((MASK as u32 >> 2) & 0b11) * 4 + 2,
24544                ((MASK as u32 >> 2) & 0b11) * 4 + 3,
24545                ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16,
24546                ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16,
24547                ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16,
24548                ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16,
24549                ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16,
24550                ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16,
24551                ((MASK as u32 >> 6) & 0b11) * 4 + 2 + 16,
24552                ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16,
24553            ],
24554        );
24555        transmute(r)
24556    }
24557}
24558
24559/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24560///
24561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_f32x4&expand=5163)
24562#[inline]
24563#[target_feature(enable = "avx512f")]
24564#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24565#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))]
24566#[rustc_legacy_const_generics(4)]
24567pub fn _mm512_mask_shuffle_f32x4<const MASK: i32>(
24568    src: __m512,
24569    k: __mmask16,
24570    a: __m512,
24571    b: __m512,
24572) -> __m512 {
24573    unsafe {
24574        static_assert_uimm_bits!(MASK, 8);
24575        let r = _mm512_shuffle_f32x4::<MASK>(a, b);
24576        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
24577    }
24578}
24579
24580/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24581///
24582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_f32x4&expand=5164)
24583#[inline]
24584#[target_feature(enable = "avx512f")]
24585#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24586#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))]
24587#[rustc_legacy_const_generics(3)]
24588pub fn _mm512_maskz_shuffle_f32x4<const MASK: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
24589    unsafe {
24590        static_assert_uimm_bits!(MASK, 8);
24591        let r = _mm512_shuffle_f32x4::<MASK>(a, b);
24592        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
24593    }
24594}
24595
24596/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
24597///
24598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_f32x4&expand=5162)
24599#[inline]
24600#[target_feature(enable = "avx512f,avx512vl")]
24601#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24602#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshuff32x4
24603#[rustc_legacy_const_generics(2)]
24604pub fn _mm256_shuffle_f32x4<const MASK: i32>(a: __m256, b: __m256) -> __m256 {
24605    unsafe {
24606        static_assert_uimm_bits!(MASK, 8);
24607        let a = a.as_f32x8();
24608        let b = b.as_f32x8();
24609        let r: f32x8 = simd_shuffle!(
24610            a,
24611            b,
24612            [
24613                (MASK as u32 & 0b1) * 4 + 0,
24614                (MASK as u32 & 0b1) * 4 + 1,
24615                (MASK as u32 & 0b1) * 4 + 2,
24616                (MASK as u32 & 0b1) * 4 + 3,
24617                ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8,
24618                ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8,
24619                ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8,
24620                ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8,
24621            ],
24622        );
24623        transmute(r)
24624    }
24625}
24626
24627/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24628///
24629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_f32x4&expand=5160)
24630#[inline]
24631#[target_feature(enable = "avx512f,avx512vl")]
24632#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24633#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))]
24634#[rustc_legacy_const_generics(4)]
24635pub fn _mm256_mask_shuffle_f32x4<const MASK: i32>(
24636    src: __m256,
24637    k: __mmask8,
24638    a: __m256,
24639    b: __m256,
24640) -> __m256 {
24641    unsafe {
24642        static_assert_uimm_bits!(MASK, 8);
24643        let r = _mm256_shuffle_f32x4::<MASK>(a, b);
24644        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
24645    }
24646}
24647
24648/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24649///
24650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_f32x4&expand=5161)
24651#[inline]
24652#[target_feature(enable = "avx512f,avx512vl")]
24653#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24654#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))]
24655#[rustc_legacy_const_generics(3)]
24656pub fn _mm256_maskz_shuffle_f32x4<const MASK: i32>(k: __mmask8, a: __m256, b: __m256) -> __m256 {
24657    unsafe {
24658        static_assert_uimm_bits!(MASK, 8);
24659        let r = _mm256_shuffle_f32x4::<MASK>(a, b);
24660        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
24661    }
24662}
24663
24664/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
24665///
24666/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_f64x2&expand=5171)
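///
/// A minimal illustrative sketch (not part of Intel's documentation), assuming `avx512f`
/// is available:
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set_pd(3.0, 3.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0);
///     let b = _mm512_set1_pd(9.0);
///     // MASK = 0b00_00_00_11: dst lanes = [a.lane3, a.lane0, b.lane0, b.lane0]
///     //                                 = [3.0, 3.0, 0.0, 0.0, 9.0, 9.0, 9.0, 9.0]
///     let r = _mm512_shuffle_f64x2::<0b00_00_00_11>(a, b);
/// }
/// ```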
24667#[inline]
24668#[target_feature(enable = "avx512f")]
24669#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24670#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
24671#[rustc_legacy_const_generics(2)]
24672pub fn _mm512_shuffle_f64x2<const MASK: i32>(a: __m512d, b: __m512d) -> __m512d {
24673    unsafe {
24674        static_assert_uimm_bits!(MASK, 8);
24675        let a = a.as_f64x8();
24676        let b = b.as_f64x8();
24677        let r: f64x8 = simd_shuffle!(
24678            a,
24679            b,
24680            [
24681                (MASK as u32 & 0b11) * 2 + 0,
24682                (MASK as u32 & 0b11) * 2 + 1,
24683                ((MASK as u32 >> 2) & 0b11) * 2 + 0,
24684                ((MASK as u32 >> 2) & 0b11) * 2 + 1,
24685                ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8,
24686                ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8,
24687                ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8,
24688                ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8,
24689            ],
24690        );
24691        transmute(r)
24692    }
24693}
24694
24695/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24696///
24697/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_f64x2&expand=5169)
24698#[inline]
24699#[target_feature(enable = "avx512f")]
24700#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24701#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
24702#[rustc_legacy_const_generics(4)]
24703pub fn _mm512_mask_shuffle_f64x2<const MASK: i32>(
24704    src: __m512d,
24705    k: __mmask8,
24706    a: __m512d,
24707    b: __m512d,
24708) -> __m512d {
24709    unsafe {
24710        static_assert_uimm_bits!(MASK, 8);
24711        let r = _mm512_shuffle_f64x2::<MASK>(a, b);
24712        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
24713    }
24714}
24715
24716/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24717///
24718/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_f64x2&expand=5170)
24719#[inline]
24720#[target_feature(enable = "avx512f")]
24721#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24722#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
24723#[rustc_legacy_const_generics(3)]
24724pub fn _mm512_maskz_shuffle_f64x2<const MASK: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
24725    unsafe {
24726        static_assert_uimm_bits!(MASK, 8);
24727        let r = _mm512_shuffle_f64x2::<MASK>(a, b);
24728        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
24729    }
24730}
24731
24732/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
24733///
24734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_f64x2&expand=5168)
24735#[inline]
24736#[target_feature(enable = "avx512f,avx512vl")]
24737#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24738#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshuff64x2
24739#[rustc_legacy_const_generics(2)]
24740pub fn _mm256_shuffle_f64x2<const MASK: i32>(a: __m256d, b: __m256d) -> __m256d {
24741    unsafe {
24742        static_assert_uimm_bits!(MASK, 8);
24743        let a = a.as_f64x4();
24744        let b = b.as_f64x4();
24745        let r: f64x4 = simd_shuffle!(
24746            a,
24747            b,
24748            [
24749                (MASK as u32 & 0b1) * 2 + 0,
24750                (MASK as u32 & 0b1) * 2 + 1,
24751                ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4,
24752                ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4,
24753            ],
24754        );
24755        transmute(r)
24756    }
24757}
24758
24759/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24760///
24761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_f64x2&expand=5166)
24762#[inline]
24763#[target_feature(enable = "avx512f,avx512vl")]
24764#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24765#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))]
24766#[rustc_legacy_const_generics(4)]
24767pub fn _mm256_mask_shuffle_f64x2<const MASK: i32>(
24768    src: __m256d,
24769    k: __mmask8,
24770    a: __m256d,
24771    b: __m256d,
24772) -> __m256d {
24773    unsafe {
24774        static_assert_uimm_bits!(MASK, 8);
24775        let r = _mm256_shuffle_f64x2::<MASK>(a, b);
24776        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
24777    }
24778}
24779
24780/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24781///
24782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_f64x2&expand=5167)
24783#[inline]
24784#[target_feature(enable = "avx512f,avx512vl")]
24785#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24786#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))]
24787#[rustc_legacy_const_generics(3)]
24788pub fn _mm256_maskz_shuffle_f64x2<const MASK: i32>(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
24789    unsafe {
24790        static_assert_uimm_bits!(MASK, 8);
24791        let r = _mm256_shuffle_f64x2::<MASK>(a, b);
24792        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
24793    }
24794}
24795
24796/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
24797///
24798/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf32x4_ps&expand=2442)
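///
/// A minimal illustrative sketch (not part of Intel's documentation), assuming `avx512f`
/// is available:
///
/// ```ignore
/// // IMM8 (0..=3) selects which 128-bit lane of `a` becomes the __m128 result.
/// unsafe {
///     let a = _mm512_set_ps(
///         15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0,
///         7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0,
///     );
///     let hi = _mm512_extractf32x4_ps::<3>(a); // [12.0, 13.0, 14.0, 15.0]
/// }
/// ```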
24799#[inline]
24800#[target_feature(enable = "avx512f")]
24801#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24802#[cfg_attr(
24803    all(test, not(target_env = "msvc")),
24804    assert_instr(vextractf32x4, IMM8 = 3)
24805)]
24806#[rustc_legacy_const_generics(1)]
24807pub fn _mm512_extractf32x4_ps<const IMM8: i32>(a: __m512) -> __m128 {
24808    unsafe {
24809        static_assert_uimm_bits!(IMM8, 2);
24810        match IMM8 & 0x3 {
24811            0 => simd_shuffle!(a, _mm512_undefined_ps(), [0, 1, 2, 3]),
24812            1 => simd_shuffle!(a, _mm512_undefined_ps(), [4, 5, 6, 7]),
24813            2 => simd_shuffle!(a, _mm512_undefined_ps(), [8, 9, 10, 11]),
24814            _ => simd_shuffle!(a, _mm512_undefined_ps(), [12, 13, 14, 15]),
24815        }
24816    }
24817}
24818
24819/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24820///
24821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf32x4_ps&expand=2443)
24822#[inline]
24823#[target_feature(enable = "avx512f")]
24824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24825#[cfg_attr(
24826    all(test, not(target_env = "msvc")),
24827    assert_instr(vextractf32x4, IMM8 = 3)
24828)]
24829#[rustc_legacy_const_generics(3)]
24830pub fn _mm512_mask_extractf32x4_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m512) -> __m128 {
24831    unsafe {
24832        static_assert_uimm_bits!(IMM8, 2);
24833        let r = _mm512_extractf32x4_ps::<IMM8>(a);
24834        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
24835    }
24836}
24837
24838/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24839///
24840/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf32x4_ps&expand=2444)
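///
/// An illustrative zeromask sketch (not part of Intel's documentation), assuming `avx512f`
/// is available:
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_ps(5.0);
///     // Only mask bits 0 and 1 are set, so elements 2 and 3 of the result are zeroed.
///     let r = _mm512_maskz_extractf32x4_ps::<0>(0b0011, a); // [5.0, 5.0, 0.0, 0.0]
/// }
/// ```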
24841#[inline]
24842#[target_feature(enable = "avx512f")]
24843#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24844#[cfg_attr(
24845    all(test, not(target_env = "msvc")),
24846    assert_instr(vextractf32x4, IMM8 = 3)
24847)]
24848#[rustc_legacy_const_generics(2)]
24849pub fn _mm512_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m512) -> __m128 {
24850    unsafe {
24851        static_assert_uimm_bits!(IMM8, 2);
24852        let r = _mm512_extractf32x4_ps::<IMM8>(a);
24853        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
24854    }
24855}
24856
24857/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
24858///
24859/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf32x4_ps&expand=2439)
24860#[inline]
24861#[target_feature(enable = "avx512f,avx512vl")]
24862#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24863#[cfg_attr(
24864    all(test, not(target_env = "msvc")),
24865    assert_instr(vextract, IMM8 = 1) //should be vextractf32x4
24866)]
24867#[rustc_legacy_const_generics(1)]
24868pub fn _mm256_extractf32x4_ps<const IMM8: i32>(a: __m256) -> __m128 {
24869    unsafe {
24870        static_assert_uimm_bits!(IMM8, 1);
24871        match IMM8 & 0x1 {
24872            0 => simd_shuffle!(a, _mm256_undefined_ps(), [0, 1, 2, 3]),
24873            _ => simd_shuffle!(a, _mm256_undefined_ps(), [4, 5, 6, 7]),
24874        }
24875    }
24876}
24877
24878/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24879///
24880/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extractf32x4_ps&expand=2440)
24881#[inline]
24882#[target_feature(enable = "avx512f,avx512vl")]
24883#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24884#[cfg_attr(
24885    all(test, not(target_env = "msvc")),
24886    assert_instr(vextractf32x4, IMM8 = 1)
24887)]
24888#[rustc_legacy_const_generics(3)]
24889pub fn _mm256_mask_extractf32x4_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m256) -> __m128 {
24890    unsafe {
24891        static_assert_uimm_bits!(IMM8, 1);
24892        let r = _mm256_extractf32x4_ps::<IMM8>(a);
24893        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
24894    }
24895}
24896
24897/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24898///
24899/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extractf32x4_ps&expand=2441)
24900#[inline]
24901#[target_feature(enable = "avx512f,avx512vl")]
24902#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24903#[cfg_attr(
24904    all(test, not(target_env = "msvc")),
24905    assert_instr(vextractf32x4, IMM8 = 1)
24906)]
24907#[rustc_legacy_const_generics(2)]
24908pub fn _mm256_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128 {
24909    unsafe {
24910        static_assert_uimm_bits!(IMM8, 1);
24911        let r = _mm256_extractf32x4_ps::<IMM8>(a);
24912        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
24913    }
24914}
24915
24916/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the result in dst.
24917///
24918/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti64x4_epi64&expand=2473)
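///
/// A minimal illustrative sketch (not part of Intel's documentation), assuming `avx512f`
/// is available:
///
/// ```ignore
/// // IMM1 = 0 extracts the low 256 bits of `a`, IMM1 = 1 the high 256 bits.
/// unsafe {
///     let a = _mm512_set_epi64(7, 6, 5, 4, 3, 2, 1, 0);
///     let hi = _mm512_extracti64x4_epi64::<1>(a); // [4, 5, 6, 7]
/// }
/// ```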
24919#[inline]
24920#[target_feature(enable = "avx512f")]
24921#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24922#[cfg_attr(
24923    all(test, not(target_env = "msvc")),
24924    assert_instr(vextractf64x4, IMM1 = 1) //should be vextracti64x4
24925)]
24926#[rustc_legacy_const_generics(1)]
24927pub fn _mm512_extracti64x4_epi64<const IMM1: i32>(a: __m512i) -> __m256i {
24928    unsafe {
24929        static_assert_uimm_bits!(IMM1, 1);
24930        match IMM1 {
24931            0 => simd_shuffle!(a, _mm512_setzero_si512(), [0, 1, 2, 3]),
24932            _ => simd_shuffle!(a, _mm512_setzero_si512(), [4, 5, 6, 7]),
24933        }
24934    }
24935}
24936
24937/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24938///
24939/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti64x4_epi64&expand=2474)
24940#[inline]
24941#[target_feature(enable = "avx512f")]
24942#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24943#[cfg_attr(
24944    all(test, not(target_env = "msvc")),
24945    assert_instr(vextracti64x4, IMM1 = 1)
24946)]
24947#[rustc_legacy_const_generics(3)]
24948pub fn _mm512_mask_extracti64x4_epi64<const IMM1: i32>(
24949    src: __m256i,
24950    k: __mmask8,
24951    a: __m512i,
24952) -> __m256i {
24953    unsafe {
24954        static_assert_uimm_bits!(IMM1, 1);
24955        let r = _mm512_extracti64x4_epi64::<IMM1>(a);
24956        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
24957    }
24958}
24959
24960/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24961///
24962/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti64x4_epi64&expand=2475)
24963#[inline]
24964#[target_feature(enable = "avx512f")]
24965#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24966#[cfg_attr(
24967    all(test, not(target_env = "msvc")),
24968    assert_instr(vextracti64x4, IMM1 = 1)
24969)]
24970#[rustc_legacy_const_generics(2)]
24971pub fn _mm512_maskz_extracti64x4_epi64<const IMM1: i32>(k: __mmask8, a: __m512i) -> __m256i {
24972    unsafe {
24973        static_assert_uimm_bits!(IMM1, 1);
24974        let r = _mm512_extracti64x4_epi64::<IMM1>(a);
24975        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
24976    }
24977}
24978
24979/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
24980///
24981/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf64x4_pd&expand=2454)
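///
/// A minimal illustrative sketch (not part of Intel's documentation), assuming `avx512f`
/// is available:
///
/// ```ignore
/// // imm8 = 0 selects the low four doubles of `a`, imm8 = 1 the high four.
/// unsafe {
///     let a = _mm512_set_pd(7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0);
///     let lo = _mm512_extractf64x4_pd::<0>(a); // [0.0, 1.0, 2.0, 3.0]
/// }
/// ```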
24982#[inline]
24983#[target_feature(enable = "avx512f")]
24984#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24985#[cfg_attr(
24986    all(test, not(target_env = "msvc")),
24987    assert_instr(vextractf64x4, IMM8 = 1)
24988)]
24989#[rustc_legacy_const_generics(1)]
24990pub fn _mm512_extractf64x4_pd<const IMM8: i32>(a: __m512d) -> __m256d {
24991    unsafe {
24992        static_assert_uimm_bits!(IMM8, 1);
24993        match IMM8 & 0x1 {
24994            0 => simd_shuffle!(a, _mm512_undefined_pd(), [0, 1, 2, 3]),
24995            _ => simd_shuffle!(a, _mm512_undefined_pd(), [4, 5, 6, 7]),
24996        }
24997    }
24998}
24999
25000/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25001///
25002/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf64x4_pd&expand=2455)
25003#[inline]
25004#[target_feature(enable = "avx512f")]
25005#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25006#[cfg_attr(
25007    all(test, not(target_env = "msvc")),
25008    assert_instr(vextractf64x4, IMM8 = 1)
25009)]
25010#[rustc_legacy_const_generics(3)]
25011pub fn _mm512_mask_extractf64x4_pd<const IMM8: i32>(
25012    src: __m256d,
25013    k: __mmask8,
25014    a: __m512d,
25015) -> __m256d {
25016    unsafe {
25017        static_assert_uimm_bits!(IMM8, 1);
25018        let r = _mm512_extractf64x4_pd::<IMM8>(a);
25019        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
25020    }
25021}
25022
25023/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25024///
25025/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf64x4_pd&expand=2456)
25026#[inline]
25027#[target_feature(enable = "avx512f")]
25028#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25029#[cfg_attr(
25030    all(test, not(target_env = "msvc")),
25031    assert_instr(vextractf64x4, IMM8 = 1)
25032)]
25033#[rustc_legacy_const_generics(2)]
25034pub fn _mm512_maskz_extractf64x4_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m256d {
25035    unsafe {
25036        static_assert_uimm_bits!(IMM8, 1);
25037        let r = _mm512_extractf64x4_pd::<IMM8>(a);
25038        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
25039    }
25040}
25041
25042/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the result in dst.
25043///
25044/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti32x4_epi32&expand=2461)
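///
/// A minimal illustrative sketch (not part of Intel's documentation), assuming `avx512f`
/// is available:
///
/// ```ignore
/// // IMM2 (0..=3) selects which 128-bit lane of `a` is extracted.
/// unsafe {
///     let a = _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
///     let lane2 = _mm512_extracti32x4_epi32::<2>(a); // [8, 9, 10, 11]
/// }
/// ```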
25045#[inline]
25046#[target_feature(enable = "avx512f")]
25047#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25048#[cfg_attr(
25049    all(test, not(target_env = "msvc")),
25050    assert_instr(vextractf32x4, IMM2 = 3) //should be vextracti32x4
25051)]
25052#[rustc_legacy_const_generics(1)]
25053pub fn _mm512_extracti32x4_epi32<const IMM2: i32>(a: __m512i) -> __m128i {
25054    unsafe {
25055        static_assert_uimm_bits!(IMM2, 2);
25056        let a = a.as_i32x16();
25057        let zero = i32x16::ZERO;
25058        let extract: i32x4 = match IMM2 {
25059            0 => simd_shuffle!(a, zero, [0, 1, 2, 3]),
25060            1 => simd_shuffle!(a, zero, [4, 5, 6, 7]),
25061            2 => simd_shuffle!(a, zero, [8, 9, 10, 11]),
25062            _ => simd_shuffle!(a, zero, [12, 13, 14, 15]),
25063        };
25064        transmute(extract)
25065    }
25066}
25067
25068/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25069///
25070/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti32x4_epi32&expand=2462)
25071#[inline]
25072#[target_feature(enable = "avx512f")]
25073#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25074#[cfg_attr(
25075    all(test, not(target_env = "msvc")),
25076    assert_instr(vextracti32x4, IMM2 = 3)
25077)]
25078#[rustc_legacy_const_generics(3)]
25079pub fn _mm512_mask_extracti32x4_epi32<const IMM2: i32>(
25080    src: __m128i,
25081    k: __mmask8,
25082    a: __m512i,
25083) -> __m128i {
25084    unsafe {
25085        static_assert_uimm_bits!(IMM2, 2);
25086        let r = _mm512_extracti32x4_epi32::<IMM2>(a);
25087        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
25088    }
25089}
25090
25091/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25092///
25093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti32x4_epi32&expand=2463)
25094#[inline]
25095#[target_feature(enable = "avx512f")]
25096#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25097#[cfg_attr(
25098    all(test, not(target_env = "msvc")),
25099    assert_instr(vextracti32x4, IMM2 = 3)
25100)]
25101#[rustc_legacy_const_generics(2)]
25102pub fn _mm512_maskz_extracti32x4_epi32<const IMM2: i32>(k: __mmask8, a: __m512i) -> __m128i {
25103    unsafe {
25104        static_assert_uimm_bits!(IMM2, 2);
25105        let r = _mm512_extracti32x4_epi32::<IMM2>(a);
25106        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
25107    }
25108}
25109
25110/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the result in dst.
25111///
25112/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extracti32x4_epi32&expand=2458)
25113#[inline]
25114#[target_feature(enable = "avx512f,avx512vl")]
25115#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25116#[cfg_attr(
25117    all(test, not(target_env = "msvc")),
25118    assert_instr(vextract, IMM1 = 1) //should be vextracti32x4
25119)]
25120#[rustc_legacy_const_generics(1)]
25121pub fn _mm256_extracti32x4_epi32<const IMM1: i32>(a: __m256i) -> __m128i {
25122    unsafe {
25123        static_assert_uimm_bits!(IMM1, 1);
25124        let a = a.as_i32x8();
25125        let zero = i32x8::ZERO;
25126        let extract: i32x4 = match IMM1 {
25127            0 => simd_shuffle!(a, zero, [0, 1, 2, 3]),
25128            _ => simd_shuffle!(a, zero, [4, 5, 6, 7]),
25129        };
25130        transmute(extract)
25131    }
25132}
25133
25134/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25135///
25136/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extracti32x4_epi32&expand=2459)
25137#[inline]
25138#[target_feature(enable = "avx512f,avx512vl")]
25139#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25140#[cfg_attr(
25141    all(test, not(target_env = "msvc")),
25142    assert_instr(vextracti32x4, IMM1 = 1)
25143)]
25144#[rustc_legacy_const_generics(3)]
25145pub fn _mm256_mask_extracti32x4_epi32<const IMM1: i32>(
25146    src: __m128i,
25147    k: __mmask8,
25148    a: __m256i,
25149) -> __m128i {
25150    unsafe {
25151        static_assert_uimm_bits!(IMM1, 1);
25152        let r = _mm256_extracti32x4_epi32::<IMM1>(a);
25153        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
25154    }
25155}
25156
25157/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25158///
25159/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extracti32x4_epi32&expand=2460)
25160#[inline]
25161#[target_feature(enable = "avx512f,avx512vl")]
25162#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25163#[cfg_attr(
25164    all(test, not(target_env = "msvc")),
25165    assert_instr(vextracti32x4, IMM1 = 1)
25166)]
25167#[rustc_legacy_const_generics(2)]
25168pub fn _mm256_maskz_extracti32x4_epi32<const IMM1: i32>(k: __mmask8, a: __m256i) -> __m128i {
25169    unsafe {
25170        static_assert_uimm_bits!(IMM1, 1);
25171        let r = _mm256_extracti32x4_epi32::<IMM1>(a);
25172        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
25173    }
25174}
25175
25176/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst.
25177///
25178/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_moveldup_ps&expand=3862)
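///
/// A minimal illustrative sketch (not part of Intel's documentation), assuming `avx512f`
/// is available:
///
/// ```ignore
/// // Result elements 2*i and 2*i+1 both hold element 2*i of `a`.
/// unsafe {
///     let a = _mm512_set_ps(
///         15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0,
///         7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0,
///     );
///     let r = _mm512_moveldup_ps(a); // [0.0, 0.0, 2.0, 2.0, 4.0, 4.0, ..., 14.0, 14.0]
/// }
/// ```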
25179#[inline]
25180#[target_feature(enable = "avx512f")]
25181#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25182#[cfg_attr(test, assert_instr(vmovsldup))]
25183pub fn _mm512_moveldup_ps(a: __m512) -> __m512 {
25184    unsafe {
25185        let r: f32x16 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
25186        transmute(r)
25187    }
25188}
25189
25190/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25191///
25192/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_moveldup_ps&expand=3860)
25193#[inline]
25194#[target_feature(enable = "avx512f")]
25195#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25196#[cfg_attr(test, assert_instr(vmovsldup))]
25197pub fn _mm512_mask_moveldup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
25198    unsafe {
25199        let mov: f32x16 =
25200            simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
25201        transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
25202    }
25203}
25204
25205/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25206///
25207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_moveldup_ps&expand=3861)
25208#[inline]
25209#[target_feature(enable = "avx512f")]
25210#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25211#[cfg_attr(test, assert_instr(vmovsldup))]
25212pub fn _mm512_maskz_moveldup_ps(k: __mmask16, a: __m512) -> __m512 {
25213    unsafe {
25214        let mov: f32x16 =
25215            simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
25216        transmute(simd_select_bitmask(k, mov, f32x16::ZERO))
25217    }
25218}
25219
25220/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25221///
25222/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_moveldup_ps&expand=3857)
25223#[inline]
25224#[target_feature(enable = "avx512f,avx512vl")]
25225#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25226#[cfg_attr(test, assert_instr(vmovsldup))]
25227pub fn _mm256_mask_moveldup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
25228    unsafe {
25229        let mov = _mm256_moveldup_ps(a);
25230        transmute(simd_select_bitmask(k, mov.as_f32x8(), src.as_f32x8()))
25231    }
25232}
25233
25234/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25235///
25236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_moveldup_ps&expand=3858)
25237#[inline]
25238#[target_feature(enable = "avx512f,avx512vl")]
25239#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25240#[cfg_attr(test, assert_instr(vmovsldup))]
25241pub fn _mm256_maskz_moveldup_ps(k: __mmask8, a: __m256) -> __m256 {
25242    unsafe {
25243        let mov = _mm256_moveldup_ps(a);
25244        transmute(simd_select_bitmask(k, mov.as_f32x8(), f32x8::ZERO))
25245    }
25246}
25247
25248/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25249///
25250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_moveldup_ps&expand=3854)
25251#[inline]
25252#[target_feature(enable = "avx512f,avx512vl")]
25253#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25254#[cfg_attr(test, assert_instr(vmovsldup))]
25255pub fn _mm_mask_moveldup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
25256    unsafe {
25257        let mov = _mm_moveldup_ps(a);
25258        transmute(simd_select_bitmask(k, mov.as_f32x4(), src.as_f32x4()))
25259    }
25260}
25261
25262/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25263///
25264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_moveldup_ps&expand=3855)
25265#[inline]
25266#[target_feature(enable = "avx512f,avx512vl")]
25267#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25268#[cfg_attr(test, assert_instr(vmovsldup))]
25269pub fn _mm_maskz_moveldup_ps(k: __mmask8, a: __m128) -> __m128 {
25270    unsafe {
25271        let mov = _mm_moveldup_ps(a);
25272        transmute(simd_select_bitmask(k, mov.as_f32x4(), f32x4::ZERO))
25273    }
25274}
25275
25276/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst.
25277///
25278/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movehdup_ps&expand=3852)
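///
/// A minimal illustrative sketch (not part of Intel's documentation), assuming `avx512f`
/// is available:
///
/// ```ignore
/// // Result elements 2*i and 2*i+1 both hold element 2*i+1 of `a`.
/// unsafe {
///     let a = _mm512_set_ps(
///         15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0,
///         7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0,
///     );
///     let r = _mm512_movehdup_ps(a); // [1.0, 1.0, 3.0, 3.0, 5.0, 5.0, ..., 15.0, 15.0]
/// }
/// ```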
25279#[inline]
25280#[target_feature(enable = "avx512f")]
25281#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25282#[cfg_attr(test, assert_instr(vmovshdup))]
25283pub fn _mm512_movehdup_ps(a: __m512) -> __m512 {
25284    unsafe {
25285        let r: f32x16 = simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
25286        transmute(r)
25287    }
25288}
25289
25290/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25291///
25292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_movehdup_ps&expand=3850)
25293#[inline]
25294#[target_feature(enable = "avx512f")]
25295#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25296#[cfg_attr(test, assert_instr(vmovshdup))]
25297pub fn _mm512_mask_movehdup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
25298    unsafe {
25299        let mov: f32x16 =
25300            simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
25301        transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
25302    }
25303}
25304
25305/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25306///
25307/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_movehdup_ps&expand=3851)
25308#[inline]
25309#[target_feature(enable = "avx512f")]
25310#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25311#[cfg_attr(test, assert_instr(vmovshdup))]
25312pub fn _mm512_maskz_movehdup_ps(k: __mmask16, a: __m512) -> __m512 {
25313    unsafe {
25314        let mov: f32x16 =
25315            simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
25316        transmute(simd_select_bitmask(k, mov, f32x16::ZERO))
25317    }
25318}
25319
25320/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25321///
25322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_movehdup_ps&expand=3847)
25323#[inline]
25324#[target_feature(enable = "avx512f,avx512vl")]
25325#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25326#[cfg_attr(test, assert_instr(vmovshdup))]
25327pub fn _mm256_mask_movehdup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
25328    unsafe {
25329        let mov = _mm256_movehdup_ps(a);
25330        transmute(simd_select_bitmask(k, mov.as_f32x8(), src.as_f32x8()))
25331    }
25332}
25333
25334/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25335///
25336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_movehdup_ps&expand=3848)
25337#[inline]
25338#[target_feature(enable = "avx512f,avx512vl")]
25339#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25340#[cfg_attr(test, assert_instr(vmovshdup))]
25341pub fn _mm256_maskz_movehdup_ps(k: __mmask8, a: __m256) -> __m256 {
25342    unsafe {
25343        let mov = _mm256_movehdup_ps(a);
25344        transmute(simd_select_bitmask(k, mov.as_f32x8(), f32x8::ZERO))
25345    }
25346}
25347
25348/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25349///
25350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_movehdup_ps&expand=3844)
25351#[inline]
25352#[target_feature(enable = "avx512f,avx512vl")]
25353#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25354#[cfg_attr(test, assert_instr(vmovshdup))]
25355pub fn _mm_mask_movehdup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
25356    unsafe {
25357        let mov = _mm_movehdup_ps(a);
25358        transmute(simd_select_bitmask(k, mov.as_f32x4(), src.as_f32x4()))
25359    }
25360}
25361
25362/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25363///
25364/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_movehdup_ps&expand=3845)
25365#[inline]
25366#[target_feature(enable = "avx512f,avx512vl")]
25367#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25368#[cfg_attr(test, assert_instr(vmovshdup))]
25369pub fn _mm_maskz_movehdup_ps(k: __mmask8, a: __m128) -> __m128 {
25370    unsafe {
25371        let mov = _mm_movehdup_ps(a);
25372        transmute(simd_select_bitmask(k, mov.as_f32x4(), f32x4::ZERO))
25373    }
25374}
25375
25376/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst.
25377///
25378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movedup_pd&expand=3843)
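///
/// A minimal illustrative sketch (not part of Intel's documentation), assuming `avx512f`
/// is available:
///
/// ```ignore
/// // Each even-indexed double of `a` is duplicated into the following result slot.
/// unsafe {
///     let a = _mm512_set_pd(7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0);
///     let r = _mm512_movedup_pd(a); // [0.0, 0.0, 2.0, 2.0, 4.0, 4.0, 6.0, 6.0]
/// }
/// ```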
25379#[inline]
25380#[target_feature(enable = "avx512f")]
25381#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25382#[cfg_attr(test, assert_instr(vmovddup))]
25383pub fn _mm512_movedup_pd(a: __m512d) -> __m512d {
25384    unsafe {
25385        let r: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
25386        transmute(r)
25387    }
25388}
25389
25390/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25391///
25392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_movedup_pd&expand=3841)
25393#[inline]
25394#[target_feature(enable = "avx512f")]
25395#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25396#[cfg_attr(test, assert_instr(vmovddup))]
25397pub fn _mm512_mask_movedup_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
25398    unsafe {
25399        let mov: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
25400        transmute(simd_select_bitmask(k, mov, src.as_f64x8()))
25401    }
25402}
25403
25404/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25405///
25406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_movedup_pd&expand=3842)
25407#[inline]
25408#[target_feature(enable = "avx512f")]
25409#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25410#[cfg_attr(test, assert_instr(vmovddup))]
25411pub fn _mm512_maskz_movedup_pd(k: __mmask8, a: __m512d) -> __m512d {
25412    unsafe {
25413        let mov: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
25414        transmute(simd_select_bitmask(k, mov, f64x8::ZERO))
25415    }
25416}
25417
25418/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25419///
25420/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_movedup_pd&expand=3838)
25421#[inline]
25422#[target_feature(enable = "avx512f,avx512vl")]
25423#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25424#[cfg_attr(test, assert_instr(vmovddup))]
25425pub fn _mm256_mask_movedup_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
25426    unsafe {
25427        let mov = _mm256_movedup_pd(a);
25428        transmute(simd_select_bitmask(k, mov.as_f64x4(), src.as_f64x4()))
25429    }
25430}
25431
25432/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25433///
25434/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_movedup_pd&expand=3839)
25435#[inline]
25436#[target_feature(enable = "avx512f,avx512vl")]
25437#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25438#[cfg_attr(test, assert_instr(vmovddup))]
25439pub fn _mm256_maskz_movedup_pd(k: __mmask8, a: __m256d) -> __m256d {
25440    unsafe {
25441        let mov = _mm256_movedup_pd(a);
25442        transmute(simd_select_bitmask(k, mov.as_f64x4(), f64x4::ZERO))
25443    }
25444}
25445
25446/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25447///
25448/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_movedup_pd&expand=3835)
25449#[inline]
25450#[target_feature(enable = "avx512f,avx512vl")]
25451#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25452#[cfg_attr(test, assert_instr(vmovddup))]
25453pub fn _mm_mask_movedup_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
25454    unsafe {
25455        let mov = _mm_movedup_pd(a);
25456        transmute(simd_select_bitmask(k, mov.as_f64x2(), src.as_f64x2()))
25457    }
25458}
25459
25460/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25461///
25462/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_movedup_pd&expand=3836)
25463#[inline]
25464#[target_feature(enable = "avx512f,avx512vl")]
25465#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25466#[cfg_attr(test, assert_instr(vmovddup))]
25467pub fn _mm_maskz_movedup_pd(k: __mmask8, a: __m128d) -> __m128d {
25468    unsafe {
25469        let mov = _mm_movedup_pd(a);
25470        transmute(simd_select_bitmask(k, mov.as_f64x2(), f64x2::ZERO))
25471    }
25472}
25473
25474/// Copy a to dst, then insert 128 bits (composed of 4 packed 32-bit integers) from b into dst at the location specified by imm8.
25475///
25476/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti32x4&expand=3174)
25477#[inline]
25478#[target_feature(enable = "avx512f")]
25479#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25480#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))] // should be vinserti32x4, but the FP form is what is currently emitted
25481#[rustc_legacy_const_generics(2)]
25482pub fn _mm512_inserti32x4<const IMM8: i32>(a: __m512i, b: __m128i) -> __m512i {
25483    unsafe {
25484        static_assert_uimm_bits!(IMM8, 2);
25485        let a = a.as_i32x16();
25486        let b = _mm512_castsi128_si512(b).as_i32x16();
25487        let ret: i32x16 = match IMM8 & 0b11 {
25488            0 => {
25489                simd_shuffle!(
25490                    a,
25491                    b,
25492                    [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
25493                )
25494            }
25495            1 => {
25496                simd_shuffle!(
25497                    a,
25498                    b,
25499                    [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
25500                )
25501            }
25502            2 => {
25503                simd_shuffle!(
25504                    a,
25505                    b,
25506                    [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
25507                )
25508            }
25509            _ => {
25510                simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19])
25511            }
25512        };
25513        transmute(ret)
25514    }
25515}
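
// Illustrative, test-only sketch (helper name invented): shows how IMM8 selects
// which 128-bit lane of the destination receives `b`, while the remaining lanes
// are copied from `a` unchanged.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn inserti32x4_example() {
    let a = _mm512_set1_epi32(0);
    let b: __m128i = unsafe { mem::transmute([1i32, 2, 3, 4]) };
    // IMM8 = 2 replaces the third 128-bit lane (elements 8..=11) with `b`.
    let r: [i32; 16] = unsafe { mem::transmute(_mm512_inserti32x4::<2>(a, b)) };
    assert_eq!(r, [0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 0, 0, 0, 0]);
}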
25516
25517/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25518///
25519/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti32x4&expand=3175)
25520#[inline]
25521#[target_feature(enable = "avx512f")]
25522#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25523#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 2))]
25524#[rustc_legacy_const_generics(4)]
25525pub fn _mm512_mask_inserti32x4<const IMM8: i32>(
25526    src: __m512i,
25527    k: __mmask16,
25528    a: __m512i,
25529    b: __m128i,
25530) -> __m512i {
25531    unsafe {
25532        static_assert_uimm_bits!(IMM8, 2);
25533        let r = _mm512_inserti32x4::<IMM8>(a, b);
25534        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
25535    }
25536}
25537
25538/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25539///
25540/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti32x4&expand=3176)
25541#[inline]
25542#[target_feature(enable = "avx512f")]
25543#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25544#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 2))]
25545#[rustc_legacy_const_generics(3)]
25546pub fn _mm512_maskz_inserti32x4<const IMM8: i32>(k: __mmask16, a: __m512i, b: __m128i) -> __m512i {
25547    unsafe {
25548        static_assert_uimm_bits!(IMM8, 2);
25549        let r = _mm512_inserti32x4::<IMM8>(a, b);
25550        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
25551    }
25552}
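
// Illustrative, test-only sketch (helper name invented): the masked forms first
// perform the full 128-bit insert, then blend the result with `src` (writemask)
// or zero (zeromask) element-by-element according to `k`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn mask_inserti32x4_example() {
    let src = _mm512_set1_epi32(-1);
    let a = _mm512_set1_epi32(0);
    let b: __m128i = unsafe { mem::transmute([1i32, 2, 3, 4]) };
    // Only the even-numbered elements of the inserted result are kept;
    // odd-numbered elements come from `src`.
    let k: __mmask16 = 0b0101_0101_0101_0101;
    let r: [i32; 16] = unsafe { mem::transmute(_mm512_mask_inserti32x4::<3>(src, k, a, b)) };
    let e: [i32; 16] = [0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 1, -1, 3, -1];
    assert_eq!(r, e);
}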
25553
25554/// Copy a to dst, then insert 128 bits (composed of 4 packed 32-bit integers) from b into dst at the location specified by imm8.
25555///
25556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_inserti32x4&expand=3171)
25557#[inline]
25558#[target_feature(enable = "avx512f,avx512vl")]
25559#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25560#[cfg_attr(
25561    all(test, not(target_env = "msvc")),
25562    assert_instr(vinsert, IMM8 = 1) // should be vinserti32x4; the test only checks for a vinsert-prefixed instruction
25563)]
25564#[rustc_legacy_const_generics(2)]
25565pub fn _mm256_inserti32x4<const IMM8: i32>(a: __m256i, b: __m128i) -> __m256i {
25566    unsafe {
25567        static_assert_uimm_bits!(IMM8, 1);
25568        let a = a.as_i32x8();
25569        let b = _mm256_castsi128_si256(b).as_i32x8();
25570        let ret: i32x8 = match IMM8 & 0b1 {
25571            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
25572            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
25573        };
25574        transmute(ret)
25575    }
25576}
25577
25578/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25579///
25580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_inserti32x4&expand=3172)
25581#[inline]
25582#[target_feature(enable = "avx512f,avx512vl")]
25583#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25584#[cfg_attr(
25585    all(test, not(target_env = "msvc")),
25586    assert_instr(vinserti32x4, IMM8 = 1)
25587)]
25588#[rustc_legacy_const_generics(4)]
25589pub fn _mm256_mask_inserti32x4<const IMM8: i32>(
25590    src: __m256i,
25591    k: __mmask8,
25592    a: __m256i,
25593    b: __m128i,
25594) -> __m256i {
25595    unsafe {
25596        static_assert_uimm_bits!(IMM8, 1);
25597        let r = _mm256_inserti32x4::<IMM8>(a, b);
25598        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
25599    }
25600}
25601
25602/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25603///
25604/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_inserti32x4&expand=3173)
25605#[inline]
25606#[target_feature(enable = "avx512f,avx512vl")]
25607#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25608#[cfg_attr(
25609    all(test, not(target_env = "msvc")),
25610    assert_instr(vinserti32x4, IMM8 = 1)
25611)]
25612#[rustc_legacy_const_generics(3)]
25613pub fn _mm256_maskz_inserti32x4<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m128i) -> __m256i {
25614    unsafe {
25615        static_assert_uimm_bits!(IMM8, 1);
25616        let r = _mm256_inserti32x4::<IMM8>(a, b);
25617        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
25618    }
25619}
25620
25621/// Copy a to dst, then insert 256 bits (composed of 4 packed 64-bit integers) from b into dst at the location specified by imm8.
25622///
25623/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti64x4&expand=3186)
25624#[inline]
25625#[target_feature(enable = "avx512f")]
25626#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25627#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))] // should be vinserti64x4, but the FP form is what is currently emitted
25628#[rustc_legacy_const_generics(2)]
25629pub fn _mm512_inserti64x4<const IMM8: i32>(a: __m512i, b: __m256i) -> __m512i {
25630    unsafe {
25631        static_assert_uimm_bits!(IMM8, 1);
25632        let b = _mm512_castsi256_si512(b);
25633        match IMM8 & 0b1 {
25634            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
25635            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
25636        }
25637    }
25638}
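
// Illustrative, test-only sketch (helper name invented): for the 64x4 variant,
// IMM8 picks which 256-bit half of the destination is replaced by `b`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn inserti64x4_example() {
    let a = _mm512_set1_epi64(0);
    let b: __m256i = unsafe { mem::transmute([10i64, 11, 12, 13]) };
    // IMM8 = 1 replaces the upper 256 bits (elements 4..=7) with `b`.
    let r: [i64; 8] = unsafe { mem::transmute(_mm512_inserti64x4::<1>(a, b)) };
    assert_eq!(r, [0, 0, 0, 0, 10, 11, 12, 13]);
}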
25639
25640/// Copy a to tmp, then insert 256 bits (composed of 4 packed 64-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25641///
25642/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti64x4&expand=3187)
25643#[inline]
25644#[target_feature(enable = "avx512f")]
25645#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25646#[cfg_attr(test, assert_instr(vinserti64x4, IMM8 = 1))]
25647#[rustc_legacy_const_generics(4)]
25648pub fn _mm512_mask_inserti64x4<const IMM8: i32>(
25649    src: __m512i,
25650    k: __mmask8,
25651    a: __m512i,
25652    b: __m256i,
25653) -> __m512i {
25654    unsafe {
25655        static_assert_uimm_bits!(IMM8, 1);
25656        let r = _mm512_inserti64x4::<IMM8>(a, b);
25657        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
25658    }
25659}
25660
25661/// Copy a to tmp, then insert 256 bits (composed of 4 packed 64-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25662///
25663/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti64x4&expand=3188)
25664#[inline]
25665#[target_feature(enable = "avx512f")]
25666#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25667#[cfg_attr(test, assert_instr(vinserti64x4, IMM8 = 1))]
25668#[rustc_legacy_const_generics(3)]
25669pub fn _mm512_maskz_inserti64x4<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m256i) -> __m512i {
25670    unsafe {
25671        static_assert_uimm_bits!(IMM8, 1);
25672        let r = _mm512_inserti64x4::<IMM8>(a, b);
25673        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
25674    }
25675}
25676
25677/// Copy a to dst, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into dst at the location specified by imm8.
25678///
25679/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf32x4&expand=3155)
25680#[inline]
25681#[target_feature(enable = "avx512f")]
25682#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25683#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
25684#[rustc_legacy_const_generics(2)]
25685pub fn _mm512_insertf32x4<const IMM8: i32>(a: __m512, b: __m128) -> __m512 {
25686    unsafe {
25687        static_assert_uimm_bits!(IMM8, 2);
25688        let b = _mm512_castps128_ps512(b);
25689        match IMM8 & 0b11 {
25690            0 => {
25691                simd_shuffle!(
25692                    a,
25693                    b,
25694                    [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
25695                )
25696            }
25697            1 => {
25698                simd_shuffle!(
25699                    a,
25700                    b,
25701                    [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
25702                )
25703            }
25704            2 => {
25705                simd_shuffle!(
25706                    a,
25707                    b,
25708                    [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
25709                )
25710            }
25711            _ => {
25712                simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19])
25713            }
25714        }
25715    }
25716}
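
// Illustrative, test-only sketch (helper name invented): the single-precision
// variant has the same lane-selection behaviour as `_mm512_inserti32x4`, just
// over f32 elements.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn insertf32x4_example() {
    let a = _mm512_set1_ps(0.0);
    let b: __m128 = unsafe { mem::transmute([1.0f32, 2.0, 3.0, 4.0]) };
    // IMM8 = 0 replaces the lowest 128-bit lane (elements 0..=3) with `b`.
    let r: [f32; 16] = unsafe { mem::transmute(_mm512_insertf32x4::<0>(a, b)) };
    let e: [f32; 16] = [
        1.0, 2.0, 3.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
    ];
    assert_eq!(r, e);
}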
25717
25718/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25719///
25720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf32x4&expand=3156)
25721#[inline]
25722#[target_feature(enable = "avx512f")]
25723#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25724#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
25725#[rustc_legacy_const_generics(4)]
25726pub fn _mm512_mask_insertf32x4<const IMM8: i32>(
25727    src: __m512,
25728    k: __mmask16,
25729    a: __m512,
25730    b: __m128,
25731) -> __m512 {
25732    unsafe {
25733        static_assert_uimm_bits!(IMM8, 2);
25734        let r = _mm512_insertf32x4::<IMM8>(a, b);
25735        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
25736    }
25737}
25738
25739/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25740///
25741/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf32x4&expand=3157)
25742#[inline]
25743#[target_feature(enable = "avx512f")]
25744#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25745#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
25746#[rustc_legacy_const_generics(3)]
25747pub fn _mm512_maskz_insertf32x4<const IMM8: i32>(k: __mmask16, a: __m512, b: __m128) -> __m512 {
25748    unsafe {
25749        static_assert_uimm_bits!(IMM8, 2);
25750        let r = _mm512_insertf32x4::<IMM8>(a, b);
25751        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
25752    }
25753}
25754
25755/// Copy a to dst, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into dst at the location specified by imm8.
25756///
25757/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf32x4&expand=3152)
25758#[inline]
25759#[target_feature(enable = "avx512f,avx512vl")]
25760#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25761#[cfg_attr(
25762    all(test, not(target_env = "msvc")),
25763    assert_instr(vinsert, IMM8 = 1) // should be vinsertf32x4; the test only checks for a vinsert-prefixed instruction
25764)]
25765#[rustc_legacy_const_generics(2)]
25766pub fn _mm256_insertf32x4<const IMM8: i32>(a: __m256, b: __m128) -> __m256 {
25767    unsafe {
25768        static_assert_uimm_bits!(IMM8, 1);
25769        let b = _mm256_castps128_ps256(b);
25770        match IMM8 & 0b1 {
25771            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
25772            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
25773        }
25774    }
25775}
25776
25777/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25778///
25779/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_insertf32x4&expand=3153)
25780#[inline]
25781#[target_feature(enable = "avx512f,avx512vl")]
25782#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25783#[cfg_attr(
25784    all(test, not(target_env = "msvc")),
25785    assert_instr(vinsertf32x4, IMM8 = 1)
25786)]
25787#[rustc_legacy_const_generics(4)]
25788pub fn _mm256_mask_insertf32x4<const IMM8: i32>(
25789    src: __m256,
25790    k: __mmask8,
25791    a: __m256,
25792    b: __m128,
25793) -> __m256 {
25794    unsafe {
25795        static_assert_uimm_bits!(IMM8, 1);
25796        let r = _mm256_insertf32x4::<IMM8>(a, b);
25797        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
25798    }
25799}
25800
25801/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25802///
25803/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_insertf32x4&expand=3154)
25804#[inline]
25805#[target_feature(enable = "avx512f,avx512vl")]
25806#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25807#[cfg_attr(
25808    all(test, not(target_env = "msvc")),
25809    assert_instr(vinsertf32x4, IMM8 = 1)
25810)]
25811#[rustc_legacy_const_generics(3)]
25812pub fn _mm256_maskz_insertf32x4<const IMM8: i32>(k: __mmask8, a: __m256, b: __m128) -> __m256 {
25813    unsafe {
25814        static_assert_uimm_bits!(IMM8, 1);
25815        let r = _mm256_insertf32x4::<IMM8>(a, b);
25816        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
25817    }
25818}
25819
25820/// Copy a to dst, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into dst at the location specified by imm8.
25821///
25822/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf64x4&expand=3167)
25823#[inline]
25824#[target_feature(enable = "avx512f")]
25825#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25826#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
25827#[rustc_legacy_const_generics(2)]
25828pub fn _mm512_insertf64x4<const IMM8: i32>(a: __m512d, b: __m256d) -> __m512d {
25829    unsafe {
25830        static_assert_uimm_bits!(IMM8, 1);
25831        let b = _mm512_castpd256_pd512(b);
25832        match IMM8 & 0b1 {
25833            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
25834            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
25835        }
25836    }
25837}
25838
25839/// Copy a to tmp, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25840///
25841/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf64x4&expand=3168)
25842#[inline]
25843#[target_feature(enable = "avx512f")]
25844#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25845#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
25846#[rustc_legacy_const_generics(4)]
25847pub fn _mm512_mask_insertf64x4<const IMM8: i32>(
25848    src: __m512d,
25849    k: __mmask8,
25850    a: __m512d,
25851    b: __m256d,
25852) -> __m512d {
25853    unsafe {
25854        static_assert_uimm_bits!(IMM8, 1);
25855        let r = _mm512_insertf64x4::<IMM8>(a, b);
25856        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
25857    }
25858}
25859
25860/// Copy a to tmp, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25861///
25862/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf64x4&expand=3169)
25863#[inline]
25864#[target_feature(enable = "avx512f")]
25865#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25866#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
25867#[rustc_legacy_const_generics(3)]
25868pub fn _mm512_maskz_insertf64x4<const IMM8: i32>(k: __mmask8, a: __m512d, b: __m256d) -> __m512d {
25869    unsafe {
25870        static_assert_uimm_bits!(IMM8, 1);
25871        let r = _mm512_insertf64x4::<IMM8>(a, b);
25872        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
25873    }
25874}
25875
25876/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
25877///
25878/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi32&expand=6021)
25879#[inline]
25880#[target_feature(enable = "avx512f")]
25881#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25882#[cfg_attr(test, assert_instr(vunpckhps))] // should be vpunpckhdq, but the FP form is what is currently emitted
25883pub fn _mm512_unpackhi_epi32(a: __m512i, b: __m512i) -> __m512i {
25884    unsafe {
25885        let a = a.as_i32x16();
25886        let b = b.as_i32x16();
25887        #[rustfmt::skip]
25888        let r: i32x16 = simd_shuffle!(
25889            a, b,
25890            [ 2, 18, 3, 19,
25891              2 + 4, 18 + 4, 3 + 4, 19 + 4,
25892              2 + 8, 18 + 8, 3 + 8, 19 + 8,
25893              2 + 12, 18 + 12, 3 + 12, 19 + 12],
25894        );
25895        transmute(r)
25896    }
25897}
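
// Illustrative, test-only sketch (helper name invented): unpackhi works
// independently within each 128-bit lane, interleaving the two high 32-bit
// elements of `a` and `b` from that lane.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn unpackhi_epi32_example() {
    let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let b = _mm512_setr_epi32(
        100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115,
    );
    let r: [i32; 16] = unsafe { mem::transmute(_mm512_unpackhi_epi32(a, b)) };
    // The first 128-bit lane yields [a2, b2, a3, b3], and so on for the other lanes.
    let e: [i32; 16] = [
        2, 102, 3, 103, 6, 106, 7, 107, 10, 110, 11, 111, 14, 114, 15, 115,
    ];
    assert_eq!(r, e);
}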
25898
25899/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25900///
25901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi32&expand=6019)
25902#[inline]
25903#[target_feature(enable = "avx512f")]
25904#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25905#[cfg_attr(test, assert_instr(vpunpckhdq))]
25906pub fn _mm512_mask_unpackhi_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
25907    unsafe {
25908        let unpackhi = _mm512_unpackhi_epi32(a, b).as_i32x16();
25909        transmute(simd_select_bitmask(k, unpackhi, src.as_i32x16()))
25910    }
25911}
25912
25913/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25914///
25915/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi32&expand=6020)
25916#[inline]
25917#[target_feature(enable = "avx512f")]
25918#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25919#[cfg_attr(test, assert_instr(vpunpckhdq))]
25920pub fn _mm512_maskz_unpackhi_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
25921    unsafe {
25922        let unpackhi = _mm512_unpackhi_epi32(a, b).as_i32x16();
25923        transmute(simd_select_bitmask(k, unpackhi, i32x16::ZERO))
25924    }
25925}
25926
25927/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25928///
25929/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi32&expand=6016)
25930#[inline]
25931#[target_feature(enable = "avx512f,avx512vl")]
25932#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25933#[cfg_attr(test, assert_instr(vpunpckhdq))]
25934pub fn _mm256_mask_unpackhi_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
25935    unsafe {
25936        let unpackhi = _mm256_unpackhi_epi32(a, b).as_i32x8();
25937        transmute(simd_select_bitmask(k, unpackhi, src.as_i32x8()))
25938    }
25939}
25940
25941/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25942///
25943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi32&expand=6017)
25944#[inline]
25945#[target_feature(enable = "avx512f,avx512vl")]
25946#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25947#[cfg_attr(test, assert_instr(vpunpckhdq))]
25948pub fn _mm256_maskz_unpackhi_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
25949    unsafe {
25950        let unpackhi = _mm256_unpackhi_epi32(a, b).as_i32x8();
25951        transmute(simd_select_bitmask(k, unpackhi, i32x8::ZERO))
25952    }
25953}
25954
25955/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25956///
25957/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi32&expand=6013)
25958#[inline]
25959#[target_feature(enable = "avx512f,avx512vl")]
25960#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25961#[cfg_attr(test, assert_instr(vpunpckhdq))]
25962pub fn _mm_mask_unpackhi_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
25963    unsafe {
25964        let unpackhi = _mm_unpackhi_epi32(a, b).as_i32x4();
25965        transmute(simd_select_bitmask(k, unpackhi, src.as_i32x4()))
25966    }
25967}
25968
25969/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25970///
25971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi32&expand=6014)
25972#[inline]
25973#[target_feature(enable = "avx512f,avx512vl")]
25974#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25975#[cfg_attr(test, assert_instr(vpunpckhdq))]
25976pub fn _mm_maskz_unpackhi_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
25977    unsafe {
25978        let unpackhi = _mm_unpackhi_epi32(a, b).as_i32x4();
25979        transmute(simd_select_bitmask(k, unpackhi, i32x4::ZERO))
25980    }
25981}
25982
25983/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
25984///
25985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi64&expand=6030)
25986#[inline]
25987#[target_feature(enable = "avx512f")]
25988#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25989#[cfg_attr(test, assert_instr(vunpckhpd))] // should be vpunpckhqdq, but the FP form is what is currently emitted
25990pub fn _mm512_unpackhi_epi64(a: __m512i, b: __m512i) -> __m512i {
25991    unsafe { simd_shuffle!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6]) }
25992}
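
// Illustrative, test-only sketch (helper name invented): for 64-bit elements each
// 128-bit lane contributes a single high element from `a` and one from `b`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn unpackhi_epi64_example() {
    let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
    let b = _mm512_setr_epi64(10, 11, 12, 13, 14, 15, 16, 17);
    let r: [i64; 8] = unsafe { mem::transmute(_mm512_unpackhi_epi64(a, b)) };
    // Per 128-bit lane: [a1, b1], [a3, b3], [a5, b5], [a7, b7].
    assert_eq!(r, [1, 11, 3, 13, 5, 15, 7, 17]);
}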
25993
25994/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25995///
25996/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi64&expand=6028)
25997#[inline]
25998#[target_feature(enable = "avx512f")]
25999#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26000#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26001pub fn _mm512_mask_unpackhi_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26002    unsafe {
26003        let unpackhi = _mm512_unpackhi_epi64(a, b).as_i64x8();
26004        transmute(simd_select_bitmask(k, unpackhi, src.as_i64x8()))
26005    }
26006}
26007
26008/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26009///
26010/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi64&expand=6029)
26011#[inline]
26012#[target_feature(enable = "avx512f")]
26013#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26014#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26015pub fn _mm512_maskz_unpackhi_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26016    unsafe {
26017        let unpackhi = _mm512_unpackhi_epi64(a, b).as_i64x8();
26018        transmute(simd_select_bitmask(k, unpackhi, i64x8::ZERO))
26019    }
26020}
26021
26022/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26023///
26024/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi64&expand=6025)
26025#[inline]
26026#[target_feature(enable = "avx512f,avx512vl")]
26027#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26028#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26029pub fn _mm256_mask_unpackhi_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26030    unsafe {
26031        let unpackhi = _mm256_unpackhi_epi64(a, b).as_i64x4();
26032        transmute(simd_select_bitmask(k, unpackhi, src.as_i64x4()))
26033    }
26034}
26035
26036/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26037///
26038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi64&expand=6026)
26039#[inline]
26040#[target_feature(enable = "avx512f,avx512vl")]
26041#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26042#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26043pub fn _mm256_maskz_unpackhi_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26044    unsafe {
26045        let unpackhi = _mm256_unpackhi_epi64(a, b).as_i64x4();
26046        transmute(simd_select_bitmask(k, unpackhi, i64x4::ZERO))
26047    }
26048}
26049
26050/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26051///
26052/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi64&expand=6022)
26053#[inline]
26054#[target_feature(enable = "avx512f,avx512vl")]
26055#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26056#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26057pub fn _mm_mask_unpackhi_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26058    unsafe {
26059        let unpackhi = _mm_unpackhi_epi64(a, b).as_i64x2();
26060        transmute(simd_select_bitmask(k, unpackhi, src.as_i64x2()))
26061    }
26062}
26063
26064/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26065///
26066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi64&expand=6023)
26067#[inline]
26068#[target_feature(enable = "avx512f,avx512vl")]
26069#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26070#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26071pub fn _mm_maskz_unpackhi_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26072    unsafe {
26073        let unpackhi = _mm_unpackhi_epi64(a, b).as_i64x2();
26074        transmute(simd_select_bitmask(k, unpackhi, i64x2::ZERO))
26075    }
26076}
26077
26078/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst.
26079///
26080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_ps&expand=6060)
26081#[inline]
26082#[target_feature(enable = "avx512f")]
26083#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26084#[cfg_attr(test, assert_instr(vunpckhps))]
26085pub fn _mm512_unpackhi_ps(a: __m512, b: __m512) -> __m512 {
26086    unsafe {
26087        #[rustfmt::skip]
26088        simd_shuffle!(
26089            a, b,
26090            [ 2, 18, 3, 19,
26091              2 + 4, 18 + 4, 3 + 4, 19 + 4,
26092              2 + 8, 18 + 8, 3 + 8, 19 + 8,
26093              2 + 12, 18 + 12, 3 + 12, 19 + 12],
26094        )
26095    }
26096}
26097
26098/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26099///
26100/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_ps&expand=6058)
26101#[inline]
26102#[target_feature(enable = "avx512f")]
26103#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26104#[cfg_attr(test, assert_instr(vunpckhps))]
26105pub fn _mm512_mask_unpackhi_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
26106    unsafe {
26107        let unpackhi = _mm512_unpackhi_ps(a, b).as_f32x16();
26108        transmute(simd_select_bitmask(k, unpackhi, src.as_f32x16()))
26109    }
26110}
26111
26112/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26113///
26114/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_ps&expand=6059)
26115#[inline]
26116#[target_feature(enable = "avx512f")]
26117#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26118#[cfg_attr(test, assert_instr(vunpckhps))]
26119pub fn _mm512_maskz_unpackhi_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
26120    unsafe {
26121        let unpackhi = _mm512_unpackhi_ps(a, b).as_f32x16();
26122        transmute(simd_select_bitmask(k, unpackhi, f32x16::ZERO))
26123    }
26124}
26125
26126/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26127///
26128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_ps&expand=6055)
26129#[inline]
26130#[target_feature(enable = "avx512f,avx512vl")]
26131#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26132#[cfg_attr(test, assert_instr(vunpckhps))]
26133pub fn _mm256_mask_unpackhi_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
26134    unsafe {
26135        let unpackhi = _mm256_unpackhi_ps(a, b).as_f32x8();
26136        transmute(simd_select_bitmask(k, unpackhi, src.as_f32x8()))
26137    }
26138}
26139
26140/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26141///
26142/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_ps&expand=6056)
26143#[inline]
26144#[target_feature(enable = "avx512f,avx512vl")]
26145#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26146#[cfg_attr(test, assert_instr(vunpckhps))]
26147pub fn _mm256_maskz_unpackhi_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
26148    unsafe {
26149        let unpackhi = _mm256_unpackhi_ps(a, b).as_f32x8();
26150        transmute(simd_select_bitmask(k, unpackhi, f32x8::ZERO))
26151    }
26152}
26153
26154/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26155///
26156/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_ps&expand=6052)
26157#[inline]
26158#[target_feature(enable = "avx512f,avx512vl")]
26159#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26160#[cfg_attr(test, assert_instr(vunpckhps))]
26161pub fn _mm_mask_unpackhi_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
26162    unsafe {
26163        let unpackhi = _mm_unpackhi_ps(a, b).as_f32x4();
26164        transmute(simd_select_bitmask(k, unpackhi, src.as_f32x4()))
26165    }
26166}
26167
26168/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26169///
26170/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_ps&expand=6053)
26171#[inline]
26172#[target_feature(enable = "avx512f,avx512vl")]
26173#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26174#[cfg_attr(test, assert_instr(vunpckhps))]
26175pub fn _mm_maskz_unpackhi_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
26176    unsafe {
26177        let unpackhi = _mm_unpackhi_ps(a, b).as_f32x4();
26178        transmute(simd_select_bitmask(k, unpackhi, f32x4::ZERO))
26179    }
26180}
26181
26182/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst.
26183///
26184/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_pd&expand=6048)
26185#[inline]
26186#[target_feature(enable = "avx512f")]
26187#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26188#[cfg_attr(test, assert_instr(vunpckhpd))]
26189pub fn _mm512_unpackhi_pd(a: __m512d, b: __m512d) -> __m512d {
26190    unsafe { simd_shuffle!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6]) }
26191}
26192
26193/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26194///
26195/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_pd&expand=6046)
26196#[inline]
26197#[target_feature(enable = "avx512f")]
26198#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26199#[cfg_attr(test, assert_instr(vunpckhpd))]
26200pub fn _mm512_mask_unpackhi_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
26201    unsafe {
26202        let unpackhi = _mm512_unpackhi_pd(a, b).as_f64x8();
26203        transmute(simd_select_bitmask(k, unpackhi, src.as_f64x8()))
26204    }
26205}
26206
26207/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26208///
26209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_pd&expand=6047)
26210#[inline]
26211#[target_feature(enable = "avx512f")]
26212#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26213#[cfg_attr(test, assert_instr(vunpckhpd))]
26214pub fn _mm512_maskz_unpackhi_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
26215    unsafe {
26216        let unpackhi = _mm512_unpackhi_pd(a, b).as_f64x8();
26217        transmute(simd_select_bitmask(k, unpackhi, f64x8::ZERO))
26218    }
26219}
26220
26221/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26222///
26223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_pd&expand=6043)
26224#[inline]
26225#[target_feature(enable = "avx512f,avx512vl")]
26226#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26227#[cfg_attr(test, assert_instr(vunpckhpd))]
26228pub fn _mm256_mask_unpackhi_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
26229    unsafe {
26230        let unpackhi = _mm256_unpackhi_pd(a, b).as_f64x4();
26231        transmute(simd_select_bitmask(k, unpackhi, src.as_f64x4()))
26232    }
26233}
26234
26235/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26236///
26237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_pd&expand=6044)
26238#[inline]
26239#[target_feature(enable = "avx512f,avx512vl")]
26240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26241#[cfg_attr(test, assert_instr(vunpckhpd))]
26242pub fn _mm256_maskz_unpackhi_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
26243    unsafe {
26244        let unpackhi = _mm256_unpackhi_pd(a, b).as_f64x4();
26245        transmute(simd_select_bitmask(k, unpackhi, f64x4::ZERO))
26246    }
26247}
26248
26249/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26250///
26251/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_pd&expand=6040)
26252#[inline]
26253#[target_feature(enable = "avx512f,avx512vl")]
26254#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26255#[cfg_attr(test, assert_instr(vunpckhpd))]
26256pub fn _mm_mask_unpackhi_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
26257    unsafe {
26258        let unpackhi = _mm_unpackhi_pd(a, b).as_f64x2();
26259        transmute(simd_select_bitmask(k, unpackhi, src.as_f64x2()))
26260    }
26261}
26262
26263/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26264///
26265/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_pd&expand=6041)
26266#[inline]
26267#[target_feature(enable = "avx512f,avx512vl")]
26268#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26269#[cfg_attr(test, assert_instr(vunpckhpd))]
26270pub fn _mm_maskz_unpackhi_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
26271    unsafe {
26272        let unpackhi = _mm_unpackhi_pd(a, b).as_f64x2();
26273        transmute(simd_select_bitmask(k, unpackhi, f64x2::ZERO))
26274    }
26275}
26276
26277/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
26278///
26279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi32&expand=6078)
26280#[inline]
26281#[target_feature(enable = "avx512f")]
26282#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26283#[cfg_attr(test, assert_instr(vunpcklps))] // should be vpunpckldq, but the FP form is what is currently emitted
26284pub fn _mm512_unpacklo_epi32(a: __m512i, b: __m512i) -> __m512i {
26285    unsafe {
26286        let a = a.as_i32x16();
26287        let b = b.as_i32x16();
26288        #[rustfmt::skip]
26289        let r: i32x16 = simd_shuffle!(
26290            a, b,
26291            [ 0, 16, 1, 17,
26292              0 + 4, 16 + 4, 1 + 4, 17 + 4,
26293              0 + 8, 16 + 8, 1 + 8, 17 + 8,
26294              0 + 12, 16 + 12, 1 + 12, 17 + 12],
26295        );
26296        transmute(r)
26297    }
26298}
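
// Illustrative, test-only sketch (helper name invented): the low-half counterpart
// of unpackhi; each 128-bit lane interleaves elements 0 and 1 of `a` and `b`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn unpacklo_epi32_example() {
    let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let b = _mm512_setr_epi32(
        100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115,
    );
    let r: [i32; 16] = unsafe { mem::transmute(_mm512_unpacklo_epi32(a, b)) };
    // The first 128-bit lane yields [a0, b0, a1, b1], and so on for the other lanes.
    let e: [i32; 16] = [
        0, 100, 1, 101, 4, 104, 5, 105, 8, 108, 9, 109, 12, 112, 13, 113,
    ];
    assert_eq!(r, e);
}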
26299
26300/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26301///
26302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi32&expand=6076)
26303#[inline]
26304#[target_feature(enable = "avx512f")]
26305#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26306#[cfg_attr(test, assert_instr(vpunpckldq))]
26307pub fn _mm512_mask_unpacklo_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
26308    unsafe {
26309        let unpacklo = _mm512_unpacklo_epi32(a, b).as_i32x16();
26310        transmute(simd_select_bitmask(k, unpacklo, src.as_i32x16()))
26311    }
26312}
26313
26314/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26315///
26316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi32&expand=6077)
26317#[inline]
26318#[target_feature(enable = "avx512f")]
26319#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26320#[cfg_attr(test, assert_instr(vpunpckldq))]
26321pub fn _mm512_maskz_unpacklo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
26322    unsafe {
26323        let unpacklo = _mm512_unpacklo_epi32(a, b).as_i32x16();
26324        transmute(simd_select_bitmask(k, unpacklo, i32x16::ZERO))
26325    }
26326}
26327
26328/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26329///
26330/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi32&expand=6073)
26331#[inline]
26332#[target_feature(enable = "avx512f,avx512vl")]
26333#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26334#[cfg_attr(test, assert_instr(vpunpckldq))]
26335pub fn _mm256_mask_unpacklo_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26336    unsafe {
26337        let unpacklo = _mm256_unpacklo_epi32(a, b).as_i32x8();
26338        transmute(simd_select_bitmask(k, unpacklo, src.as_i32x8()))
26339    }
26340}
26341
26342/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26343///
26344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi32&expand=6074)
26345#[inline]
26346#[target_feature(enable = "avx512f,avx512vl")]
26347#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26348#[cfg_attr(test, assert_instr(vpunpckldq))]
26349pub fn _mm256_maskz_unpacklo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26350    unsafe {
26351        let unpacklo = _mm256_unpacklo_epi32(a, b).as_i32x8();
26352        transmute(simd_select_bitmask(k, unpacklo, i32x8::ZERO))
26353    }
26354}
26355
26356/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26357///
26358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi32&expand=6070)
26359#[inline]
26360#[target_feature(enable = "avx512f,avx512vl")]
26361#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26362#[cfg_attr(test, assert_instr(vpunpckldq))]
26363pub fn _mm_mask_unpacklo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26364    unsafe {
26365        let unpacklo = _mm_unpacklo_epi32(a, b).as_i32x4();
26366        transmute(simd_select_bitmask(k, unpacklo, src.as_i32x4()))
26367    }
26368}
26369
26370/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26371///
26372/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi32&expand=6071)
26373#[inline]
26374#[target_feature(enable = "avx512f,avx512vl")]
26375#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26376#[cfg_attr(test, assert_instr(vpunpckldq))]
26377pub fn _mm_maskz_unpacklo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26378    unsafe {
26379        let unpacklo = _mm_unpacklo_epi32(a, b).as_i32x4();
26380        transmute(simd_select_bitmask(k, unpacklo, i32x4::ZERO))
26381    }
26382}
26383
26384/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
26385///
26386/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi64&expand=6087)
26387#[inline]
26388#[target_feature(enable = "avx512f")]
26389#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26390#[cfg_attr(test, assert_instr(vunpcklpd))] // should be vpunpcklqdq, but the FP form is what is currently emitted
26391pub fn _mm512_unpacklo_epi64(a: __m512i, b: __m512i) -> __m512i {
26392    unsafe { simd_shuffle!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6]) }
26393}
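
// Illustrative, test-only sketch (helper name invented): each 128-bit lane
// contributes its low 64-bit element from `a` and from `b`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn unpacklo_epi64_example() {
    let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
    let b = _mm512_setr_epi64(10, 11, 12, 13, 14, 15, 16, 17);
    let r: [i64; 8] = unsafe { mem::transmute(_mm512_unpacklo_epi64(a, b)) };
    // Per 128-bit lane: [a0, b0], [a2, b2], [a4, b4], [a6, b6].
    assert_eq!(r, [0, 10, 2, 12, 4, 14, 6, 16]);
}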
26394
26395/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26396///
26397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi64&expand=6085)
26398#[inline]
26399#[target_feature(enable = "avx512f")]
26400#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26401#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26402pub fn _mm512_mask_unpacklo_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26403    unsafe {
26404        let unpacklo = _mm512_unpacklo_epi64(a, b).as_i64x8();
26405        transmute(simd_select_bitmask(k, unpacklo, src.as_i64x8()))
26406    }
26407}
26408
26409/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26410///
26411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi64&expand=6086)
26412#[inline]
26413#[target_feature(enable = "avx512f")]
26414#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26415#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26416pub fn _mm512_maskz_unpacklo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26417    unsafe {
26418        let unpacklo = _mm512_unpacklo_epi64(a, b).as_i64x8();
26419        transmute(simd_select_bitmask(k, unpacklo, i64x8::ZERO))
26420    }
26421}
26422
26423/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26424///
26425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi64&expand=6082)
26426#[inline]
26427#[target_feature(enable = "avx512f,avx512vl")]
26428#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26429#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26430pub fn _mm256_mask_unpacklo_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26431    unsafe {
26432        let unpacklo = _mm256_unpacklo_epi64(a, b).as_i64x4();
26433        transmute(simd_select_bitmask(k, unpacklo, src.as_i64x4()))
26434    }
26435}
26436
26437/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26438///
26439/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi64&expand=6083)
26440#[inline]
26441#[target_feature(enable = "avx512f,avx512vl")]
26442#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26443#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26444pub fn _mm256_maskz_unpacklo_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26445    unsafe {
26446        let unpacklo = _mm256_unpacklo_epi64(a, b).as_i64x4();
26447        transmute(simd_select_bitmask(k, unpacklo, i64x4::ZERO))
26448    }
26449}
26450
26451/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26452///
26453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi64&expand=6079)
26454#[inline]
26455#[target_feature(enable = "avx512f,avx512vl")]
26456#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26457#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26458pub fn _mm_mask_unpacklo_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26459    unsafe {
26460        let unpacklo = _mm_unpacklo_epi64(a, b).as_i64x2();
26461        transmute(simd_select_bitmask(k, unpacklo, src.as_i64x2()))
26462    }
26463}
26464
26465/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26466///
26467/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi64&expand=6080)
26468#[inline]
26469#[target_feature(enable = "avx512f,avx512vl")]
26470#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26471#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26472pub fn _mm_maskz_unpacklo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26473    unsafe {
26474        let unpacklo = _mm_unpacklo_epi64(a, b).as_i64x2();
26475        transmute(simd_select_bitmask(k, unpacklo, i64x2::ZERO))
26476    }
26477}
26478
26479/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst.
26480///
26481/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_ps&expand=6117)
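///
/// Illustrative sketch (hypothetical values; assumes `avx512f` is enabled
/// for the calling code):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm512_setr_ps(
///     0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
/// );
/// let b = _mm512_set1_ps(100.);
/// // Within each 128-bit lane the two low elements of `a` and `b` are
/// // interleaved, so the first result lane is [0., 100., 1., 100.].
/// let r = _mm512_unpacklo_ps(a, b);
/// ```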
26482#[inline]
26483#[target_feature(enable = "avx512f")]
26484#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26485#[cfg_attr(test, assert_instr(vunpcklps))]
26486pub fn _mm512_unpacklo_ps(a: __m512, b: __m512) -> __m512 {
26487    unsafe {
26488        #[rustfmt::skip]
26489        simd_shuffle!(a, b,
26490                       [ 0, 16, 1, 17,
26491                         0 + 4, 16 + 4, 1 + 4, 17 + 4,
26492                         0 + 8, 16 + 8, 1 + 8, 17 + 8,
26493                         0 + 12, 16 + 12, 1 + 12, 17 + 12],
26494        )
26495    }
26496}
26497
26498/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26499///
26500/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_ps&expand=6115)
26501#[inline]
26502#[target_feature(enable = "avx512f")]
26503#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26504#[cfg_attr(test, assert_instr(vunpcklps))]
26505pub fn _mm512_mask_unpacklo_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
26506    unsafe {
26507        let unpacklo = _mm512_unpacklo_ps(a, b).as_f32x16();
26508        transmute(simd_select_bitmask(k, unpacklo, src.as_f32x16()))
26509    }
26510}
26511
26512/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26513///
26514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_ps&expand=6116)
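///
/// Illustrative sketch of the zeroing behaviour (assumes `avx512f` is
/// enabled for the calling code):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm512_set1_ps(1.);
/// let b = _mm512_set1_ps(2.);
/// // Only the even result positions survive the mask, so the result
/// // alternates 1.0 (from `a`) and 0.0.
/// let r = _mm512_maskz_unpacklo_ps(0b0101_0101_0101_0101, a, b);
/// ```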
26515#[inline]
26516#[target_feature(enable = "avx512f")]
26517#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26518#[cfg_attr(test, assert_instr(vunpcklps))]
26519pub fn _mm512_maskz_unpacklo_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
26520    unsafe {
26521        let unpacklo = _mm512_unpacklo_ps(a, b).as_f32x16();
26522        transmute(simd_select_bitmask(k, unpacklo, f32x16::ZERO))
26523    }
26524}
26525
26526/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26527///
26528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_ps&expand=6112)
26529#[inline]
26530#[target_feature(enable = "avx512f,avx512vl")]
26531#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26532#[cfg_attr(test, assert_instr(vunpcklps))]
26533pub fn _mm256_mask_unpacklo_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
26534    unsafe {
26535        let unpacklo = _mm256_unpacklo_ps(a, b).as_f32x8();
26536        transmute(simd_select_bitmask(k, unpacklo, src.as_f32x8()))
26537    }
26538}
26539
26540/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26541///
26542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_ps&expand=6113)
26543#[inline]
26544#[target_feature(enable = "avx512f,avx512vl")]
26545#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26546#[cfg_attr(test, assert_instr(vunpcklps))]
26547pub fn _mm256_maskz_unpacklo_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
26548    unsafe {
26549        let unpacklo = _mm256_unpacklo_ps(a, b).as_f32x8();
26550        transmute(simd_select_bitmask(k, unpacklo, f32x8::ZERO))
26551    }
26552}
26553
26554/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26555///
26556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_ps&expand=6109)
26557#[inline]
26558#[target_feature(enable = "avx512f,avx512vl")]
26559#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26560#[cfg_attr(test, assert_instr(vunpcklps))]
26561pub fn _mm_mask_unpacklo_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
26562    unsafe {
26563        let unpacklo = _mm_unpacklo_ps(a, b).as_f32x4();
26564        transmute(simd_select_bitmask(k, unpacklo, src.as_f32x4()))
26565    }
26566}
26567
26568/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26569///
26570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_ps&expand=6110)
26571#[inline]
26572#[target_feature(enable = "avx512f,avx512vl")]
26573#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26574#[cfg_attr(test, assert_instr(vunpcklps))]
26575pub fn _mm_maskz_unpacklo_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
26576    unsafe {
26577        let unpacklo = _mm_unpacklo_ps(a, b).as_f32x4();
26578        transmute(simd_select_bitmask(k, unpacklo, f32x4::ZERO))
26579    }
26580}
26581
26582/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst.
26583///
26584/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_pd&expand=6105)
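///
/// Illustrative sketch (hypothetical values; assumes `avx512f` is enabled
/// for the calling code):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
/// let b = _mm512_setr_pd(10., 11., 12., 13., 14., 15., 16., 17.);
/// // Per 128-bit lane the result is [a_low, b_low], so
/// // r = [0., 10., 2., 12., 4., 14., 6., 16.].
/// let r = _mm512_unpacklo_pd(a, b);
/// ```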
26585#[inline]
26586#[target_feature(enable = "avx512f")]
26587#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26588#[cfg_attr(test, assert_instr(vunpcklpd))]
26589pub fn _mm512_unpacklo_pd(a: __m512d, b: __m512d) -> __m512d {
26590    unsafe { simd_shuffle!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6]) }
26591}
26592
26593/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26594///
26595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_pd&expand=6103)
26596#[inline]
26597#[target_feature(enable = "avx512f")]
26598#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26599#[cfg_attr(test, assert_instr(vunpcklpd))]
26600pub fn _mm512_mask_unpacklo_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
26601    unsafe {
26602        let unpacklo = _mm512_unpacklo_pd(a, b).as_f64x8();
26603        transmute(simd_select_bitmask(k, unpacklo, src.as_f64x8()))
26604    }
26605}
26606
26607/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26608///
26609/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_pd&expand=6104)
26610#[inline]
26611#[target_feature(enable = "avx512f")]
26612#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26613#[cfg_attr(test, assert_instr(vunpcklpd))]
26614pub fn _mm512_maskz_unpacklo_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
26615    unsafe {
26616        let unpacklo = _mm512_unpacklo_pd(a, b).as_f64x8();
26617        transmute(simd_select_bitmask(k, unpacklo, f64x8::ZERO))
26618    }
26619}
26620
26621/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26622///
26623/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_pd&expand=6100)
26624#[inline]
26625#[target_feature(enable = "avx512f,avx512vl")]
26626#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26627#[cfg_attr(test, assert_instr(vunpcklpd))]
26628pub fn _mm256_mask_unpacklo_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
26629    unsafe {
26630        let unpacklo = _mm256_unpacklo_pd(a, b).as_f64x4();
26631        transmute(simd_select_bitmask(k, unpacklo, src.as_f64x4()))
26632    }
26633}
26634
26635/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26636///
26637/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_pd&expand=6101)
26638#[inline]
26639#[target_feature(enable = "avx512f,avx512vl")]
26640#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26641#[cfg_attr(test, assert_instr(vunpcklpd))]
26642pub fn _mm256_maskz_unpacklo_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
26643    unsafe {
26644        let unpacklo = _mm256_unpacklo_pd(a, b).as_f64x4();
26645        transmute(simd_select_bitmask(k, unpacklo, f64x4::ZERO))
26646    }
26647}
26648
26649/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26650///
26651/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_pd&expand=6097)
26652#[inline]
26653#[target_feature(enable = "avx512f,avx512vl")]
26654#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26655#[cfg_attr(test, assert_instr(vunpcklpd))]
26656pub fn _mm_mask_unpacklo_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
26657    unsafe {
26658        let unpacklo = _mm_unpacklo_pd(a, b).as_f64x2();
26659        transmute(simd_select_bitmask(k, unpacklo, src.as_f64x2()))
26660    }
26661}
26662
26663/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26664///
26665/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_pd&expand=6098)
26666#[inline]
26667#[target_feature(enable = "avx512f,avx512vl")]
26668#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26669#[cfg_attr(test, assert_instr(vunpcklpd))]
26670pub fn _mm_maskz_unpacklo_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
26671    unsafe {
26672        let unpacklo = _mm_unpacklo_pd(a, b).as_f64x2();
26673        transmute(simd_select_bitmask(k, unpacklo, f64x2::ZERO))
26674    }
26675}
26676
26677/// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26678///
26679/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps128_ps512&expand=621)
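///
/// Illustrative sketch (assumes `avx512f` is enabled for the calling code):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let lo = _mm_setr_ps(1., 2., 3., 4.);
/// // The four low elements of `v` are 1.0..4.0; the upper 384 bits are
/// // undefined and must not be relied upon.
/// let v = _mm512_castps128_ps512(lo);
/// let back = _mm512_castps512_ps128(v); // recovers the original __m128
/// ```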
26680#[inline]
26681#[target_feature(enable = "avx512f")]
26682#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26683pub fn _mm512_castps128_ps512(a: __m128) -> __m512 {
26684    unsafe {
26685        simd_shuffle!(
26686            a,
26687            _mm_undefined_ps(),
26688            [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
26689        )
26690    }
26691}
26692
26693/// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26694///
26695/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps256_ps512&expand=623)
26696#[inline]
26697#[target_feature(enable = "avx512f")]
26698#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26699pub fn _mm512_castps256_ps512(a: __m256) -> __m512 {
26700    unsafe {
26701        simd_shuffle!(
26702            a,
26703            _mm256_undefined_ps(),
26704            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
26705        )
26706    }
26707}
26708
26709/// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26710///
26711/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextps128_ps512&expand=6196)
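///
/// Illustrative sketch (assumes `avx512f` is enabled for the calling code):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let lo = _mm_setr_ps(1., 2., 3., 4.);
/// // Unlike `_mm512_castps128_ps512`, the upper twelve elements are
/// // guaranteed to be 0.0 here.
/// let v = _mm512_zextps128_ps512(lo);
/// ```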
26712#[inline]
26713#[target_feature(enable = "avx512f")]
26714#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26715pub fn _mm512_zextps128_ps512(a: __m128) -> __m512 {
26716    unsafe {
26717        simd_shuffle!(
26718            a,
26719            _mm_set1_ps(0.),
26720            [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
26721        )
26722    }
26723}
26724
26725/// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26726///
26727/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextps256_ps512&expand=6197)
26728#[inline]
26729#[target_feature(enable = "avx512f")]
26730#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26731pub fn _mm512_zextps256_ps512(a: __m256) -> __m512 {
26732    unsafe {
26733        simd_shuffle!(
26734            a,
26735            _mm256_set1_ps(0.),
26736            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
26737        )
26738    }
26739}
26740
26741/// Cast vector of type __m512 to type __m128. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26742///
26743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps512_ps128&expand=624)
26744#[inline]
26745#[target_feature(enable = "avx512f")]
26746#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26747pub fn _mm512_castps512_ps128(a: __m512) -> __m128 {
26748    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
26749}
26750
26751/// Cast vector of type __m512 to type __m256. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26752///
26753/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps512_ps256&expand=625)
26754#[inline]
26755#[target_feature(enable = "avx512f")]
26756#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26757pub fn _mm512_castps512_ps256(a: __m512) -> __m256 {
26758    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) }
26759}
26760
26761/// Cast vector of type __m512 to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26762///
26763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps_pd&expand=616)
26764#[inline]
26765#[target_feature(enable = "avx512f")]
26766#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26767pub fn _mm512_castps_pd(a: __m512) -> __m512d {
26768    unsafe { transmute(a) }
26769}
26770
26771/// Cast vector of type __m512 to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26772///
26773/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps_si512&expand=619)
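///
/// Illustrative sketch of the bit-preserving reinterpretation (assumes
/// `avx512f` is enabled for the calling code):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let ones = _mm512_set1_ps(1.0);
/// // Each 32-bit lane of `bits` now holds 0x3F80_0000, the IEEE-754
/// // encoding of 1.0f32; no conversion instruction is emitted.
/// let bits = _mm512_castps_si512(ones);
/// ```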
26774#[inline]
26775#[target_feature(enable = "avx512f")]
26776#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26777pub fn _mm512_castps_si512(a: __m512) -> __m512i {
26778    unsafe { transmute(a) }
26779}
26780
26781/// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26782///
26783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd128_pd512&expand=609)
26784#[inline]
26785#[target_feature(enable = "avx512f")]
26786#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26787pub fn _mm512_castpd128_pd512(a: __m128d) -> __m512d {
26788    unsafe { simd_shuffle!(a, _mm_undefined_pd(), [0, 1, 2, 2, 2, 2, 2, 2]) }
26789}
26790
26791/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26792///
26793/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd256_pd512&expand=611)
26794#[inline]
26795#[target_feature(enable = "avx512f")]
26796#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26797pub fn _mm512_castpd256_pd512(a: __m256d) -> __m512d {
26798    unsafe { simd_shuffle!(a, _mm256_undefined_pd(), [0, 1, 2, 3, 4, 4, 4, 4]) }
26799}
26800
26801/// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26802///
26803/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextpd128_pd512&expand=6193)
26804#[inline]
26805#[target_feature(enable = "avx512f")]
26806#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26807pub fn _mm512_zextpd128_pd512(a: __m128d) -> __m512d {
26808    unsafe { simd_shuffle!(a, _mm_set1_pd(0.), [0, 1, 2, 2, 2, 2, 2, 2]) }
26809}
26810
26811/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26812///
26813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextpd256_pd512&expand=6194)
26814#[inline]
26815#[target_feature(enable = "avx512f")]
26816#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26817pub fn _mm512_zextpd256_pd512(a: __m256d) -> __m512d {
26818    unsafe { simd_shuffle!(a, _mm256_set1_pd(0.), [0, 1, 2, 3, 4, 4, 4, 4]) }
26819}
26820
26821/// Cast vector of type __m512d to type __m128d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26822///
26823/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd512_pd128&expand=612)
26824#[inline]
26825#[target_feature(enable = "avx512f")]
26826#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26827pub fn _mm512_castpd512_pd128(a: __m512d) -> __m128d {
26828    unsafe { simd_shuffle!(a, a, [0, 1]) }
26829}
26830
26831/// Cast vector of type __m512d to type __m256d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26832///
26833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd512_pd256&expand=613)
26834#[inline]
26835#[target_feature(enable = "avx512f")]
26836#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26837pub fn _mm512_castpd512_pd256(a: __m512d) -> __m256d {
26838    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
26839}
26840
26841/// Cast vector of type __m512d to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26842///
26843/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd_ps&expand=604)
26844#[inline]
26845#[target_feature(enable = "avx512f")]
26846#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26847pub fn _mm512_castpd_ps(a: __m512d) -> __m512 {
26848    unsafe { transmute(a) }
26849}
26850
26851/// Cast vector of type __m512d to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26852///
26853/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd_si512&expand=607)
26854#[inline]
26855#[target_feature(enable = "avx512f")]
26856#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26857pub fn _mm512_castpd_si512(a: __m512d) -> __m512i {
26858    unsafe { transmute(a) }
26859}
26860
26861/// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26862///
26863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi128_si512&expand=629)
26864#[inline]
26865#[target_feature(enable = "avx512f")]
26866#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26867pub fn _mm512_castsi128_si512(a: __m128i) -> __m512i {
26868    unsafe { simd_shuffle!(a, _mm_undefined_si128(), [0, 1, 2, 2, 2, 2, 2, 2]) }
26869}
26870
26871/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26872///
26873/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi256_si512&expand=633)
26874#[inline]
26875#[target_feature(enable = "avx512f")]
26876#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26877pub fn _mm512_castsi256_si512(a: __m256i) -> __m512i {
26878    unsafe { simd_shuffle!(a, _mm256_undefined_si256(), [0, 1, 2, 3, 4, 4, 4, 4]) }
26879}
26880
26881/// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26882///
26883/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextsi128_si512&expand=6199)
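///
/// Illustrative sketch (assumes `avx512f` is enabled for the calling code):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let lo = _mm_set1_epi32(7);
/// // The low 128 bits of `v` hold four 7s; the remaining 384 bits are zero,
/// // unlike the plain cast, whose upper bits are undefined.
/// let v = _mm512_zextsi128_si512(lo);
/// ```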
26884#[inline]
26885#[target_feature(enable = "avx512f")]
26886#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26887pub fn _mm512_zextsi128_si512(a: __m128i) -> __m512i {
26888    unsafe { simd_shuffle!(a, _mm_setzero_si128(), [0, 1, 2, 2, 2, 2, 2, 2]) }
26889}
26890
26891/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26892///
26893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextsi256_si512&expand=6200)
26894#[inline]
26895#[target_feature(enable = "avx512f")]
26896#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26897pub fn _mm512_zextsi256_si512(a: __m256i) -> __m512i {
26898    unsafe { simd_shuffle!(a, _mm256_setzero_si256(), [0, 1, 2, 3, 4, 4, 4, 4]) }
26899}
26900
26901/// Cast vector of type __m512i to type __m128i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26902///
26903/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_si128&expand=636)
26904#[inline]
26905#[target_feature(enable = "avx512f")]
26906#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26907pub fn _mm512_castsi512_si128(a: __m512i) -> __m128i {
26908    unsafe { simd_shuffle!(a, a, [0, 1]) }
26909}
26910
26911/// Cast vector of type __m512i to type __m256i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26912///
26913/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_si256&expand=637)
26914#[inline]
26915#[target_feature(enable = "avx512f")]
26916#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26917pub fn _mm512_castsi512_si256(a: __m512i) -> __m256i {
26918    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
26919}
26920
26921/// Cast vector of type __m512i to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26922///
26923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_ps&expand=635)
26924#[inline]
26925#[target_feature(enable = "avx512f")]
26926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26927pub fn _mm512_castsi512_ps(a: __m512i) -> __m512 {
26928    unsafe { transmute(a) }
26929}
26930
26931/// Cast vector of type __m512i to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26932///
26933/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_pd&expand=634)
26934#[inline]
26935#[target_feature(enable = "avx512f")]
26936#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26937pub fn _mm512_castsi512_pd(a: __m512i) -> __m512d {
26938    unsafe { transmute(a) }
26939}
26940
26941/// Copy the lower 32-bit integer in a to dst.
26942///
26943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsi512_si32&expand=1882)
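///
/// Illustrative sketch (hypothetical values; assumes `avx512f` is enabled
/// for the calling code):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let v = _mm512_setr_epi32(7, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// assert_eq!(_mm512_cvtsi512_si32(v), 7); // element 0 is returned
/// ```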
26944#[inline]
26945#[target_feature(enable = "avx512f")]
26946#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26947#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(vmovd))]
26948pub fn _mm512_cvtsi512_si32(a: __m512i) -> i32 {
26949    unsafe { simd_extract!(a.as_i32x16(), 0) }
26950}
26951
26952/// Copy the lower single-precision (32-bit) floating-point element of a to dst.
26953///
26954/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtss_f32)
26955#[inline]
26956#[target_feature(enable = "avx512f")]
26957#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26958pub fn _mm512_cvtss_f32(a: __m512) -> f32 {
26959    unsafe { simd_extract!(a, 0) }
26960}
26961
26962/// Copy the lower double-precision (64-bit) floating-point element of a to dst.
26963///
26964/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsd_f64)
26965#[inline]
26966#[target_feature(enable = "avx512f")]
26967#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26968pub fn _mm512_cvtsd_f64(a: __m512d) -> f64 {
26969    unsafe { simd_extract!(a, 0) }
26970}
26971
26972/// Broadcast the low packed 32-bit integer from a to all elements of dst.
26973///
26974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastd_epi32&expand=545)
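///
/// Illustrative sketch (assumes `avx512f` is enabled for the calling code):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_setr_epi32(9, 1, 2, 3);
/// // Only the lowest element of `a` matters: every one of the 16 lanes
/// // of `r` is 9.
/// let r = _mm512_broadcastd_epi32(a);
/// ```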
26975#[inline]
26976#[target_feature(enable = "avx512f")]
26977#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26978#[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastd
26979pub fn _mm512_broadcastd_epi32(a: __m128i) -> __m512i {
26980    unsafe {
26981        let a = _mm512_castsi128_si512(a).as_i32x16();
26982        let ret: i32x16 = simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
26983        transmute(ret)
26984    }
26985}
26986
26987/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26988///
26989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastd_epi32&expand=546)
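///
/// Illustrative sketch of the write-masking (assumes `avx512f` is enabled
/// for the calling code):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let src = _mm512_set1_epi32(-1);
/// let a = _mm_set1_epi32(9);
/// // Lanes 0 and 1 become 9; all other lanes keep the value from `src`.
/// let r = _mm512_mask_broadcastd_epi32(src, 0b0000_0000_0000_0011, a);
/// ```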
26990#[inline]
26991#[target_feature(enable = "avx512f")]
26992#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26993#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
26994pub fn _mm512_mask_broadcastd_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
26995    unsafe {
26996        let broadcast = _mm512_broadcastd_epi32(a).as_i32x16();
26997        transmute(simd_select_bitmask(k, broadcast, src.as_i32x16()))
26998    }
26999}
27000
27001/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27002///
27003/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastd_epi32&expand=547)
27004#[inline]
27005#[target_feature(enable = "avx512f")]
27006#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27007#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27008pub fn _mm512_maskz_broadcastd_epi32(k: __mmask16, a: __m128i) -> __m512i {
27009    unsafe {
27010        let broadcast = _mm512_broadcastd_epi32(a).as_i32x16();
27011        transmute(simd_select_bitmask(k, broadcast, i32x16::ZERO))
27012    }
27013}
27014
27015/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27016///
27017/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastd_epi32&expand=543)
27018#[inline]
27019#[target_feature(enable = "avx512f,avx512vl")]
27020#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27021#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27022pub fn _mm256_mask_broadcastd_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
27023    unsafe {
27024        let broadcast = _mm256_broadcastd_epi32(a).as_i32x8();
27025        transmute(simd_select_bitmask(k, broadcast, src.as_i32x8()))
27026    }
27027}
27028
27029/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27030///
27031/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastd_epi32&expand=544)
27032#[inline]
27033#[target_feature(enable = "avx512f,avx512vl")]
27034#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27035#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27036pub fn _mm256_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m256i {
27037    unsafe {
27038        let broadcast = _mm256_broadcastd_epi32(a).as_i32x8();
27039        transmute(simd_select_bitmask(k, broadcast, i32x8::ZERO))
27040    }
27041}
27042
27043/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27044///
27045/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastd_epi32&expand=540)
27046#[inline]
27047#[target_feature(enable = "avx512f,avx512vl")]
27048#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27049#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27050pub fn _mm_mask_broadcastd_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
27051    unsafe {
27052        let broadcast = _mm_broadcastd_epi32(a).as_i32x4();
27053        transmute(simd_select_bitmask(k, broadcast, src.as_i32x4()))
27054    }
27055}
27056
27057/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27058///
27059/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastd_epi32&expand=541)
27060#[inline]
27061#[target_feature(enable = "avx512f,avx512vl")]
27062#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27063#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27064pub fn _mm_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m128i {
27065    unsafe {
27066        let broadcast = _mm_broadcastd_epi32(a).as_i32x4();
27067        transmute(simd_select_bitmask(k, broadcast, i32x4::ZERO))
27068    }
27069}
27070
27071/// Broadcast the low packed 64-bit integer from a to all elements of dst.
27072///
27073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastq_epi64&expand=560)
27074#[inline]
27075#[target_feature(enable = "avx512f")]
27076#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27077#[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastq
27078pub fn _mm512_broadcastq_epi64(a: __m128i) -> __m512i {
27079    unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) }
27080}
27081
27082/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27083///
27084/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastq_epi64&expand=561)
27085#[inline]
27086#[target_feature(enable = "avx512f")]
27087#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27088#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27089pub fn _mm512_mask_broadcastq_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
27090    unsafe {
27091        let broadcast = _mm512_broadcastq_epi64(a).as_i64x8();
27092        transmute(simd_select_bitmask(k, broadcast, src.as_i64x8()))
27093    }
27094}
27095
27096/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27097///
27098/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastq_epi64&expand=562)
27099#[inline]
27100#[target_feature(enable = "avx512f")]
27101#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27102#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27103pub fn _mm512_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m512i {
27104    unsafe {
27105        let broadcast = _mm512_broadcastq_epi64(a).as_i64x8();
27106        transmute(simd_select_bitmask(k, broadcast, i64x8::ZERO))
27107    }
27108}
27109
27110/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27111///
27112/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastq_epi64&expand=558)
27113#[inline]
27114#[target_feature(enable = "avx512f,avx512vl")]
27115#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27116#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27117pub fn _mm256_mask_broadcastq_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
27118    unsafe {
27119        let broadcast = _mm256_broadcastq_epi64(a).as_i64x4();
27120        transmute(simd_select_bitmask(k, broadcast, src.as_i64x4()))
27121    }
27122}
27123
27124/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27125///
27126/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastq_epi64&expand=559)
27127#[inline]
27128#[target_feature(enable = "avx512f,avx512vl")]
27129#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27130#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27131pub fn _mm256_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m256i {
27132    unsafe {
27133        let broadcast = _mm256_broadcastq_epi64(a).as_i64x4();
27134        transmute(simd_select_bitmask(k, broadcast, i64x4::ZERO))
27135    }
27136}
27137
27138/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27139///
27140/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastq_epi64&expand=555)
27141#[inline]
27142#[target_feature(enable = "avx512f,avx512vl")]
27143#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27144#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27145pub fn _mm_mask_broadcastq_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
27146    unsafe {
27147        let broadcast = _mm_broadcastq_epi64(a).as_i64x2();
27148        transmute(simd_select_bitmask(k, broadcast, src.as_i64x2()))
27149    }
27150}
27151
27152/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27153///
27154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastq_epi64&expand=556)
27155#[inline]
27156#[target_feature(enable = "avx512f,avx512vl")]
27157#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27158#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27159pub fn _mm_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m128i {
27160    unsafe {
27161        let broadcast = _mm_broadcastq_epi64(a).as_i64x2();
27162        transmute(simd_select_bitmask(k, broadcast, i64x2::ZERO))
27163    }
27164}
27165
27166/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst.
27167///
27168/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastss_ps&expand=578)
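///
/// Illustrative sketch (assumes `avx512f` is enabled for the calling code):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_setr_ps(3.5, 0., 0., 0.);
/// // All 16 lanes of `r` are 3.5.
/// let r = _mm512_broadcastss_ps(a);
/// ```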
27169#[inline]
27170#[target_feature(enable = "avx512f")]
27171#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27172#[cfg_attr(test, assert_instr(vbroadcastss))]
27173pub fn _mm512_broadcastss_ps(a: __m128) -> __m512 {
27174    unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) }
27175}
27176
27177/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27178///
27179/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastss_ps&expand=579)
27180#[inline]
27181#[target_feature(enable = "avx512f")]
27182#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27183#[cfg_attr(test, assert_instr(vbroadcastss))]
27184pub fn _mm512_mask_broadcastss_ps(src: __m512, k: __mmask16, a: __m128) -> __m512 {
27185    unsafe {
27186        let broadcast = _mm512_broadcastss_ps(a).as_f32x16();
27187        transmute(simd_select_bitmask(k, broadcast, src.as_f32x16()))
27188    }
27189}
27190
27191/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27192///
27193/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastss_ps&expand=580)
27194#[inline]
27195#[target_feature(enable = "avx512f")]
27196#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27197#[cfg_attr(test, assert_instr(vbroadcastss))]
27198pub fn _mm512_maskz_broadcastss_ps(k: __mmask16, a: __m128) -> __m512 {
27199    unsafe {
27200        let broadcast = _mm512_broadcastss_ps(a).as_f32x16();
27201        transmute(simd_select_bitmask(k, broadcast, f32x16::ZERO))
27202    }
27203}
27204
27205/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27206///
27207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastss_ps&expand=576)
27208#[inline]
27209#[target_feature(enable = "avx512f,avx512vl")]
27210#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27211#[cfg_attr(test, assert_instr(vbroadcastss))]
27212pub fn _mm256_mask_broadcastss_ps(src: __m256, k: __mmask8, a: __m128) -> __m256 {
27213    unsafe {
27214        let broadcast = _mm256_broadcastss_ps(a).as_f32x8();
27215        transmute(simd_select_bitmask(k, broadcast, src.as_f32x8()))
27216    }
27217}
27218
27219/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27220///
27221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastss_ps&expand=577)
27222#[inline]
27223#[target_feature(enable = "avx512f,avx512vl")]
27224#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27225#[cfg_attr(test, assert_instr(vbroadcastss))]
27226pub fn _mm256_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m256 {
27227    unsafe {
27228        let broadcast = _mm256_broadcastss_ps(a).as_f32x8();
27229        transmute(simd_select_bitmask(k, broadcast, f32x8::ZERO))
27230    }
27231}
27232
27233/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27234///
27235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastss_ps&expand=573)
27236#[inline]
27237#[target_feature(enable = "avx512f,avx512vl")]
27238#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27239#[cfg_attr(test, assert_instr(vbroadcastss))]
27240pub fn _mm_mask_broadcastss_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
27241    unsafe {
27242        let broadcast = _mm_broadcastss_ps(a).as_f32x4();
27243        transmute(simd_select_bitmask(k, broadcast, src.as_f32x4()))
27244    }
27245}
27246
27247/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27248///
27249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastss_ps&expand=574)
27250#[inline]
27251#[target_feature(enable = "avx512f,avx512vl")]
27252#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27253#[cfg_attr(test, assert_instr(vbroadcastss))]
27254pub fn _mm_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m128 {
27255    unsafe {
27256        let broadcast = _mm_broadcastss_ps(a).as_f32x4();
27257        transmute(simd_select_bitmask(k, broadcast, f32x4::ZERO))
27258    }
27259}
27260
27261/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst.
27262///
27263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastsd_pd&expand=567)
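///
/// Illustrative sketch (assumes `avx512f` is enabled for the calling code):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_setr_pd(2.5, -1.0);
/// // All 8 lanes of `r` are 2.5 (the low element of `a`).
/// let r = _mm512_broadcastsd_pd(a);
/// ```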
27264#[inline]
27265#[target_feature(enable = "avx512f")]
27266#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27267#[cfg_attr(test, assert_instr(vbroadcastsd))]
27268pub fn _mm512_broadcastsd_pd(a: __m128d) -> __m512d {
27269    unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) }
27270}
27271
27272/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27273///
27274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastsd_pd&expand=568)
27275#[inline]
27276#[target_feature(enable = "avx512f")]
27277#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27278#[cfg_attr(test, assert_instr(vbroadcastsd))]
27279pub fn _mm512_mask_broadcastsd_pd(src: __m512d, k: __mmask8, a: __m128d) -> __m512d {
27280    unsafe {
27281        let broadcast = _mm512_broadcastsd_pd(a).as_f64x8();
27282        transmute(simd_select_bitmask(k, broadcast, src.as_f64x8()))
27283    }
27284}
27285
27286/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27287///
27288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastsd_pd&expand=569)
27289#[inline]
27290#[target_feature(enable = "avx512f")]
27291#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27292#[cfg_attr(test, assert_instr(vbroadcastsd))]
27293pub fn _mm512_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m512d {
27294    unsafe {
27295        let broadcast = _mm512_broadcastsd_pd(a).as_f64x8();
27296        transmute(simd_select_bitmask(k, broadcast, f64x8::ZERO))
27297    }
27298}
27299
27300/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27301///
27302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastsd_pd&expand=565)
27303#[inline]
27304#[target_feature(enable = "avx512f,avx512vl")]
27305#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27306#[cfg_attr(test, assert_instr(vbroadcastsd))]
27307pub fn _mm256_mask_broadcastsd_pd(src: __m256d, k: __mmask8, a: __m128d) -> __m256d {
27308    unsafe {
27309        let broadcast = _mm256_broadcastsd_pd(a).as_f64x4();
27310        transmute(simd_select_bitmask(k, broadcast, src.as_f64x4()))
27311    }
27312}
27313
27314/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27315///
27316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastsd_pd&expand=566)
27317#[inline]
27318#[target_feature(enable = "avx512f,avx512vl")]
27319#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27320#[cfg_attr(test, assert_instr(vbroadcastsd))]
27321pub fn _mm256_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m256d {
27322    unsafe {
27323        let broadcast = _mm256_broadcastsd_pd(a).as_f64x4();
27324        transmute(simd_select_bitmask(k, broadcast, f64x4::ZERO))
27325    }
27326}
27327
27328/// Broadcast the 4 packed 32-bit integers from a to all elements of dst.
27329///
27330/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i32x4&expand=510)
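///
/// Illustrative sketch (assumes `avx512f` is enabled for the calling code):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_setr_epi32(0, 1, 2, 3);
/// // The whole 128-bit pattern is repeated four times:
/// // r = [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3].
/// let r = _mm512_broadcast_i32x4(a);
/// ```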
27331#[inline]
27332#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
27333#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27334pub fn _mm512_broadcast_i32x4(a: __m128i) -> __m512i {
27335    unsafe {
27336        let a = a.as_i32x4();
27337        let ret: i32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]);
27338        transmute(ret)
27339    }
27340}
27341
27342/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27343///
27344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i32x4&expand=511)
27345#[inline]
27346#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
27347#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27348pub fn _mm512_mask_broadcast_i32x4(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
27349    unsafe {
27350        let broadcast = _mm512_broadcast_i32x4(a).as_i32x16();
27351        transmute(simd_select_bitmask(k, broadcast, src.as_i32x16()))
27352    }
27353}
27354
27355/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27356///
27357/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i32x4&expand=512)
27358#[inline]
27359#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
27360#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27361pub fn _mm512_maskz_broadcast_i32x4(k: __mmask16, a: __m128i) -> __m512i {
27362    unsafe {
27363        let broadcast = _mm512_broadcast_i32x4(a).as_i32x16();
27364        transmute(simd_select_bitmask(k, broadcast, i32x16::ZERO))
27365    }
27366}
27367
27368/// Broadcast the 4 packed 32-bit integers from a to all elements of dst.
27369///
27370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_i32x4&expand=507)
27371#[inline]
27372#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
27373#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27374pub fn _mm256_broadcast_i32x4(a: __m128i) -> __m256i {
27375    unsafe {
27376        let a = a.as_i32x4();
27377        let ret: i32x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]);
27378        transmute(ret)
27379    }
27380}
27381
27382/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27383///
27384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_i32x4&expand=508)
27385#[inline]
27386#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
27387#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27388pub fn _mm256_mask_broadcast_i32x4(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
27389    unsafe {
27390        let broadcast = _mm256_broadcast_i32x4(a).as_i32x8();
27391        transmute(simd_select_bitmask(k, broadcast, src.as_i32x8()))
27392    }
27393}
27394
27395/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27396///
27397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_i32x4&expand=509)
27398#[inline]
27399#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
27400#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27401pub fn _mm256_maskz_broadcast_i32x4(k: __mmask8, a: __m128i) -> __m256i {
27402    unsafe {
27403        let broadcast = _mm256_broadcast_i32x4(a).as_i32x8();
27404        transmute(simd_select_bitmask(k, broadcast, i32x8::ZERO))
27405    }
27406}
27407
27408/// Broadcast the 4 packed 64-bit integers from a to all elements of dst.
27409///
27410/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i64x4&expand=522)
27411#[inline]
27412#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
27413#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27414pub fn _mm512_broadcast_i64x4(a: __m256i) -> __m512i {
27415    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
27416}
27417
27418/// Broadcast the 4 packed 64-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27419///
27420/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i64x4&expand=523)
27421#[inline]
27422#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
27423#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27424pub fn _mm512_mask_broadcast_i64x4(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
27425    unsafe {
27426        let broadcast = _mm512_broadcast_i64x4(a).as_i64x8();
27427        transmute(simd_select_bitmask(k, broadcast, src.as_i64x8()))
27428    }
27429}
27430
27431/// Broadcast the 4 packed 64-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27432///
27433/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i64x4&expand=524)
27434#[inline]
27435#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
27436#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27437pub fn _mm512_maskz_broadcast_i64x4(k: __mmask8, a: __m256i) -> __m512i {
27438    unsafe {
27439        let broadcast = _mm512_broadcast_i64x4(a).as_i64x8();
27440        transmute(simd_select_bitmask(k, broadcast, i64x8::ZERO))
27441    }
27442}
27443
27444/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst.
27445///
27446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f32x4&expand=483)
27447#[inline]
27448#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
27449#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27450pub fn _mm512_broadcast_f32x4(a: __m128) -> __m512 {
27451    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]) }
27452}
27453
27454/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27455///
27456/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f32x4&expand=484)
27457#[inline]
26458#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
27459#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27460pub fn _mm512_mask_broadcast_f32x4(src: __m512, k: __mmask16, a: __m128) -> __m512 {
27461    unsafe {
27462        let broadcast = _mm512_broadcast_f32x4(a).as_f32x16();
27463        transmute(simd_select_bitmask(k, broadcast, src.as_f32x16()))
27464    }
27465}
27466
27467/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27468///
27469/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f32x4&expand=485)
27470#[inline]
26471#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
27472#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27473pub fn _mm512_maskz_broadcast_f32x4(k: __mmask16, a: __m128) -> __m512 {
27474    unsafe {
27475        let broadcast = _mm512_broadcast_f32x4(a).as_f32x16();
27476        transmute(simd_select_bitmask(k, broadcast, f32x16::ZERO))
27477    }
27478}
27479
27480/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst.
27481///
27482/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_f32x4&expand=480)
27483#[inline]
27484#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
27485#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27486pub fn _mm256_broadcast_f32x4(a: __m128) -> __m256 {
27487    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
27488}
27489
27490/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27491///
27492/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_f32x4&expand=481)
27493#[inline]
27494#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
27495#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27496pub fn _mm256_mask_broadcast_f32x4(src: __m256, k: __mmask8, a: __m128) -> __m256 {
27497    unsafe {
27498        let broadcast = _mm256_broadcast_f32x4(a).as_f32x8();
27499        transmute(simd_select_bitmask(k, broadcast, src.as_f32x8()))
27500    }
27501}
27502
27503/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27504///
27505/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_f32x4&expand=482)
27506#[inline]
27507#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
27508#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27509pub fn _mm256_maskz_broadcast_f32x4(k: __mmask8, a: __m128) -> __m256 {
27510    unsafe {
27511        let broadcast = _mm256_broadcast_f32x4(a).as_f32x8();
27512        transmute(simd_select_bitmask(k, broadcast, f32x8::ZERO))
27513    }
27514}
27515
27516/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst.
27517///
27518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f64x4&expand=495)
27519#[inline]
27520#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
27521#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27522pub fn _mm512_broadcast_f64x4(a: __m256d) -> __m512d {
27523    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
27524}
27525
27526/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27527///
27528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f64x4&expand=496)
27529#[inline]
27530#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
27531#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27532pub fn _mm512_mask_broadcast_f64x4(src: __m512d, k: __mmask8, a: __m256d) -> __m512d {
27533    unsafe {
27534        let broadcast = _mm512_broadcast_f64x4(a).as_f64x8();
27535        transmute(simd_select_bitmask(k, broadcast, src.as_f64x8()))
27536    }
27537}
27538
27539/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27540///
27541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f64x4&expand=497)
27542#[inline]
27543#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
27544#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27545pub fn _mm512_maskz_broadcast_f64x4(k: __mmask8, a: __m256d) -> __m512d {
27546    unsafe {
27547        let broadcast = _mm512_broadcast_f64x4(a).as_f64x8();
27548        transmute(simd_select_bitmask(k, broadcast, f64x8::ZERO))
27549    }
27550}
27551
27552/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
27553///
27554/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi32&expand=435)
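///
/// A minimal usage sketch, assuming a nightly toolchain with the
/// `stdarch_x86_avx512` feature enabled and a CPU that supports AVX-512F:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi32(0);
///     let b = _mm512_set1_epi32(1);
///     // For each lane i: dst[i] = if bit i of k is set { b[i] } else { a[i] }.
///     let r = _mm512_mask_blend_epi32(0b1010_1010_1010_1010, a, b);
///     // Odd lanes of r are 1 (taken from b), even lanes are 0 (taken from a).
/// }
/// ```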
27555#[inline]
27556#[target_feature(enable = "avx512f")]
27557#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27558#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
27559pub fn _mm512_mask_blend_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
27560    unsafe { transmute(simd_select_bitmask(k, b.as_i32x16(), a.as_i32x16())) }
27561}
27562
27563/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
27564///
27565/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi32&expand=434)
27566#[inline]
27567#[target_feature(enable = "avx512f,avx512vl")]
27568#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27569#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
27570pub fn _mm256_mask_blend_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
27571    unsafe { transmute(simd_select_bitmask(k, b.as_i32x8(), a.as_i32x8())) }
27572}
27573
27574/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
27575///
27576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi32&expand=432)
27577#[inline]
27578#[target_feature(enable = "avx512f,avx512vl")]
27579#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27580#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
27581pub fn _mm_mask_blend_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27582    unsafe { transmute(simd_select_bitmask(k, b.as_i32x4(), a.as_i32x4())) }
27583}
27584
27585/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
27586///
27587/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi64&expand=438)
27588#[inline]
27589#[target_feature(enable = "avx512f")]
27590#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27591#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
27592pub fn _mm512_mask_blend_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
27593    unsafe { transmute(simd_select_bitmask(k, b.as_i64x8(), a.as_i64x8())) }
27594}
27595
27596/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
27597///
27598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi64&expand=437)
27599#[inline]
27600#[target_feature(enable = "avx512f,avx512vl")]
27601#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27602#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
27603pub fn _mm256_mask_blend_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
27604    unsafe { transmute(simd_select_bitmask(k, b.as_i64x4(), a.as_i64x4())) }
27605}
27606
27607/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
27608///
27609/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi64&expand=436)
27610#[inline]
27611#[target_feature(enable = "avx512f,avx512vl")]
27612#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27613#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
27614pub fn _mm_mask_blend_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27615    unsafe { transmute(simd_select_bitmask(k, b.as_i64x2(), a.as_i64x2())) }
27616}
27617
27618/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27619///
27620/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_ps&expand=451)
27621#[inline]
27622#[target_feature(enable = "avx512f")]
27623#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27624#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
27625pub fn _mm512_mask_blend_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
27626    unsafe { transmute(simd_select_bitmask(k, b.as_f32x16(), a.as_f32x16())) }
27627}
27628
27629/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27630///
27631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_ps&expand=450)
27632#[inline]
27633#[target_feature(enable = "avx512f,avx512vl")]
27634#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27635#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
27636pub fn _mm256_mask_blend_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
27637    unsafe { transmute(simd_select_bitmask(k, b.as_f32x8(), a.as_f32x8())) }
27638}
27639
27640/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27641///
27642/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_ps&expand=448)
27643#[inline]
27644#[target_feature(enable = "avx512f,avx512vl")]
27645#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27646#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
27647pub fn _mm_mask_blend_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
27648    unsafe { transmute(simd_select_bitmask(k, b.as_f32x4(), a.as_f32x4())) }
27649}
27650
27651/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27652///
27653/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_pd&expand=446)
27654#[inline]
27655#[target_feature(enable = "avx512f")]
27656#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27657#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
27658pub fn _mm512_mask_blend_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
27659    unsafe { transmute(simd_select_bitmask(k, b.as_f64x8(), a.as_f64x8())) }
27660}
27661
27662/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27663///
27664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_pd&expand=445)
27665#[inline]
27666#[target_feature(enable = "avx512f,avx512vl")]
27667#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27668#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
27669pub fn _mm256_mask_blend_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
27670    unsafe { transmute(simd_select_bitmask(k, b.as_f64x4(), a.as_f64x4())) }
27671}
27672
27673/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27674///
27675/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_pd&expand=443)
27676#[inline]
27677#[target_feature(enable = "avx512f,avx512vl")]
27678#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27679#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
27680pub fn _mm_mask_blend_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
27681    unsafe { transmute(simd_select_bitmask(k, b.as_f64x2(), a.as_f64x2())) }
27682}
27683
27684/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst.
27685///
27686/// <div class="warning">Only the lowest <strong>4 bits</strong> of <code>IMM8</code> are used (the shift is at most 60 bytes)!</div>
27687///
27688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi32&expand=245)
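///
/// A minimal worked sketch, assuming a nightly toolchain with the
/// `stdarch_x86_avx512` feature enabled and a CPU that supports AVX-512F:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // a = [100, 101, ..., 115], b = [0, 1, ..., 15]
///     let a = _mm512_setr_epi32(
///         100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115,
///     );
///     let b = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///     // Concatenate [a : b] (with b as the low half), shift right by three
///     // 32-bit elements, and keep the low 16 elements:
///     let r = _mm512_alignr_epi32::<3>(a, b);
///     // r == [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 100, 101, 102]
/// }
/// ```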
27689#[inline]
27690#[target_feature(enable = "avx512f")]
27691#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27692#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27693#[rustc_legacy_const_generics(2)]
27694pub fn _mm512_alignr_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
27695    unsafe {
27696        static_assert_uimm_bits!(IMM8, 8);
27697        let a = a.as_i32x16();
27698        let b = b.as_i32x16();
27699        let imm8: i32 = IMM8 % 16;
27700        let r: i32x16 = match imm8 {
27701            0 => simd_shuffle!(
27702                a,
27703                b,
27704                [
27705                    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
27706                ],
27707            ),
27708            1 => simd_shuffle!(
27709                a,
27710                b,
27711                [
27712                    17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0,
27713                ],
27714            ),
27715            2 => simd_shuffle!(
27716                a,
27717                b,
27718                [18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1],
27719            ),
27720            3 => simd_shuffle!(
27721                a,
27722                b,
27723                [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2],
27724            ),
27725            4 => simd_shuffle!(
27726                a,
27727                b,
27728                [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3],
27729            ),
27730            5 => simd_shuffle!(
27731                a,
27732                b,
27733                [21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4],
27734            ),
27735            6 => simd_shuffle!(
27736                a,
27737                b,
27738                [22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5],
27739            ),
27740            7 => simd_shuffle!(
27741                a,
27742                b,
27743                [23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6],
27744            ),
27745            8 => simd_shuffle!(
27746                a,
27747                b,
27748                [24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7],
27749            ),
27750            9 => simd_shuffle!(
27751                a,
27752                b,
27753                [25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8],
27754            ),
27755            10 => simd_shuffle!(a, b, [26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
27756            11 => simd_shuffle!(a, b, [27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
27757            12 => simd_shuffle!(a, b, [28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
27758            13 => simd_shuffle!(a, b, [29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
27759            14 => simd_shuffle!(a, b, [30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]),
27760            15 => simd_shuffle!(a, b, [31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
27761            _ => unreachable_unchecked(),
27762        };
27763        transmute(r)
27764    }
27765}
27766
27767/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27768///
27769/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi32&expand=246)
27770#[inline]
27771#[target_feature(enable = "avx512f")]
27772#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27773#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27774#[rustc_legacy_const_generics(4)]
27775pub fn _mm512_mask_alignr_epi32<const IMM8: i32>(
27776    src: __m512i,
27777    k: __mmask16,
27778    a: __m512i,
27779    b: __m512i,
27780) -> __m512i {
27781    unsafe {
27782        static_assert_uimm_bits!(IMM8, 8);
27783        let r = _mm512_alignr_epi32::<IMM8>(a, b);
27784        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
27785    }
27786}
27787
27788/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27789///
27790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi32&expand=247)
27791#[inline]
27792#[target_feature(enable = "avx512f")]
27793#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27794#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27795#[rustc_legacy_const_generics(3)]
27796pub fn _mm512_maskz_alignr_epi32<const IMM8: i32>(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
27797    unsafe {
27798        static_assert_uimm_bits!(IMM8, 8);
27799        let r = _mm512_alignr_epi32::<IMM8>(a, b);
27800        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
27801    }
27802}
27803
27804/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst.
27805///
27806/// <div class="warning">Only the lowest <strong>3 bits</strong> of <code>IMM8</code> are used (the shift is at most 28 bytes)!</div>
27807///
27808/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi32&expand=242)
27809#[inline]
27810#[target_feature(enable = "avx512f,avx512vl")]
27811#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27812#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27813#[rustc_legacy_const_generics(2)]
27814pub fn _mm256_alignr_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
27815    unsafe {
27816        static_assert_uimm_bits!(IMM8, 8);
27817        let a = a.as_i32x8();
27818        let b = b.as_i32x8();
27819        let imm8: i32 = IMM8 % 8;
27820        let r: i32x8 = match imm8 {
27821            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
27822            1 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
27823            2 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
27824            3 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
27825            4 => simd_shuffle!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
27826            5 => simd_shuffle!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
27827            6 => simd_shuffle!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
27828            7 => simd_shuffle!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
27829            _ => unreachable_unchecked(),
27830        };
27831        transmute(r)
27832    }
27833}
27834
27835/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27836///
27837/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi32&expand=243)
27838#[inline]
27839#[target_feature(enable = "avx512f,avx512vl")]
27840#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27841#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27842#[rustc_legacy_const_generics(4)]
27843pub fn _mm256_mask_alignr_epi32<const IMM8: i32>(
27844    src: __m256i,
27845    k: __mmask8,
27846    a: __m256i,
27847    b: __m256i,
27848) -> __m256i {
27849    unsafe {
27850        static_assert_uimm_bits!(IMM8, 8);
27851        let r = _mm256_alignr_epi32::<IMM8>(a, b);
27852        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
27853    }
27854}
27855
27856/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27857///
27858/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi32&expand=244)
27859#[inline]
27860#[target_feature(enable = "avx512f,avx512vl")]
27861#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27862#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27863#[rustc_legacy_const_generics(3)]
27864pub fn _mm256_maskz_alignr_epi32<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
27865    unsafe {
27866        static_assert_uimm_bits!(IMM8, 8);
27867        let r = _mm256_alignr_epi32::<IMM8>(a, b);
27868        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
27869    }
27870}
27871
27872/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst.
27873///
27874/// <div class="warning">Only the lowest <strong>2 bits</strong> of <code>IMM8</code> are used (the shift is at most 12 bytes)!</div>
27875///
27876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi32&expand=239)
27877#[inline]
27878#[target_feature(enable = "avx512f,avx512vl")]
27879#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27880#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] //should be valignd
27881#[rustc_legacy_const_generics(2)]
27882pub fn _mm_alignr_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
27883    unsafe {
27884        static_assert_uimm_bits!(IMM8, 8);
27885        let a = a.as_i32x4();
27886        let b = b.as_i32x4();
27887        let imm8: i32 = IMM8 % 4;
27888        let r: i32x4 = match imm8 {
27889            0 => simd_shuffle!(a, b, [4, 5, 6, 7]),
27890            1 => simd_shuffle!(a, b, [5, 6, 7, 0]),
27891            2 => simd_shuffle!(a, b, [6, 7, 0, 1]),
27892            3 => simd_shuffle!(a, b, [7, 0, 1, 2]),
27893            _ => unreachable_unchecked(),
27894        };
27895        transmute(r)
27896    }
27897}
27898
27899/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27900///
27901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi32&expand=240)
27902#[inline]
27903#[target_feature(enable = "avx512f,avx512vl")]
27904#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27905#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27906#[rustc_legacy_const_generics(4)]
27907pub fn _mm_mask_alignr_epi32<const IMM8: i32>(
27908    src: __m128i,
27909    k: __mmask8,
27910    a: __m128i,
27911    b: __m128i,
27912) -> __m128i {
27913    unsafe {
27914        static_assert_uimm_bits!(IMM8, 8);
27915        let r = _mm_alignr_epi32::<IMM8>(a, b);
27916        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
27917    }
27918}
27919
27920/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27921///
27922/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi32&expand=241)
27923#[inline]
27924#[target_feature(enable = "avx512f,avx512vl")]
27925#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27926#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27927#[rustc_legacy_const_generics(3)]
27928pub fn _mm_maskz_alignr_epi32<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27929    unsafe {
27930        static_assert_uimm_bits!(IMM8, 8);
27931        let r = _mm_alignr_epi32::<IMM8>(a, b);
27932        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
27933    }
27934}
27935
27936/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst.
27937///
27938/// <div class="warning">Only the lowest <strong>3 bits</strong> of <code>IMM8</code> are used (the shift is at most 56 bytes)!</div>
27939///
27940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi64&expand=254)
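///
/// A minimal worked sketch, assuming a nightly toolchain with the
/// `stdarch_x86_avx512` feature enabled and a CPU that supports AVX-512F:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_setr_epi64(10, 11, 12, 13, 14, 15, 16, 17);
///     let b = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
///     // Concatenate [a : b] (with b as the low half) and shift right by
///     // two 64-bit elements:
///     let r = _mm512_alignr_epi64::<2>(a, b);
///     // r == [2, 3, 4, 5, 6, 7, 10, 11]
/// }
/// ```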
27941#[inline]
27942#[target_feature(enable = "avx512f")]
27943#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27944#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
27945#[rustc_legacy_const_generics(2)]
27946pub fn _mm512_alignr_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
27947    unsafe {
27948        static_assert_uimm_bits!(IMM8, 8);
27949        let imm8: i32 = IMM8 % 8;
27950        let r: i64x8 = match imm8 {
27951            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
27952            1 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
27953            2 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
27954            3 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
27955            4 => simd_shuffle!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
27956            5 => simd_shuffle!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
27957            6 => simd_shuffle!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
27958            7 => simd_shuffle!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
27959            _ => unreachable_unchecked(),
27960        };
27961        transmute(r)
27962    }
27963}
27964
27965/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27966///
27967/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi64&expand=255)
27968#[inline]
27969#[target_feature(enable = "avx512f")]
27970#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27971#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
27972#[rustc_legacy_const_generics(4)]
27973pub fn _mm512_mask_alignr_epi64<const IMM8: i32>(
27974    src: __m512i,
27975    k: __mmask8,
27976    a: __m512i,
27977    b: __m512i,
27978) -> __m512i {
27979    unsafe {
27980        static_assert_uimm_bits!(IMM8, 8);
27981        let r = _mm512_alignr_epi64::<IMM8>(a, b);
27982        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
27983    }
27984}
27985
27986/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27987///
27988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi64&expand=256)
27989#[inline]
27990#[target_feature(enable = "avx512f")]
27991#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27992#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
27993#[rustc_legacy_const_generics(3)]
27994pub fn _mm512_maskz_alignr_epi64<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
27995    unsafe {
27996        static_assert_uimm_bits!(IMM8, 8);
27997        let r = _mm512_alignr_epi64::<IMM8>(a, b);
27998        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
27999    }
28000}
28001
28002/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst.
28003///
28004/// <div class="warning">Only the lowest <strong>2 bits</strong> of <code>IMM8</code> are used (the shift is at most 24 bytes)!</div>
28005///
28006/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi64&expand=251)
28007#[inline]
28008#[target_feature(enable = "avx512f,avx512vl")]
28009#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28010#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28011#[rustc_legacy_const_generics(2)]
28012pub fn _mm256_alignr_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
28013    unsafe {
28014        static_assert_uimm_bits!(IMM8, 8);
28015        let imm8: i32 = IMM8 % 4;
28016        let r: i64x4 = match imm8 {
28017            0 => simd_shuffle!(a, b, [4, 5, 6, 7]),
28018            1 => simd_shuffle!(a, b, [5, 6, 7, 0]),
28019            2 => simd_shuffle!(a, b, [6, 7, 0, 1]),
28020            3 => simd_shuffle!(a, b, [7, 0, 1, 2]),
28021            _ => unreachable_unchecked(),
28022        };
28023        transmute(r)
28024    }
28025}
28026
28027/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28028///
28029/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi64&expand=252)
28030#[inline]
28031#[target_feature(enable = "avx512f,avx512vl")]
28032#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28033#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28034#[rustc_legacy_const_generics(4)]
28035pub fn _mm256_mask_alignr_epi64<const IMM8: i32>(
28036    src: __m256i,
28037    k: __mmask8,
28038    a: __m256i,
28039    b: __m256i,
28040) -> __m256i {
28041    unsafe {
28042        static_assert_uimm_bits!(IMM8, 8);
28043        let r = _mm256_alignr_epi64::<IMM8>(a, b);
28044        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
28045    }
28046}
28047
28048/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28049///
28050/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi64&expand=253)
28051#[inline]
28052#[target_feature(enable = "avx512f,avx512vl")]
28053#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28054#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28055#[rustc_legacy_const_generics(3)]
28056pub fn _mm256_maskz_alignr_epi64<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28057    unsafe {
28058        static_assert_uimm_bits!(IMM8, 8);
28059        let r = _mm256_alignr_epi64::<IMM8>(a, b);
28060        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
28061    }
28062}
28063
28064/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst.
28065///
28066/// <div class="warning">Only the lowest <strong>bit</strong> of <code>IMM8</code> is used (the shift is at most 8 bytes)!</div>
28067///
28068/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi64&expand=248)
28069#[inline]
28070#[target_feature(enable = "avx512f,avx512vl")]
28071#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28072#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] //should be valignq
28073#[rustc_legacy_const_generics(2)]
28074pub fn _mm_alignr_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
28075    unsafe {
28076        static_assert_uimm_bits!(IMM8, 8);
28077        let imm8: i32 = IMM8 % 2;
28078        let r: i64x2 = match imm8 {
28079            0 => simd_shuffle!(a, b, [2, 3]),
28080            1 => simd_shuffle!(a, b, [3, 0]),
28081            _ => unreachable_unchecked(),
28082        };
28083        transmute(r)
28084    }
28085}
28086
28087/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28088///
28089/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi64&expand=249)
28090#[inline]
28091#[target_feature(enable = "avx512f,avx512vl")]
28092#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28093#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28094#[rustc_legacy_const_generics(4)]
28095pub fn _mm_mask_alignr_epi64<const IMM8: i32>(
28096    src: __m128i,
28097    k: __mmask8,
28098    a: __m128i,
28099    b: __m128i,
28100) -> __m128i {
28101    unsafe {
28102        static_assert_uimm_bits!(IMM8, 8);
28103        let r = _mm_alignr_epi64::<IMM8>(a, b);
28104        transmute(simd_select_bitmask(k, r.as_i64x2(), src.as_i64x2()))
28105    }
28106}
28107
28108/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28109///
28110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi64&expand=250)
28111#[inline]
28112#[target_feature(enable = "avx512f,avx512vl")]
28113#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28114#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28115#[rustc_legacy_const_generics(3)]
28116pub fn _mm_maskz_alignr_epi64<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28117    unsafe {
28118        static_assert_uimm_bits!(IMM8, 8);
28119        let r = _mm_alignr_epi64::<IMM8>(a, b);
28120        transmute(simd_select_bitmask(k, r.as_i64x2(), i64x2::ZERO))
28121    }
28122}
28123
28124/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst.
28125///
28126/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_epi32&expand=272)
28127#[inline]
28128#[target_feature(enable = "avx512f")]
28129#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28130#[cfg_attr(test, assert_instr(vpandq))] //should be vpandd, but generates vpandq
28131pub fn _mm512_and_epi32(a: __m512i, b: __m512i) -> __m512i {
28132    unsafe { transmute(simd_and(a.as_i32x16(), b.as_i32x16())) }
28133}
28134
28135/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28136///
28137/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_epi32&expand=273)
28138#[inline]
28139#[target_feature(enable = "avx512f")]
28140#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28141#[cfg_attr(test, assert_instr(vpandd))]
28142pub fn _mm512_mask_and_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28143    unsafe {
28144        let and = _mm512_and_epi32(a, b).as_i32x16();
28145        transmute(simd_select_bitmask(k, and, src.as_i32x16()))
28146    }
28147}
28148
28149/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28150///
28151/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_epi32&expand=274)
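///
/// A minimal usage sketch, assuming a nightly toolchain with the
/// `stdarch_x86_avx512` feature enabled and a CPU that supports AVX-512F:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi32(0b1100);
///     let b = _mm512_set1_epi32(0b1010);
///     // Each lane computes 0b1100 & 0b1010 == 0b1000; lanes whose mask bit
///     // is clear are zeroed instead.
///     let r = _mm512_maskz_and_epi32(0b0000_0000_1111_1111, a, b);
///     // The low 8 lanes of r are 0b1000, the high 8 lanes are 0.
/// }
/// ```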
28152#[inline]
28153#[target_feature(enable = "avx512f")]
28154#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28155#[cfg_attr(test, assert_instr(vpandd))]
28156pub fn _mm512_maskz_and_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28157    unsafe {
28158        let and = _mm512_and_epi32(a, b).as_i32x16();
28159        transmute(simd_select_bitmask(k, and, i32x16::ZERO))
28160    }
28161}
28162
28163/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28164///
28165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_epi32&expand=270)
28166#[inline]
28167#[target_feature(enable = "avx512f,avx512vl")]
28168#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28169#[cfg_attr(test, assert_instr(vpandd))]
28170pub fn _mm256_mask_and_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28171    unsafe {
28172        let and = simd_and(a.as_i32x8(), b.as_i32x8());
28173        transmute(simd_select_bitmask(k, and, src.as_i32x8()))
28174    }
28175}
28176
28177/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28178///
28179/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_epi32&expand=271)
28180#[inline]
28181#[target_feature(enable = "avx512f,avx512vl")]
28182#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28183#[cfg_attr(test, assert_instr(vpandd))]
28184pub fn _mm256_maskz_and_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28185    unsafe {
28186        let and = simd_and(a.as_i32x8(), b.as_i32x8());
28187        transmute(simd_select_bitmask(k, and, i32x8::ZERO))
28188    }
28189}
28190
28191/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28192///
28193/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_epi32&expand=268)
28194#[inline]
28195#[target_feature(enable = "avx512f,avx512vl")]
28196#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28197#[cfg_attr(test, assert_instr(vpandd))]
28198pub fn _mm_mask_and_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28199    unsafe {
28200        let and = simd_and(a.as_i32x4(), b.as_i32x4());
28201        transmute(simd_select_bitmask(k, and, src.as_i32x4()))
28202    }
28203}
28204
28205/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28206///
28207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_epi32&expand=269)
28208#[inline]
28209#[target_feature(enable = "avx512f,avx512vl")]
28210#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28211#[cfg_attr(test, assert_instr(vpandd))]
28212pub fn _mm_maskz_and_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28213    unsafe {
28214        let and = simd_and(a.as_i32x4(), b.as_i32x4());
28215        transmute(simd_select_bitmask(k, and, i32x4::ZERO))
28216    }
28217}
28218
28219/// Compute the bitwise AND of 512 bits (composed of packed 64-bit integers) in a and b, and store the results in dst.
28220///
28221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_epi64&expand=279)
28222#[inline]
28223#[target_feature(enable = "avx512f")]
28224#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28225#[cfg_attr(test, assert_instr(vpandq))]
28226pub fn _mm512_and_epi64(a: __m512i, b: __m512i) -> __m512i {
28227    unsafe { transmute(simd_and(a.as_i64x8(), b.as_i64x8())) }
28228}
28229
28230/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28231///
28232/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_epi64&expand=280)
28233#[inline]
28234#[target_feature(enable = "avx512f")]
28235#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28236#[cfg_attr(test, assert_instr(vpandq))]
28237pub fn _mm512_mask_and_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28238    unsafe {
28239        let and = _mm512_and_epi64(a, b).as_i64x8();
28240        transmute(simd_select_bitmask(k, and, src.as_i64x8()))
28241    }
28242}
28243
28244/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28245///
28246/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_epi64&expand=281)
28247#[inline]
28248#[target_feature(enable = "avx512f")]
28249#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28250#[cfg_attr(test, assert_instr(vpandq))]
28251pub fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28252    unsafe {
28253        let and = _mm512_and_epi64(a, b).as_i64x8();
28254        transmute(simd_select_bitmask(k, and, i64x8::ZERO))
28255    }
28256}
28257
28258/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28259///
28260/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_epi64&expand=277)
28261#[inline]
28262#[target_feature(enable = "avx512f,avx512vl")]
28263#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28264#[cfg_attr(test, assert_instr(vpandq))]
28265pub fn _mm256_mask_and_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28266    unsafe {
28267        let and = simd_and(a.as_i64x4(), b.as_i64x4());
28268        transmute(simd_select_bitmask(k, and, src.as_i64x4()))
28269    }
28270}
28271
28272/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28273///
28274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_epi64&expand=278)
28275#[inline]
28276#[target_feature(enable = "avx512f,avx512vl")]
28277#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28278#[cfg_attr(test, assert_instr(vpandq))]
28279pub fn _mm256_maskz_and_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28280    unsafe {
28281        let and = simd_and(a.as_i64x4(), b.as_i64x4());
28282        transmute(simd_select_bitmask(k, and, i64x4::ZERO))
28283    }
28284}
28285
28286/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28287///
28288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_epi64&expand=275)
28289#[inline]
28290#[target_feature(enable = "avx512f,avx512vl")]
28291#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28292#[cfg_attr(test, assert_instr(vpandq))]
28293pub fn _mm_mask_and_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28294    unsafe {
28295        let and = simd_and(a.as_i64x2(), b.as_i64x2());
28296        transmute(simd_select_bitmask(k, and, src.as_i64x2()))
28297    }
28298}
28299
28300/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28301///
28302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_epi64&expand=276)
28303#[inline]
28304#[target_feature(enable = "avx512f,avx512vl")]
28305#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28306#[cfg_attr(test, assert_instr(vpandq))]
28307pub fn _mm_maskz_and_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28308    unsafe {
28309        let and = simd_and(a.as_i64x2(), b.as_i64x2());
28310        transmute(simd_select_bitmask(k, and, i64x2::ZERO))
28311    }
28312}
28313
28314/// Compute the bitwise AND of 512 bits (representing integer data) in a and b, and store the result in dst.
28315///
28316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_si512&expand=302)
28317#[inline]
28318#[target_feature(enable = "avx512f")]
28319#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28320#[cfg_attr(test, assert_instr(vpandq))]
28321pub fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i {
28322    unsafe { transmute(simd_and(a.as_i32x16(), b.as_i32x16())) }
28323}
28324
28325/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
28326///
28327/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_epi32&expand=4042)
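///
/// A minimal usage sketch, assuming a nightly toolchain with the
/// `stdarch_x86_avx512` feature enabled and a CPU that supports AVX-512F:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi32(0b0101);
///     let b = _mm512_set1_epi32(0b0011);
///     let r = _mm512_or_epi32(a, b);
///     // Every lane of r is 0b0111.
/// }
/// ```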
28328#[inline]
28329#[target_feature(enable = "avx512f")]
28330#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28331#[cfg_attr(test, assert_instr(vporq))]
28332pub fn _mm512_or_epi32(a: __m512i, b: __m512i) -> __m512i {
28333    unsafe { transmute(simd_or(a.as_i32x16(), b.as_i32x16())) }
28334}
28335
28336/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28337///
28338/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_epi32&expand=4040)
28339#[inline]
28340#[target_feature(enable = "avx512f")]
28341#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28342#[cfg_attr(test, assert_instr(vpord))]
28343pub fn _mm512_mask_or_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28344    unsafe {
28345        let or = _mm512_or_epi32(a, b).as_i32x16();
28346        transmute(simd_select_bitmask(k, or, src.as_i32x16()))
28347    }
28348}
28349
28350/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28351///
28352/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_epi32&expand=4041)
28353#[inline]
28354#[target_feature(enable = "avx512f")]
28355#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28356#[cfg_attr(test, assert_instr(vpord))]
28357pub fn _mm512_maskz_or_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28358    unsafe {
28359        let or = _mm512_or_epi32(a, b).as_i32x16();
28360        transmute(simd_select_bitmask(k, or, i32x16::ZERO))
28361    }
28362}
28363
28364/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
28365///
28366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_epi32&expand=4039)
28367#[inline]
28368#[target_feature(enable = "avx512f,avx512vl")]
28369#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28370#[cfg_attr(test, assert_instr(vor))] //should be vpord
28371pub fn _mm256_or_epi32(a: __m256i, b: __m256i) -> __m256i {
28372    unsafe { transmute(simd_or(a.as_i32x8(), b.as_i32x8())) }
28373}
28374
28375/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28376///
28377/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_epi32&expand=4037)
28378#[inline]
28379#[target_feature(enable = "avx512f,avx512vl")]
28380#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28381#[cfg_attr(test, assert_instr(vpord))]
28382pub fn _mm256_mask_or_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28383    unsafe {
28384        let or = _mm256_or_epi32(a, b).as_i32x8();
28385        transmute(simd_select_bitmask(k, or, src.as_i32x8()))
28386    }
28387}
28388
28389/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28390///
28391/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_epi32&expand=4038)
28392#[inline]
28393#[target_feature(enable = "avx512f,avx512vl")]
28394#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28395#[cfg_attr(test, assert_instr(vpord))]
28396pub fn _mm256_maskz_or_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28397    unsafe {
28398        let or = _mm256_or_epi32(a, b).as_i32x8();
28399        transmute(simd_select_bitmask(k, or, i32x8::ZERO))
28400    }
28401}
28402
28403/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
28404///
28405/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_epi32&expand=4036)
28406#[inline]
28407#[target_feature(enable = "avx512f,avx512vl")]
28408#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28409#[cfg_attr(test, assert_instr(vor))] //should be vpord
28410pub fn _mm_or_epi32(a: __m128i, b: __m128i) -> __m128i {
28411    unsafe { transmute(simd_or(a.as_i32x4(), b.as_i32x4())) }
28412}
28413
28414/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28415///
28416/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_epi32&expand=4034)
28417#[inline]
28418#[target_feature(enable = "avx512f,avx512vl")]
28419#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28420#[cfg_attr(test, assert_instr(vpord))]
28421pub fn _mm_mask_or_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28422    unsafe {
28423        let or = _mm_or_epi32(a, b).as_i32x4();
28424        transmute(simd_select_bitmask(k, or, src.as_i32x4()))
28425    }
28426}
28427
28428/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28429///
28430/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_epi32&expand=4035)
28431#[inline]
28432#[target_feature(enable = "avx512f,avx512vl")]
28433#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28434#[cfg_attr(test, assert_instr(vpord))]
28435pub fn _mm_maskz_or_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28436    unsafe {
28437        let or = _mm_or_epi32(a, b).as_i32x4();
28438        transmute(simd_select_bitmask(k, or, i32x4::ZERO))
28439    }
28440}
28441
28442/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst.
28443///
28444/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_epi64&expand=4051)
28445#[inline]
28446#[target_feature(enable = "avx512f")]
28447#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28448#[cfg_attr(test, assert_instr(vporq))]
28449pub fn _mm512_or_epi64(a: __m512i, b: __m512i) -> __m512i {
28450    unsafe { transmute(simd_or(a.as_i64x8(), b.as_i64x8())) }
28451}
28452
28453/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28454///
28455/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_epi64&expand=4049)
28456#[inline]
28457#[target_feature(enable = "avx512f")]
28458#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28459#[cfg_attr(test, assert_instr(vporq))]
28460pub fn _mm512_mask_or_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28461    unsafe {
28462        let or = _mm512_or_epi64(a, b).as_i64x8();
28463        transmute(simd_select_bitmask(k, or, src.as_i64x8()))
28464    }
28465}
28466
28467/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28468///
28469/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_epi64&expand=4050)
28470#[inline]
28471#[target_feature(enable = "avx512f")]
28472#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28473#[cfg_attr(test, assert_instr(vporq))]
28474pub fn _mm512_maskz_or_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28475    unsafe {
28476        let or = _mm512_or_epi64(a, b).as_i64x8();
28477        transmute(simd_select_bitmask(k, or, i64x8::ZERO))
28478    }
28479}
28480
28481/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst.
28482///
28483/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_epi64&expand=4048)
28484#[inline]
28485#[target_feature(enable = "avx512f,avx512vl")]
28486#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28487#[cfg_attr(test, assert_instr(vor))] //should be vporq
28488pub fn _mm256_or_epi64(a: __m256i, b: __m256i) -> __m256i {
28489    unsafe { transmute(simd_or(a.as_i64x4(), b.as_i64x4())) }
28490}
28491
28492/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28493///
28494/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_epi64&expand=4046)
28495#[inline]
28496#[target_feature(enable = "avx512f,avx512vl")]
28497#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28498#[cfg_attr(test, assert_instr(vporq))]
28499pub fn _mm256_mask_or_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28500    unsafe {
28501        let or = _mm256_or_epi64(a, b).as_i64x4();
28502        transmute(simd_select_bitmask(k, or, src.as_i64x4()))
28503    }
28504}
28505
28506/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28507///
28508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_epi64&expand=4047)
28509#[inline]
28510#[target_feature(enable = "avx512f,avx512vl")]
28511#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28512#[cfg_attr(test, assert_instr(vporq))]
28513pub fn _mm256_maskz_or_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28514    unsafe {
28515        let or = _mm256_or_epi64(a, b).as_i64x4();
28516        transmute(simd_select_bitmask(k, or, i64x4::ZERO))
28517    }
28518}
28519
28520/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst.
28521///
28522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_epi64&expand=4045)
28523#[inline]
28524#[target_feature(enable = "avx512f,avx512vl")]
28525#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28526#[cfg_attr(test, assert_instr(vor))] //should be vporq
28527pub fn _mm_or_epi64(a: __m128i, b: __m128i) -> __m128i {
28528    unsafe { transmute(simd_or(a.as_i64x2(), b.as_i64x2())) }
28529}
28530
28531/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28532///
28533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_epi64&expand=4043)
28534#[inline]
28535#[target_feature(enable = "avx512f,avx512vl")]
28536#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28537#[cfg_attr(test, assert_instr(vporq))]
28538pub fn _mm_mask_or_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28539    unsafe {
28540        let or = _mm_or_epi64(a, b).as_i64x2();
28541        transmute(simd_select_bitmask(k, or, src.as_i64x2()))
28542    }
28543}
28544
28545/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28546///
28547/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_epi64&expand=4044)
28548#[inline]
28549#[target_feature(enable = "avx512f,avx512vl")]
28550#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28551#[cfg_attr(test, assert_instr(vporq))]
28552pub fn _mm_maskz_or_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28553    unsafe {
28554        let or = _mm_or_epi64(a, b).as_i64x2();
28555        transmute(simd_select_bitmask(k, or, i64x2::ZERO))
28556    }
28557}
28558
28559/// Compute the bitwise OR of 512 bits (representing integer data) in a and b, and store the result in dst.
28560///
28561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_si512&expand=4072)
28562#[inline]
28563#[target_feature(enable = "avx512f")]
28564#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28565#[cfg_attr(test, assert_instr(vporq))]
28566pub fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i {
28567    unsafe { transmute(simd_or(a.as_i32x16(), b.as_i32x16())) }
28568}
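
// Illustrative sketch (not in the original source; the helper name is made
// up): `_mm512_or_si512` is the same bitwise OR as the element-typed variants,
// so it agrees lane-for-lane with `_mm512_or_epi64`. Assumes AVX-512F.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn or_si512_sketch() {
    let a = _mm512_set1_epi64(0x00ff_00ff_00ff_00ff);
    let b = _mm512_set1_epi64(0x0f0f_0f0f_0f0f_0f0f);
    // Identical results whether the OR is viewed as whole-register or per-lane.
    let full = _mm512_or_si512(a, b);
    let lanes = _mm512_or_epi64(a, b);
    assert_eq!(_mm512_cmpeq_epi64_mask(full, lanes), 0xff);
}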
28569
28570/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
28571///
28572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_epi32&expand=6142)
28573#[inline]
28574#[target_feature(enable = "avx512f")]
28575#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28576#[cfg_attr(test, assert_instr(vpxorq))] //should be vpxord
28577pub fn _mm512_xor_epi32(a: __m512i, b: __m512i) -> __m512i {
28578    unsafe { transmute(simd_xor(a.as_i32x16(), b.as_i32x16())) }
28579}
28580
28581/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28582///
28583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_epi32&expand=6140)
28584#[inline]
28585#[target_feature(enable = "avx512f")]
28586#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28587#[cfg_attr(test, assert_instr(vpxord))]
28588pub fn _mm512_mask_xor_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28589    unsafe {
28590        let xor = _mm512_xor_epi32(a, b).as_i32x16();
28591        transmute(simd_select_bitmask(k, xor, src.as_i32x16()))
28592    }
28593}
28594
28595/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28596///
28597/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_epi32&expand=6141)
28598#[inline]
28599#[target_feature(enable = "avx512f")]
28600#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28601#[cfg_attr(test, assert_instr(vpxord))]
28602pub fn _mm512_maskz_xor_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28603    unsafe {
28604        let xor = _mm512_xor_epi32(a, b).as_i32x16();
28605        transmute(simd_select_bitmask(k, xor, i32x16::ZERO))
28606    }
28607}
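
// A small sketch (not from the original source; the helper name is made up):
// zeromask behaviour of `_mm512_maskz_xor_epi32` -- only the selected lanes
// are computed, the rest are zeroed. Assumes AVX-512F.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn maskz_xor_epi32_sketch() {
    let a = _mm512_set1_epi32(0b1010);
    let b = _mm512_set1_epi32(0b0110);
    // Low 8 lanes hold a ^ b = 0b1100, high 8 lanes are zeroed.
    let r = _mm512_maskz_xor_epi32(0x00ff, a, b);
    assert_eq!(_mm512_cmpeq_epi32_mask(r, _mm512_set1_epi32(0b1100)), 0x00ff);
    assert_eq!(_mm512_cmpeq_epi32_mask(r, _mm512_setzero_si512()), 0xff00);
}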
28608
28609/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
28610///
28611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_epi32&expand=6139)
28612#[inline]
28613#[target_feature(enable = "avx512f,avx512vl")]
28614#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28615#[cfg_attr(test, assert_instr(vxor))] //should be vpxord
28616pub fn _mm256_xor_epi32(a: __m256i, b: __m256i) -> __m256i {
28617    unsafe { transmute(simd_xor(a.as_i32x8(), b.as_i32x8())) }
28618}
28619
28620/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28621///
28622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_epi32&expand=6137)
28623#[inline]
28624#[target_feature(enable = "avx512f,avx512vl")]
28625#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28626#[cfg_attr(test, assert_instr(vpxord))]
28627pub fn _mm256_mask_xor_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28628    unsafe {
28629        let xor = _mm256_xor_epi32(a, b).as_i32x8();
28630        transmute(simd_select_bitmask(k, xor, src.as_i32x8()))
28631    }
28632}
28633
28634/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28635///
28636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_epi32&expand=6138)
28637#[inline]
28638#[target_feature(enable = "avx512f,avx512vl")]
28639#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28640#[cfg_attr(test, assert_instr(vpxord))]
28641pub fn _mm256_maskz_xor_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28642    unsafe {
28643        let xor = _mm256_xor_epi32(a, b).as_i32x8();
28644        transmute(simd_select_bitmask(k, xor, i32x8::ZERO))
28645    }
28646}
28647
28648/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
28649///
28650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_epi32&expand=6136)
28651#[inline]
28652#[target_feature(enable = "avx512f,avx512vl")]
28653#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28654#[cfg_attr(test, assert_instr(vxor))] //should be vpxord
28655pub fn _mm_xor_epi32(a: __m128i, b: __m128i) -> __m128i {
28656    unsafe { transmute(simd_xor(a.as_i32x4(), b.as_i32x4())) }
28657}
28658
28659/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28660///
28661/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_epi32&expand=6134)
28662#[inline]
28663#[target_feature(enable = "avx512f,avx512vl")]
28664#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28665#[cfg_attr(test, assert_instr(vpxord))]
28666pub fn _mm_mask_xor_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28667    unsafe {
28668        let xor = _mm_xor_epi32(a, b).as_i32x4();
28669        transmute(simd_select_bitmask(k, xor, src.as_i32x4()))
28670    }
28671}
28672
28673/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28674///
28675/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_epi32&expand=6135)
28676#[inline]
28677#[target_feature(enable = "avx512f,avx512vl")]
28678#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28679#[cfg_attr(test, assert_instr(vpxord))]
28680pub fn _mm_maskz_xor_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28681    unsafe {
28682        let xor = _mm_xor_epi32(a, b).as_i32x4();
28683        transmute(simd_select_bitmask(k, xor, i32x4::ZERO))
28684    }
28685}
28686
28687/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
28688///
28689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_epi64&expand=6151)
28690#[inline]
28691#[target_feature(enable = "avx512f")]
28692#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28693#[cfg_attr(test, assert_instr(vpxorq))]
28694pub fn _mm512_xor_epi64(a: __m512i, b: __m512i) -> __m512i {
28695    unsafe { transmute(simd_xor(a.as_i64x8(), b.as_i64x8())) }
28696}
28697
28698/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28699///
28700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_epi64&expand=6149)
28701#[inline]
28702#[target_feature(enable = "avx512f")]
28703#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28704#[cfg_attr(test, assert_instr(vpxorq))]
28705pub fn _mm512_mask_xor_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28706    unsafe {
28707        let xor = _mm512_xor_epi64(a, b).as_i64x8();
28708        transmute(simd_select_bitmask(k, xor, src.as_i64x8()))
28709    }
28710}
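
// Hypothetical sketch (not part of the original source): writemask behaviour
// of `_mm512_mask_xor_epi64` -- unselected lanes are copied from `src` rather
// than zeroed. Assumes AVX-512F.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn mask_xor_epi64_sketch() {
    let src = _mm512_set1_epi64(-1);
    let a = _mm512_set1_epi64(0b1100);
    let b = _mm512_set1_epi64(0b1010);
    let r = _mm512_mask_xor_epi64(src, 0b0000_0011, a, b);
    // Lanes 0 and 1 hold a ^ b = 0b0110; the other lanes keep `src`.
    assert_eq!(_mm512_cmpeq_epi64_mask(r, _mm512_set1_epi64(0b0110)), 0b0000_0011);
    assert_eq!(_mm512_cmpeq_epi64_mask(r, src), 0b1111_1100);
}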
28711
28712/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28713///
28714/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_epi64&expand=6150)
28715#[inline]
28716#[target_feature(enable = "avx512f")]
28717#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28718#[cfg_attr(test, assert_instr(vpxorq))]
28719pub fn _mm512_maskz_xor_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28720    unsafe {
28721        let xor = _mm512_xor_epi64(a, b).as_i64x8();
28722        transmute(simd_select_bitmask(k, xor, i64x8::ZERO))
28723    }
28724}
28725
28726/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
28727///
28728/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_epi64&expand=6148)
28729#[inline]
28730#[target_feature(enable = "avx512f,avx512vl")]
28731#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28732#[cfg_attr(test, assert_instr(vxor))] //should be vpxorq
28733pub fn _mm256_xor_epi64(a: __m256i, b: __m256i) -> __m256i {
28734    unsafe { transmute(simd_xor(a.as_i64x4(), b.as_i64x4())) }
28735}
28736
28737/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28738///
28739/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_epi64&expand=6146)
28740#[inline]
28741#[target_feature(enable = "avx512f,avx512vl")]
28742#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28743#[cfg_attr(test, assert_instr(vpxorq))]
28744pub fn _mm256_mask_xor_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28745    unsafe {
28746        let xor = _mm256_xor_epi64(a, b).as_i64x4();
28747        transmute(simd_select_bitmask(k, xor, src.as_i64x4()))
28748    }
28749}
28750
28751/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28752///
28753/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_epi64&expand=6147)
28754#[inline]
28755#[target_feature(enable = "avx512f,avx512vl")]
28756#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28757#[cfg_attr(test, assert_instr(vpxorq))]
28758pub fn _mm256_maskz_xor_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28759    unsafe {
28760        let xor = _mm256_xor_epi64(a, b).as_i64x4();
28761        transmute(simd_select_bitmask(k, xor, i64x4::ZERO))
28762    }
28763}
28764
28765/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
28766///
28767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_epi64&expand=6145)
28768#[inline]
28769#[target_feature(enable = "avx512f,avx512vl")]
28770#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28771#[cfg_attr(test, assert_instr(vxor))] //should be vpxorq
28772pub fn _mm_xor_epi64(a: __m128i, b: __m128i) -> __m128i {
28773    unsafe { transmute(simd_xor(a.as_i64x2(), b.as_i64x2())) }
28774}
28775
28776/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28777///
28778/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_epi64&expand=6143)
28779#[inline]
28780#[target_feature(enable = "avx512f,avx512vl")]
28781#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28782#[cfg_attr(test, assert_instr(vpxorq))]
28783pub fn _mm_mask_xor_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28784    unsafe {
28785        let xor = _mm_xor_epi64(a, b).as_i64x2();
28786        transmute(simd_select_bitmask(k, xor, src.as_i64x2()))
28787    }
28788}
28789
28790/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28791///
28792/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_epi64&expand=6144)
28793#[inline]
28794#[target_feature(enable = "avx512f,avx512vl")]
28795#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28796#[cfg_attr(test, assert_instr(vpxorq))]
28797pub fn _mm_maskz_xor_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28798    unsafe {
28799        let xor = _mm_xor_epi64(a, b).as_i64x2();
28800        transmute(simd_select_bitmask(k, xor, i64x2::ZERO))
28801    }
28802}
28803
28804/// Compute the bitwise XOR of 512 bits (representing integer data) in a and b, and store the result in dst.
28805///
28806/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_si512&expand=6172)
28807#[inline]
28808#[target_feature(enable = "avx512f")]
28809#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28810#[cfg_attr(test, assert_instr(vpxorq))]
28811pub fn _mm512_xor_si512(a: __m512i, b: __m512i) -> __m512i {
28812    unsafe { transmute(simd_xor(a.as_i32x16(), b.as_i32x16())) }
28813}
28814
28815/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst.
28816///
28817/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_epi32&expand=310)
28818#[inline]
28819#[target_feature(enable = "avx512f")]
28820#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28821#[cfg_attr(test, assert_instr(vpandnq))] //should be vpandnd
28822pub fn _mm512_andnot_epi32(a: __m512i, b: __m512i) -> __m512i {
28823    _mm512_and_epi32(_mm512_xor_epi32(a, _mm512_set1_epi32(u32::MAX as i32)), b)
28824}
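
// A minimal sketch (not in the original source; the helper name is made up):
// `_mm512_andnot_epi32` computes `(!a) & b` in every 32-bit lane, matching the
// NOT-then-AND description above. Assumes AVX-512F.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn andnot_epi32_sketch() {
    let a = _mm512_set1_epi32(0b1100);
    let b = _mm512_set1_epi32(0b1010);
    let r = _mm512_andnot_epi32(a, b);
    // (!0b1100) & 0b1010 == 0b0010 in every lane.
    assert_eq!(_mm512_cmpeq_epi32_mask(r, _mm512_set1_epi32(0b0010)), 0xffff);
}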
28825
28826/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28827///
28828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_epi32&expand=311)
28829#[inline]
28830#[target_feature(enable = "avx512f")]
28831#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28832#[cfg_attr(test, assert_instr(vpandnd))]
28833pub fn _mm512_mask_andnot_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28834    unsafe {
28835        let andnot = _mm512_andnot_epi32(a, b).as_i32x16();
28836        transmute(simd_select_bitmask(k, andnot, src.as_i32x16()))
28837    }
28838}
28839
28840/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28841///
28842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_epi32&expand=312)
28843#[inline]
28844#[target_feature(enable = "avx512f")]
28845#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28846#[cfg_attr(test, assert_instr(vpandnd))]
28847pub fn _mm512_maskz_andnot_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28848    unsafe {
28849        let andnot = _mm512_andnot_epi32(a, b).as_i32x16();
28850        transmute(simd_select_bitmask(k, andnot, i32x16::ZERO))
28851    }
28852}
28853
28854/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28855///
28856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_epi32&expand=308)
28857#[inline]
28858#[target_feature(enable = "avx512f,avx512vl")]
28859#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28860#[cfg_attr(test, assert_instr(vpandnd))]
28861pub fn _mm256_mask_andnot_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28862    unsafe {
28863        let not = _mm256_xor_epi32(a, _mm256_set1_epi32(u32::MAX as i32));
28864        let andnot = simd_and(not.as_i32x8(), b.as_i32x8());
28865        transmute(simd_select_bitmask(k, andnot, src.as_i32x8()))
28866    }
28867}
28868
28869/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28870///
28871/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_epi32&expand=309)
28872#[inline]
28873#[target_feature(enable = "avx512f,avx512vl")]
28874#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28875#[cfg_attr(test, assert_instr(vpandnd))]
28876pub fn _mm256_maskz_andnot_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28877    unsafe {
28878        let not = _mm256_xor_epi32(a, _mm256_set1_epi32(u32::MAX as i32));
28879        let andnot = simd_and(not.as_i32x8(), b.as_i32x8());
28880        transmute(simd_select_bitmask(k, andnot, i32x8::ZERO))
28881    }
28882}
28883
28884/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28885///
28886/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_epi32&expand=306)
28887#[inline]
28888#[target_feature(enable = "avx512f,avx512vl")]
28889#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28890#[cfg_attr(test, assert_instr(vpandnd))]
28891pub fn _mm_mask_andnot_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28892    unsafe {
28893        let not = _mm_xor_epi32(a, _mm_set1_epi32(u32::MAX as i32));
28894        let andnot = simd_and(not.as_i32x4(), b.as_i32x4());
28895        transmute(simd_select_bitmask(k, andnot, src.as_i32x4()))
28896    }
28897}
28898
28899/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28900///
28901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_epi32&expand=307)
28902#[inline]
28903#[target_feature(enable = "avx512f,avx512vl")]
28904#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28905#[cfg_attr(test, assert_instr(vpandnd))]
28906pub fn _mm_maskz_andnot_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28907    unsafe {
28908        let not = _mm_xor_epi32(a, _mm_set1_epi32(u32::MAX as i32));
28909        let andnot = simd_and(not.as_i32x4(), b.as_i32x4());
28910        transmute(simd_select_bitmask(k, andnot, i32x4::ZERO))
28911    }
28912}
28913
28914/// Compute the bitwise NOT of 512 bits (composed of packed 64-bit integers) in a and then AND with b, and store the results in dst.
28915///
28916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_epi64&expand=317)
28917#[inline]
28918#[target_feature(enable = "avx512f")]
28919#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28920#[cfg_attr(test, assert_instr(vpandnq))]
28921pub fn _mm512_andnot_epi64(a: __m512i, b: __m512i) -> __m512i {
28922    _mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
28923}
28924
28925/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28926///
28927/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_epi64&expand=318)
28928#[inline]
28929#[target_feature(enable = "avx512f")]
28930#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28931#[cfg_attr(test, assert_instr(vpandnq))]
28932pub fn _mm512_mask_andnot_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28933    unsafe {
28934        let andnot = _mm512_andnot_epi64(a, b).as_i64x8();
28935        transmute(simd_select_bitmask(k, andnot, src.as_i64x8()))
28936    }
28937}
28938
28939/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28940///
28941/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_epi64&expand=319)
28942#[inline]
28943#[target_feature(enable = "avx512f")]
28944#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28945#[cfg_attr(test, assert_instr(vpandnq))]
28946pub fn _mm512_maskz_andnot_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28947    unsafe {
28948        let andnot = _mm512_andnot_epi64(a, b).as_i64x8();
28949        transmute(simd_select_bitmask(k, andnot, i64x8::ZERO))
28950    }
28951}
28952
28953/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28954///
28955/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_epi64&expand=315)
28956#[inline]
28957#[target_feature(enable = "avx512f,avx512vl")]
28958#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28959#[cfg_attr(test, assert_instr(vpandnq))]
28960pub fn _mm256_mask_andnot_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28961    unsafe {
28962        let not = _mm256_xor_epi64(a, _mm256_set1_epi64x(u64::MAX as i64));
28963        let andnot = simd_and(not.as_i64x4(), b.as_i64x4());
28964        transmute(simd_select_bitmask(k, andnot, src.as_i64x4()))
28965    }
28966}
28967
28968/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28969///
28970/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_epi64&expand=316)
28971#[inline]
28972#[target_feature(enable = "avx512f,avx512vl")]
28973#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28974#[cfg_attr(test, assert_instr(vpandnq))]
28975pub fn _mm256_maskz_andnot_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28976    unsafe {
28977        let not = _mm256_xor_epi64(a, _mm256_set1_epi64x(u64::MAX as i64));
28978        let andnot = simd_and(not.as_i64x4(), b.as_i64x4());
28979        transmute(simd_select_bitmask(k, andnot, i64x4::ZERO))
28980    }
28981}
28982
28983/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28984///
28985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_epi64&expand=313)
28986#[inline]
28987#[target_feature(enable = "avx512f,avx512vl")]
28988#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28989#[cfg_attr(test, assert_instr(vpandnq))]
28990pub fn _mm_mask_andnot_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28991    unsafe {
28992        let not = _mm_xor_epi64(a, _mm_set1_epi64x(u64::MAX as i64));
28993        let andnot = simd_and(not.as_i64x2(), b.as_i64x2());
28994        transmute(simd_select_bitmask(k, andnot, src.as_i64x2()))
28995    }
28996}
28997
28998/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28999///
29000/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_epi64&expand=314)
29001#[inline]
29002#[target_feature(enable = "avx512f,avx512vl")]
29003#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29004#[cfg_attr(test, assert_instr(vpandnq))]
29005pub fn _mm_maskz_andnot_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29006    unsafe {
29007        let not = _mm_xor_epi64(a, _mm_set1_epi64x(u64::MAX as i64));
29008        let andnot = simd_and(not.as_i64x2(), b.as_i64x2());
29009        transmute(simd_select_bitmask(k, andnot, i64x2::ZERO))
29010    }
29011}
29012
29013/// Compute the bitwise NOT of 512 bits (representing integer data) in a and then AND with b, and store the result in dst.
29014///
29015/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_si512&expand=340)
29016#[inline]
29017#[target_feature(enable = "avx512f")]
29018#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29019#[cfg_attr(test, assert_instr(vpandnq))]
29020pub fn _mm512_andnot_si512(a: __m512i, b: __m512i) -> __m512i {
29021    _mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
29022}
29023
29024/// Convert 16-bit mask a into an integer value, and store the result in dst.
29025///
29026/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtmask16_u32)
29027#[inline]
29028#[target_feature(enable = "avx512f")]
29029#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29030pub fn _cvtmask16_u32(a: __mmask16) -> u32 {
29031    a as u32
29032}
29033
29034/// Convert 32-bit integer value a to a 16-bit mask and store the result in dst.
29035///
29036/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu32_mask16)
29037#[inline]
29038#[target_feature(enable = "avx512f")]
29039#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29040pub fn _cvtu32_mask16(a: u32) -> __mmask16 {
29041    a as __mmask16
29042}
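
// A small sketch (not from the original source; the helper name is made up):
// `_cvtu32_mask16` and `_cvtmask16_u32` are plain integer/mask conversions,
// so a value round-trips through them unchanged.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn mask16_conversion_sketch() {
    let k = _cvtu32_mask16(0b1010_0101_1111_0000);
    assert_eq!(_cvtmask16_u32(k), 0b1010_0101_1111_0000);
}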
29043
29044/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
29045///
29046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kand_mask16&expand=3212)
29047#[inline]
29048#[target_feature(enable = "avx512f")]
29049#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29050#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
29051pub fn _kand_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
29052    a & b
29053}
29054
29055/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
29056///
29057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kand&expand=3210)
29058#[inline]
29059#[target_feature(enable = "avx512f")]
29060#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29061#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
29062pub fn _mm512_kand(a: __mmask16, b: __mmask16) -> __mmask16 {
29063    a & b
29064}
29065
29066/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
29067///
29068/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kor_mask16&expand=3239)
29069#[inline]
29070#[target_feature(enable = "avx512f")]
29071#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29072#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
29073pub fn _kor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
29074    a | b
29075}
29076
29077/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
29078///
29079/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kor&expand=3237)
29080#[inline]
29081#[target_feature(enable = "avx512f")]
29082#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29083#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
29084pub fn _mm512_kor(a: __mmask16, b: __mmask16) -> __mmask16 {
29085    a | b
29086}
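
// Illustrative sketch (not part of the original source; the helper name is
// made up): the 16-bit mask AND/OR helpers behave exactly like scalar `&` and
// `|` on `__mmask16` values.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn kand_kor_sketch() {
    let a: __mmask16 = 0b1111_0000_1111_0000;
    let b: __mmask16 = 0b1010_1010_1010_1010;
    assert_eq!(_kand_mask16(a, b), a & b);
    assert_eq!(_mm512_kor(a, b), a | b);
}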
29087
29088/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
29089///
29090/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kxor_mask16&expand=3291)
29091#[inline]
29092#[target_feature(enable = "avx512f")]
29093#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29094#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
29095pub fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
29096    a ^ b
29097}
29098
29099/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
29100///
29101/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kxor&expand=3289)
29102#[inline]
29103#[target_feature(enable = "avx512f")]
29104#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29105#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
29106pub fn _mm512_kxor(a: __mmask16, b: __mmask16) -> __mmask16 {
29107    a ^ b
29108}
29109
29110/// Compute the bitwise NOT of 16-bit mask a, and store the result in k.
29111///
29112/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=knot_mask16&expand=3233)
29113#[inline]
29114#[target_feature(enable = "avx512f")]
29115#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29116pub fn _knot_mask16(a: __mmask16) -> __mmask16 {
29117    a ^ 0b11111111_11111111
29118}
29119
29120/// Compute the bitwise NOT of 16-bit mask a, and store the result in k.
29121///
29122/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_knot&expand=3231)
29123#[inline]
29124#[target_feature(enable = "avx512f")]
29125#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29126pub fn _mm512_knot(a: __mmask16) -> __mmask16 {
29127    a ^ 0b11111111_11111111
29128}
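
// A minimal sketch (not in the original source; the helper name is made up):
// `_kxor_mask16` is a plain 16-bit XOR and `_knot_mask16` flips all 16 bits.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn kxor_knot_sketch() {
    let a: __mmask16 = 0b0000_1111_0000_1111;
    let b: __mmask16 = 0b0000_0000_1111_1111;
    assert_eq!(_kxor_mask16(a, b), 0b0000_1111_1111_0000);
    assert_eq!(_knot_mask16(a), 0b1111_0000_1111_0000);
}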
29129
29130/// Compute the bitwise NOT of 16-bit masks a and then AND with b, and store the result in k.
29131///
29132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kandn_mask16&expand=3218)
29133#[inline]
29134#[target_feature(enable = "avx512f")]
29135#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29136#[cfg_attr(test, assert_instr(not))] // generate normal and, not code instead of kandnw
29137pub fn _kandn_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
29138    _mm512_kand(_mm512_knot(a), b)
29139}
29140
29141/// Compute the bitwise NOT of 16-bit masks a and then AND with b, and store the result in k.
29142///
29143/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kandn&expand=3216)
29144#[inline]
29145#[target_feature(enable = "avx512f")]
29146#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29147#[cfg_attr(test, assert_instr(not))] // generate normal and, not code instead of kandnw
29148pub fn _mm512_kandn(a: __mmask16, b: __mmask16) -> __mmask16 {
29149    _mm512_kand(_mm512_knot(a), b)
29150}
29151
29152/// Compute the bitwise XNOR of 16-bit masks a and b, and store the result in k.
29153///
29154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kxnor_mask16&expand=3285)
29155#[inline]
29156#[target_feature(enable = "avx512f")]
29157#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29158#[cfg_attr(test, assert_instr(xor))] // generate normal xor, not code instead of kxnorw
29159pub fn _kxnor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
29160    _mm512_knot(_mm512_kxor(a, b))
29161}
29162
29163/// Compute the bitwise XNOR of 16-bit masks a and b, and store the result in k.
29164///
29165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kxnor&expand=3283)
29166#[inline]
29167#[target_feature(enable = "avx512f")]
29168#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29169#[cfg_attr(test, assert_instr(xor))] // generate normal xor, not code instead of kxnorw
29170pub fn _mm512_kxnor(a: __mmask16, b: __mmask16) -> __mmask16 {
29171    _mm512_knot(_mm512_kxor(a, b))
29172}
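
// Hypothetical sketch (not from the original source): `_kandn_mask16` computes
// `(!a) & b` and `_kxnor_mask16` computes `!(a ^ b)`, as documented above.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn kandn_kxnor_sketch() {
    let a: __mmask16 = 0b1111_1111_0000_0000;
    let b: __mmask16 = 0b1010_1010_1010_1010;
    assert_eq!(_kandn_mask16(a, b), !a & b);
    assert_eq!(_kxnor_mask16(a, b), !(a ^ b));
}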
29173
29174/// Compute the bitwise OR of 16-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
29175/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
29176///
29177/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask16_u8)
29178#[inline]
29179#[target_feature(enable = "avx512f")]
29180#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29181pub unsafe fn _kortest_mask16_u8(a: __mmask16, b: __mmask16, all_ones: *mut u8) -> u8 {
29182    let tmp = _kor_mask16(a, b);
29183    *all_ones = (tmp == 0xffff) as u8;
29184    (tmp == 0) as u8
29185}
29186
29187/// Compute the bitwise OR of 16-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
29188/// store 0 in dst.
29189///
29190/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask16_u8)
29191#[inline]
29192#[target_feature(enable = "avx512f")]
29193#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29194pub fn _kortestc_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
29195    (_kor_mask16(a, b) == 0xffff) as u8
29196}
29197
29198/// Compute the bitwise OR of 16-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
29199/// store 0 in dst.
29200///
29201/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask16_u8)
29202#[inline]
29203#[target_feature(enable = "avx512f")]
29204#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29205pub fn _kortestz_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
29206    (_kor_mask16(a, b) == 0) as u8
29207}
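
// A small sketch (not part of the original source; the helper name is made
// up): the kortest helpers report whether `a | b` is all zeros / all ones;
// `_kortest_mask16_u8` returns the all-zeros flag and writes the all-ones
// flag through `all_ones`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn kortest_sketch() {
    let a: __mmask16 = 0x00ff;
    let b: __mmask16 = 0xff00;
    let mut all_ones = 0u8;
    // SAFETY: `all_ones` is a valid, writable u8.
    let all_zeros = unsafe { _kortest_mask16_u8(a, b, &mut all_ones) };
    assert_eq!(all_zeros, 0);
    assert_eq!(all_ones, 1);
    assert_eq!(_kortestc_mask16_u8(a, b), 1);
    assert_eq!(_kortestz_mask16_u8(a, b), 0);
}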
29208
29209/// Shift 16-bit mask a left by count bits while shifting in zeros, and store the result in dst.
29210///
29211/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask16)
29212#[inline]
29213#[target_feature(enable = "avx512f")]
29214#[rustc_legacy_const_generics(1)]
29215#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29216pub fn _kshiftli_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
29217    a << COUNT
29218}
29219
29220/// Shift 16-bit mask a right by count bits while shifting in zeros, and store the result in dst.
29221///
29222/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask16)
29223#[inline]
29224#[target_feature(enable = "avx512f")]
29225#[rustc_legacy_const_generics(1)]
29226#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29227pub fn _kshiftri_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
29228    a >> COUNT
29229}
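
// Illustrative sketch (not in the original source; the helper name is made
// up): the shift-immediate mask helpers shift the 16-bit mask by the const
// generic COUNT, filling with zeros.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn kshift_sketch() {
    let a: __mmask16 = 0b0000_0000_1111_0000;
    assert_eq!(_kshiftli_mask16::<4>(a), 0b0000_1111_0000_0000);
    assert_eq!(_kshiftri_mask16::<4>(a), 0b0000_0000_0000_1111);
}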
29230
29231/// Load 16-bit mask from memory into k.
29232///
29233/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask16)
29234#[inline]
29235#[target_feature(enable = "avx512f")]
29236#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29237pub unsafe fn _load_mask16(mem_addr: *const __mmask16) -> __mmask16 {
29238    *mem_addr
29239}
29240
29241/// Store 16-bit mask a to memory.
29242///
29243/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask16)
29244#[inline]
29245#[target_feature(enable = "avx512f")]
29246#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29247pub unsafe fn _store_mask16(mem_addr: *mut __mmask16, a: __mmask16) {
29248    *mem_addr = a;
29249}
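
// A minimal sketch (not from the original source; the helper name is made up):
// round-tripping a mask through memory with `_store_mask16` / `_load_mask16`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn load_store_mask16_sketch() {
    let mut slot: __mmask16 = 0;
    // SAFETY: `slot` is valid for a 16-bit write and read.
    unsafe {
        _store_mask16(&mut slot, 0b1001_0110_0110_1001);
        assert_eq!(_load_mask16(&slot), 0b1001_0110_0110_1001);
    }
}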
29250
29251/// Copy 16-bit mask a to k.
29252///
29253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm512_kmov&expand=3228)
29254#[inline]
29255#[target_feature(enable = "avx512f")]
29256#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29257#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kmovw
29258pub fn _mm512_kmov(a: __mmask16) -> __mmask16 {
29259    a
29260}
29261
29262/// Converts integer mask into bitmask, storing the result in dst.
29263///
29264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_int2mask&expand=3189)
29265#[inline]
29266#[target_feature(enable = "avx512f")]
29267#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29268pub fn _mm512_int2mask(mask: i32) -> __mmask16 {
29269    mask as u16
29270}
29271
29272/// Converts bit mask k1 into an integer value, storing the result in dst.
29273///
29274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2int&expand=3544)
29275#[inline]
29276#[target_feature(enable = "avx512f")]
29277#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29278#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kmovw
29279pub fn _mm512_mask2int(k1: __mmask16) -> i32 {
29280    k1 as i32
29281}
29282
29283/// Unpack and interleave 8 bits from masks a and b, and store the 16-bit result in k.
29284///
29285/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kunpackb&expand=3280)
29286#[inline]
29287#[target_feature(enable = "avx512f")]
29288#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29289#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kunpckbw
29290pub fn _mm512_kunpackb(a: __mmask16, b: __mmask16) -> __mmask16 {
29291    ((a & 0xff) << 8) | (b & 0xff)
29292}
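
// Hypothetical sketch (not part of the original source): with the
// implementation above, `_mm512_kunpackb` places the low byte of `a` in the
// high byte of the result and the low byte of `b` in the low byte.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn kunpackb_sketch() {
    let a: __mmask16 = 0xff12;
    let b: __mmask16 = 0xff34;
    assert_eq!(_mm512_kunpackb(a, b), 0x1234);
}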
29293
29294/// Performs bitwise OR between k1 and k2, storing the result in dst. CF flag is set if dst consists of all 1's.
29295///
29296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kortestc&expand=3247)
29297#[inline]
29298#[target_feature(enable = "avx512f")]
29299#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29300#[cfg_attr(test, assert_instr(cmp))] // generate normal cmp code instead of kortestw
29301pub fn _mm512_kortestc(a: __mmask16, b: __mmask16) -> i32 {
29302    let r = (a | b) == 0b11111111_11111111;
29303    r as i32
29304}
29305
29306/// Performs bitwise OR between k1 and k2, storing the result in dst. ZF flag is set if dst is 0.
29307///
29308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kortestz)
29309#[inline]
29310#[target_feature(enable = "avx512f")]
29311#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29312#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kortestw
29313pub fn _mm512_kortestz(a: __mmask16, b: __mmask16) -> i32 {
29314    let r = (a | b) == 0;
29315    r as i32
29316}
29317
29318/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29319///
29320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi32_mask&expand=5890)
29321#[inline]
29322#[target_feature(enable = "avx512f")]
29323#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29324#[cfg_attr(test, assert_instr(vptestmd))]
29325pub fn _mm512_test_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
29326    let and = _mm512_and_epi32(a, b);
29327    let zero = _mm512_setzero_si512();
29328    _mm512_cmpneq_epi32_mask(and, zero)
29329}
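
// A minimal sketch (not in the original source; the helper name is made up):
// `_mm512_test_epi32_mask` sets a mask bit wherever `a & b` is non-zero in
// that 32-bit lane. Assumes AVX-512F.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn test_epi32_mask_sketch() {
    let a = _mm512_set1_epi32(0b0101);
    // 0b0101 & 0b0010 == 0 in every lane, so no mask bit is set.
    assert_eq!(_mm512_test_epi32_mask(a, _mm512_set1_epi32(0b0010)), 0);
    // 0b0101 & 0b0110 == 0b0100 != 0, so every mask bit is set.
    assert_eq!(_mm512_test_epi32_mask(a, _mm512_set1_epi32(0b0110)), 0xffff);
}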
29330
29331/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29332///
29333/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi32_mask&expand=5889)
29334#[inline]
29335#[target_feature(enable = "avx512f")]
29336#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29337#[cfg_attr(test, assert_instr(vptestmd))]
29338pub fn _mm512_mask_test_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
29339    let and = _mm512_and_epi32(a, b);
29340    let zero = _mm512_setzero_si512();
29341    _mm512_mask_cmpneq_epi32_mask(k, and, zero)
29342}
29343
29344/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29345///
29346/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi32_mask&expand=5888)
29347#[inline]
29348#[target_feature(enable = "avx512f,avx512vl")]
29349#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29350#[cfg_attr(test, assert_instr(vptestmd))]
29351pub fn _mm256_test_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
29352    let and = _mm256_and_si256(a, b);
29353    let zero = _mm256_setzero_si256();
29354    _mm256_cmpneq_epi32_mask(and, zero)
29355}
29356
29357/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29358///
29359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi32_mask&expand=5887)
29360#[inline]
29361#[target_feature(enable = "avx512f,avx512vl")]
29362#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29363#[cfg_attr(test, assert_instr(vptestmd))]
29364pub fn _mm256_mask_test_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29365    let and = _mm256_and_si256(a, b);
29366    let zero = _mm256_setzero_si256();
29367    _mm256_mask_cmpneq_epi32_mask(k, and, zero)
29368}
29369
29370/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29371///
29372/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi32_mask&expand=5886)
29373#[inline]
29374#[target_feature(enable = "avx512f,avx512vl")]
29375#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29376#[cfg_attr(test, assert_instr(vptestmd))]
29377pub fn _mm_test_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
29378    let and = _mm_and_si128(a, b);
29379    let zero = _mm_setzero_si128();
29380    _mm_cmpneq_epi32_mask(and, zero)
29381}
29382
29383/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29384///
29385/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi32_mask&expand=5885)
29386#[inline]
29387#[target_feature(enable = "avx512f,avx512vl")]
29388#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29389#[cfg_attr(test, assert_instr(vptestmd))]
29390pub fn _mm_mask_test_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29391    let and = _mm_and_si128(a, b);
29392    let zero = _mm_setzero_si128();
29393    _mm_mask_cmpneq_epi32_mask(k, and, zero)
29394}
29395
29396/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29397///
29398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi64_mask&expand=5896)
29399#[inline]
29400#[target_feature(enable = "avx512f")]
29401#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29402#[cfg_attr(test, assert_instr(vptestmq))]
29403pub fn _mm512_test_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
29404    let and = _mm512_and_epi64(a, b);
29405    let zero = _mm512_setzero_si512();
29406    _mm512_cmpneq_epi64_mask(and, zero)
29407}
29408
29409/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29410///
29411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi64_mask&expand=5895)
29412#[inline]
29413#[target_feature(enable = "avx512f")]
29414#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29415#[cfg_attr(test, assert_instr(vptestmq))]
29416pub fn _mm512_mask_test_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
29417    let and = _mm512_and_epi64(a, b);
29418    let zero = _mm512_setzero_si512();
29419    _mm512_mask_cmpneq_epi64_mask(k, and, zero)
29420}
29421
29422/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29423///
29424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi64_mask&expand=5894)
29425#[inline]
29426#[target_feature(enable = "avx512f,avx512vl")]
29427#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29428#[cfg_attr(test, assert_instr(vptestmq))]
29429pub fn _mm256_test_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
29430    let and = _mm256_and_si256(a, b);
29431    let zero = _mm256_setzero_si256();
29432    _mm256_cmpneq_epi64_mask(and, zero)
29433}
29434
29435/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29436///
29437/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi64_mask&expand=5893)
29438#[inline]
29439#[target_feature(enable = "avx512f,avx512vl")]
29440#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29441#[cfg_attr(test, assert_instr(vptestmq))]
29442pub fn _mm256_mask_test_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29443    let and = _mm256_and_si256(a, b);
29444    let zero = _mm256_setzero_si256();
29445    _mm256_mask_cmpneq_epi64_mask(k, and, zero)
29446}
29447
29448/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29449///
29450/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi64_mask&expand=5892)
29451#[inline]
29452#[target_feature(enable = "avx512f,avx512vl")]
29453#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29454#[cfg_attr(test, assert_instr(vptestmq))]
29455pub fn _mm_test_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
29456    let and = _mm_and_si128(a, b);
29457    let zero = _mm_setzero_si128();
29458    _mm_cmpneq_epi64_mask(and, zero)
29459}
29460
29461/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29462///
29463/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi64_mask&expand=5891)
29464#[inline]
29465#[target_feature(enable = "avx512f,avx512vl")]
29466#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29467#[cfg_attr(test, assert_instr(vptestmq))]
29468pub fn _mm_mask_test_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29469    let and = _mm_and_si128(a, b);
29470    let zero = _mm_setzero_si128();
29471    _mm_mask_cmpneq_epi64_mask(k, and, zero)
29472}
29473
29474/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29475///
29476/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi32_mask&expand=5921)
29477#[inline]
29478#[target_feature(enable = "avx512f")]
29479#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29480#[cfg_attr(test, assert_instr(vptestnmd))]
29481pub fn _mm512_testn_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
29482    let and = _mm512_and_epi32(a, b);
29483    let zero = _mm512_setzero_si512();
29484    _mm512_cmpeq_epi32_mask(and, zero)
29485}
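
// Illustrative sketch (not from the original source; the helper name is made
// up): `_mm512_testn_epi32_mask` is the complement of `_mm512_test_epi32_mask`
// above -- a mask bit is set where `a & b` is zero in that lane.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn testn_epi32_mask_sketch() {
    let a = _mm512_set1_epi32(0b0101);
    let b = _mm512_set1_epi32(0b0010);
    assert_eq!(_mm512_testn_epi32_mask(a, b), 0xffff);
    assert_eq!(_mm512_test_epi32_mask(a, b), 0);
}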
29486
29487/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29488///
29489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi32_mask&expand=5920)
29490#[inline]
29491#[target_feature(enable = "avx512f")]
29492#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29493#[cfg_attr(test, assert_instr(vptestnmd))]
29494pub fn _mm512_mask_testn_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
29495    let and = _mm512_and_epi32(a, b);
29496    let zero = _mm512_setzero_si512();
29497    _mm512_mask_cmpeq_epi32_mask(k, and, zero)
29498}
29499
29500/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29501///
29502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi32_mask&expand=5919)
29503#[inline]
29504#[target_feature(enable = "avx512f,avx512vl")]
29505#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29506#[cfg_attr(test, assert_instr(vptestnmd))]
29507pub fn _mm256_testn_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
29508    let and = _mm256_and_si256(a, b);
29509    let zero = _mm256_setzero_si256();
29510    _mm256_cmpeq_epi32_mask(and, zero)
29511}
29512
29513/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29514///
29515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi32_mask&expand=5918)
29516#[inline]
29517#[target_feature(enable = "avx512f,avx512vl")]
29518#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29519#[cfg_attr(test, assert_instr(vptestnmd))]
29520pub fn _mm256_mask_testn_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29521    let and = _mm256_and_si256(a, b);
29522    let zero = _mm256_setzero_si256();
29523    _mm256_mask_cmpeq_epi32_mask(k, and, zero)
29524}
29525
29526/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29527///
29528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi32_mask&expand=5917)
29529#[inline]
29530#[target_feature(enable = "avx512f,avx512vl")]
29531#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29532#[cfg_attr(test, assert_instr(vptestnmd))]
29533pub fn _mm_testn_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
29534    let and = _mm_and_si128(a, b);
29535    let zero = _mm_setzero_si128();
29536    _mm_cmpeq_epi32_mask(and, zero)
29537}
29538
29539/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29540///
29541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi32_mask&expand=5916)
29542#[inline]
29543#[target_feature(enable = "avx512f,avx512vl")]
29544#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29545#[cfg_attr(test, assert_instr(vptestnmd))]
29546pub fn _mm_mask_testn_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29547    let and = _mm_and_si128(a, b);
29548    let zero = _mm_setzero_si128();
29549    _mm_mask_cmpeq_epi32_mask(k, and, zero)
29550}
29551
29552/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29553///
29554/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi64_mask&expand=5927)
29555#[inline]
29556#[target_feature(enable = "avx512f")]
29557#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29558#[cfg_attr(test, assert_instr(vptestnmq))]
29559pub fn _mm512_testn_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
29560    let and = _mm512_and_epi64(a, b);
29561    let zero = _mm512_setzero_si512();
29562    _mm512_cmpeq_epi64_mask(and, zero)
29563}
29564
29565/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29566///
29567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi64_mask&expand=5926)
29568#[inline]
29569#[target_feature(enable = "avx512f")]
29570#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29571#[cfg_attr(test, assert_instr(vptestnmq))]
29572pub fn _mm512_mask_testn_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
29573    let and = _mm512_and_epi64(a, b);
29574    let zero = _mm512_setzero_si512();
29575    _mm512_mask_cmpeq_epi64_mask(k, and, zero)
29576}
29577
29578/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29579///
29580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi64_mask&expand=5925)
29581#[inline]
29582#[target_feature(enable = "avx512f,avx512vl")]
29583#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29584#[cfg_attr(test, assert_instr(vptestnmq))]
29585pub fn _mm256_testn_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
29586    let and = _mm256_and_si256(a, b);
29587    let zero = _mm256_setzero_si256();
29588    _mm256_cmpeq_epi64_mask(and, zero)
29589}
29590
29591/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29592///
29593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi64_mask&expand=5924)
29594#[inline]
29595#[target_feature(enable = "avx512f,avx512vl")]
29596#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29597#[cfg_attr(test, assert_instr(vptestnmq))]
29598pub fn _mm256_mask_testn_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29599    let and = _mm256_and_si256(a, b);
29600    let zero = _mm256_setzero_si256();
29601    _mm256_mask_cmpeq_epi64_mask(k, and, zero)
29602}
29603
29604/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29605///
29606/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi64_mask&expand=5923)
29607#[inline]
29608#[target_feature(enable = "avx512f,avx512vl")]
29609#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29610#[cfg_attr(test, assert_instr(vptestnmq))]
29611pub fn _mm_testn_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
29612    let and = _mm_and_si128(a, b);
29613    let zero = _mm_setzero_si128();
29614    _mm_cmpeq_epi64_mask(and, zero)
29615}
29616
29617/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29618///
29619/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi64_mask&expand=5922)
29620#[inline]
29621#[target_feature(enable = "avx512f,avx512vl")]
29622#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29623#[cfg_attr(test, assert_instr(vptestnmq))]
29624pub fn _mm_mask_testn_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29625    let and = _mm_and_si128(a, b);
29626    let zero = _mm_setzero_si128();
29627    _mm_mask_cmpeq_epi64_mask(k, and, zero)
29628}
29629
29630/// Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
29631///
29632/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_ps&expand=5671)
29633///
29634/// # Safety of non-temporal stores
29635///
29636/// After using this intrinsic, but before any other access to the memory that this intrinsic
29637/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
29638/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
29639/// return.
29640///
29641/// See [`_mm_sfence`] for details.
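///
/// # Examples
///
/// A minimal sketch of the store-then-fence pattern described above (not part of the upstream
/// docs), assuming nightly Rust with the unstable `stdarch_x86_avx512` feature and an
/// AVX-512F capable CPU; the doctest is marked `ignore` because the feature is unstable:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // 64-byte alignment is required by the non-temporal store.
/// #[repr(align(64))]
/// struct Aligned([f32; 16]);
///
/// let mut out = Aligned([0.0; 16]);
/// unsafe {
///     let v = _mm512_set1_ps(1.0);
///     _mm512_stream_ps(out.0.as_mut_ptr(), v);
///     // Fence before the stored memory is read (or otherwise accessed) again.
///     _mm_sfence();
/// }
/// assert_eq!(out.0, [1.0; 16]);
/// ```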
29642#[inline]
29643#[target_feature(enable = "avx512f")]
29644#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29645#[cfg_attr(test, assert_instr(vmovntps))]
29646#[allow(clippy::cast_ptr_alignment)]
29647pub unsafe fn _mm512_stream_ps(mem_addr: *mut f32, a: __m512) {
29648    crate::arch::asm!(
29649        vps!("vmovntps", ",{a}"),
29650        p = in(reg) mem_addr,
29651        a = in(zmm_reg) a,
29652        options(nostack, preserves_flags),
29653    );
29654}
29655
29656/// Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
29657///
29658/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_pd&expand=5667)
29659///
29660/// # Safety of non-temporal stores
29661///
29662/// After using this intrinsic, but before any other access to the memory that this intrinsic
29663/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
29664/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
29665/// return.
29666///
29667/// See [`_mm_sfence`] for details.
29668#[inline]
29669#[target_feature(enable = "avx512f")]
29670#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29671#[cfg_attr(test, assert_instr(vmovntpd))]
29672#[allow(clippy::cast_ptr_alignment)]
29673pub unsafe fn _mm512_stream_pd(mem_addr: *mut f64, a: __m512d) {
29674    crate::arch::asm!(
29675        vps!("vmovntpd", ",{a}"),
29676        p = in(reg) mem_addr,
29677        a = in(zmm_reg) a,
29678        options(nostack, preserves_flags),
29679    );
29680}
29681
29682/// Store 512-bits of integer data from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
29683///
29684/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_si512&expand=5675)
29685///
29686/// # Safety of non-temporal stores
29687///
29688/// After using this intrinsic, but before any other access to the memory that this intrinsic
29689/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
29690/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
29691/// return.
29692///
29693/// See [`_mm_sfence`] for details.
29694#[inline]
29695#[target_feature(enable = "avx512f")]
29696#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29697#[cfg_attr(test, assert_instr(vmovntdq))]
29698#[allow(clippy::cast_ptr_alignment)]
29699pub unsafe fn _mm512_stream_si512(mem_addr: *mut i32, a: __m512i) {
29700    crate::arch::asm!(
29701        vps!("vmovntdq", ",{a}"),
29702        p = in(reg) mem_addr,
29703        a = in(zmm_reg) a,
29704        options(nostack, preserves_flags),
29705    );
29706}
29707
29708/// Load 512-bits of integer data from memory into dst using a non-temporal memory hint. mem_addr
29709/// must be aligned on a 64-byte boundary or a general-protection exception may be generated. To
/// minimize caching, the data is flagged as non-temporal (unlikely to be used again soon).
29711///
29712/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_load_si512)
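///
/// # Examples
///
/// A minimal sketch (not part of the upstream docs), assuming nightly Rust with the unstable
/// `stdarch_x86_avx512` feature and an AVX-512F capable CPU; the doctest is marked `ignore`
/// because the feature is unstable:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // `__m512i` is itself 64-byte aligned, so a reference to it satisfies the
///     // alignment requirement of the non-temporal load.
///     let src = _mm512_set1_epi32(7);
///     let v = _mm512_stream_load_si512(&src);
///     assert_eq!(_mm512_cmpeq_epi32_mask(v, src), 0xFFFF);
/// }
/// ```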
29713#[inline]
29714#[target_feature(enable = "avx512f")]
29715#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29716pub unsafe fn _mm512_stream_load_si512(mem_addr: *const __m512i) -> __m512i {
29717    let dst: __m512i;
29718    crate::arch::asm!(
29719        vpl!("vmovntdqa {a}"),
29720        a = out(zmm_reg) dst,
29721        p = in(reg) mem_addr,
29722        options(pure, readonly, nostack, preserves_flags),
29723    );
29724    dst
29725}
29726
/// Sets packed single-precision (32-bit) floating-point elements in `dst` with the supplied values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_ps&expand=4931)
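///
/// Note that the first argument fills the highest lane; [`_mm512_setr_ps`] takes the same
/// values in lowest-lane-first (memory) order.
///
/// # Examples
///
/// A minimal sketch (not part of the upstream docs), assuming nightly Rust with the unstable
/// `stdarch_x86_avx512` feature and an AVX-512F capable CPU; the doctest is marked `ignore`
/// because the feature is unstable:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set_ps(
///         15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0,
///         7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0,
///     );
///     let b = _mm512_setr_ps(
///         0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
///         8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
///     );
///     // Both vectors hold 0.0 in lane 0 up to 15.0 in lane 15.
///     assert_eq!(_mm512_cmpeq_ps_mask(a, b), 0xFFFF);
/// }
/// ```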
29730#[inline]
29731#[target_feature(enable = "avx512f")]
29732#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29733pub fn _mm512_set_ps(
29734    e0: f32,
29735    e1: f32,
29736    e2: f32,
29737    e3: f32,
29738    e4: f32,
29739    e5: f32,
29740    e6: f32,
29741    e7: f32,
29742    e8: f32,
29743    e9: f32,
29744    e10: f32,
29745    e11: f32,
29746    e12: f32,
29747    e13: f32,
29748    e14: f32,
29749    e15: f32,
29750) -> __m512 {
29751    _mm512_setr_ps(
29752        e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
29753    )
29754}
29755
/// Sets packed single-precision (32-bit) floating-point elements in `dst` with the supplied
/// values in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_ps&expand=5008)
29760#[inline]
29761#[target_feature(enable = "avx512f")]
29762#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29763pub fn _mm512_setr_ps(
29764    e0: f32,
29765    e1: f32,
29766    e2: f32,
29767    e3: f32,
29768    e4: f32,
29769    e5: f32,
29770    e6: f32,
29771    e7: f32,
29772    e8: f32,
29773    e9: f32,
29774    e10: f32,
29775    e11: f32,
29776    e12: f32,
29777    e13: f32,
29778    e14: f32,
29779    e15: f32,
29780) -> __m512 {
29781    unsafe {
29782        let r = f32x16::new(
29783            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
29784        );
29785        transmute(r)
29786    }
29787}
29788
29789/// Broadcast 64-bit float `a` to all elements of `dst`.
29790///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_pd&expand=4975)
29792#[inline]
29793#[target_feature(enable = "avx512f")]
29794#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29795pub fn _mm512_set1_pd(a: f64) -> __m512d {
29796    unsafe { transmute(f64x8::splat(a)) }
29797}
29798
29799/// Broadcast 32-bit float `a` to all elements of `dst`.
29800///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_ps&expand=4981)
29802#[inline]
29803#[target_feature(enable = "avx512f")]
29804#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29805pub fn _mm512_set1_ps(a: f32) -> __m512 {
29806    unsafe { transmute(f32x16::splat(a)) }
29807}
29808
29809/// Sets packed 32-bit integers in `dst` with the supplied values.
29810///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi32&expand=4908)
29812#[inline]
29813#[target_feature(enable = "avx512f")]
29814#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29815pub fn _mm512_set_epi32(
29816    e15: i32,
29817    e14: i32,
29818    e13: i32,
29819    e12: i32,
29820    e11: i32,
29821    e10: i32,
29822    e9: i32,
29823    e8: i32,
29824    e7: i32,
29825    e6: i32,
29826    e5: i32,
29827    e4: i32,
29828    e3: i32,
29829    e2: i32,
29830    e1: i32,
29831    e0: i32,
29832) -> __m512i {
29833    _mm512_setr_epi32(
29834        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
29835    )
29836}
29837
29838/// Broadcast 8-bit integer a to all elements of dst.
29839///
29840/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi8&expand=4972)
29841#[inline]
29842#[target_feature(enable = "avx512f")]
29843#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29844pub fn _mm512_set1_epi8(a: i8) -> __m512i {
29845    unsafe { transmute(i8x64::splat(a)) }
29846}
29847
/// Broadcast 16-bit integer a to all elements of dst.
29849///
29850/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi16&expand=4944)
29851#[inline]
29852#[target_feature(enable = "avx512f")]
29853#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29854pub fn _mm512_set1_epi16(a: i16) -> __m512i {
29855    unsafe { transmute(i16x32::splat(a)) }
29856}
29857
29858/// Broadcast 32-bit integer `a` to all elements of `dst`.
29859///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi32)
29861#[inline]
29862#[target_feature(enable = "avx512f")]
29863#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29864pub fn _mm512_set1_epi32(a: i32) -> __m512i {
29865    unsafe { transmute(i32x16::splat(a)) }
29866}
29867
29868/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29869///
29870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi32&expand=4951)
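///
/// # Examples
///
/// A minimal sketch of the writemask behaviour (not part of the upstream docs), assuming
/// nightly Rust with the unstable `stdarch_x86_avx512` feature and an AVX-512F capable CPU;
/// the doctest is marked `ignore` because the feature is unstable:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let src = _mm512_setzero_si512();
///     // Lanes 0..=7 (mask bits set) receive the broadcast value; lanes 8..=15 are
///     // copied from `src`.
///     let r = _mm512_mask_set1_epi32(src, 0x00FF, 42);
///     let expected =
///         _mm512_setr_epi32(42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0, 0, 0, 0, 0, 0);
///     assert_eq!(_mm512_cmpeq_epi32_mask(r, expected), 0xFFFF);
/// }
/// ```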
29871#[inline]
29872#[target_feature(enable = "avx512f")]
29873#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29874#[cfg_attr(test, assert_instr(vpbroadcastd))]
29875pub fn _mm512_mask_set1_epi32(src: __m512i, k: __mmask16, a: i32) -> __m512i {
29876    unsafe {
29877        let r = _mm512_set1_epi32(a).as_i32x16();
29878        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
29879    }
29880}
29881
29882/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29883///
29884/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi32&expand=4952)
29885#[inline]
29886#[target_feature(enable = "avx512f")]
29887#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29888#[cfg_attr(test, assert_instr(vpbroadcastd))]
29889pub fn _mm512_maskz_set1_epi32(k: __mmask16, a: i32) -> __m512i {
29890    unsafe {
29891        let r = _mm512_set1_epi32(a).as_i32x16();
29892        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
29893    }
29894}
29895
29896/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29897///
29898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi32&expand=4948)
29899#[inline]
29900#[target_feature(enable = "avx512f,avx512vl")]
29901#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29902#[cfg_attr(test, assert_instr(vpbroadcastd))]
29903pub fn _mm256_mask_set1_epi32(src: __m256i, k: __mmask8, a: i32) -> __m256i {
29904    unsafe {
29905        let r = _mm256_set1_epi32(a).as_i32x8();
29906        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
29907    }
29908}
29909
29910/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29911///
29912/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi32&expand=4949)
29913#[inline]
29914#[target_feature(enable = "avx512f,avx512vl")]
29915#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29916#[cfg_attr(test, assert_instr(vpbroadcastd))]
29917pub fn _mm256_maskz_set1_epi32(k: __mmask8, a: i32) -> __m256i {
29918    unsafe {
29919        let r = _mm256_set1_epi32(a).as_i32x8();
29920        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
29921    }
29922}
29923
29924/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29925///
29926/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi32&expand=4945)
29927#[inline]
29928#[target_feature(enable = "avx512f,avx512vl")]
29929#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29930#[cfg_attr(test, assert_instr(vpbroadcastd))]
29931pub fn _mm_mask_set1_epi32(src: __m128i, k: __mmask8, a: i32) -> __m128i {
29932    unsafe {
29933        let r = _mm_set1_epi32(a).as_i32x4();
29934        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
29935    }
29936}
29937
29938/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29939///
29940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi32&expand=4946)
29941#[inline]
29942#[target_feature(enable = "avx512f,avx512vl")]
29943#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29944#[cfg_attr(test, assert_instr(vpbroadcastd))]
29945pub fn _mm_maskz_set1_epi32(k: __mmask8, a: i32) -> __m128i {
29946    unsafe {
29947        let r = _mm_set1_epi32(a).as_i32x4();
29948        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
29949    }
29950}
29951
29952/// Broadcast 64-bit integer `a` to all elements of `dst`.
29953///
29954/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi64&expand=4961)
29955#[inline]
29956#[target_feature(enable = "avx512f")]
29957#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29958pub fn _mm512_set1_epi64(a: i64) -> __m512i {
29959    unsafe { transmute(i64x8::splat(a)) }
29960}
29961
29962/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29963///
29964/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi64&expand=4959)
29965#[inline]
29966#[target_feature(enable = "avx512f")]
29967#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29968#[cfg_attr(test, assert_instr(vpbroadcastq))]
29969pub fn _mm512_mask_set1_epi64(src: __m512i, k: __mmask8, a: i64) -> __m512i {
29970    unsafe {
29971        let r = _mm512_set1_epi64(a).as_i64x8();
29972        transmute(simd_select_bitmask(k, r, src.as_i64x8()))
29973    }
29974}
29975
29976/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29977///
29978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi64&expand=4960)
29979#[inline]
29980#[target_feature(enable = "avx512f")]
29981#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29982#[cfg_attr(test, assert_instr(vpbroadcastq))]
29983pub fn _mm512_maskz_set1_epi64(k: __mmask8, a: i64) -> __m512i {
29984    unsafe {
29985        let r = _mm512_set1_epi64(a).as_i64x8();
29986        transmute(simd_select_bitmask(k, r, i64x8::ZERO))
29987    }
29988}
29989
29990/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29991///
29992/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi64&expand=4957)
29993#[inline]
29994#[target_feature(enable = "avx512f,avx512vl")]
29995#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29996#[cfg_attr(test, assert_instr(vpbroadcastq))]
29997pub fn _mm256_mask_set1_epi64(src: __m256i, k: __mmask8, a: i64) -> __m256i {
29998    unsafe {
29999        let r = _mm256_set1_epi64x(a).as_i64x4();
30000        transmute(simd_select_bitmask(k, r, src.as_i64x4()))
30001    }
30002}
30003
30004/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
30005///
30006/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi64&expand=4958)
30007#[inline]
30008#[target_feature(enable = "avx512f,avx512vl")]
30009#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30010#[cfg_attr(test, assert_instr(vpbroadcastq))]
30011pub fn _mm256_maskz_set1_epi64(k: __mmask8, a: i64) -> __m256i {
30012    unsafe {
30013        let r = _mm256_set1_epi64x(a).as_i64x4();
30014        transmute(simd_select_bitmask(k, r, i64x4::ZERO))
30015    }
30016}
30017
30018/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
30019///
30020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi64&expand=4954)
30021#[inline]
30022#[target_feature(enable = "avx512f,avx512vl")]
30023#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30024#[cfg_attr(test, assert_instr(vpbroadcastq))]
30025pub fn _mm_mask_set1_epi64(src: __m128i, k: __mmask8, a: i64) -> __m128i {
30026    unsafe {
30027        let r = _mm_set1_epi64x(a).as_i64x2();
30028        transmute(simd_select_bitmask(k, r, src.as_i64x2()))
30029    }
30030}
30031
30032/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
30033///
30034/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi64&expand=4955)
30035#[inline]
30036#[target_feature(enable = "avx512f,avx512vl")]
30037#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30038#[cfg_attr(test, assert_instr(vpbroadcastq))]
30039pub fn _mm_maskz_set1_epi64(k: __mmask8, a: i64) -> __m128i {
30040    unsafe {
30041        let r = _mm_set1_epi64x(a).as_i64x2();
30042        transmute(simd_select_bitmask(k, r, i64x2::ZERO))
30043    }
30044}
30045
30046/// Set packed 64-bit integers in dst with the repeated 4 element sequence.
30047///
30048/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_epi64&expand=4983)
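///
/// # Examples
///
/// A minimal sketch (not part of the upstream docs), assuming nightly Rust with the unstable
/// `stdarch_x86_avx512` feature and an AVX-512F capable CPU; the doctest is marked `ignore`
/// because the feature is unstable:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // The four arguments repeat twice; the last argument lands in lane 0.
///     let v = _mm512_set4_epi64(3, 2, 1, 0);
///     let expected = _mm512_setr_epi64(0, 1, 2, 3, 0, 1, 2, 3);
///     assert_eq!(_mm512_cmpeq_epi64_mask(v, expected), 0xFF);
/// }
/// ```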
30049#[inline]
30050#[target_feature(enable = "avx512f")]
30051#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30052pub fn _mm512_set4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
30053    _mm512_set_epi64(d, c, b, a, d, c, b, a)
30054}
30055
30056/// Set packed 64-bit integers in dst with the repeated 4 element sequence in reverse order.
30057///
30058/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_epi64&expand=5010)
30059#[inline]
30060#[target_feature(enable = "avx512f")]
30061#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30062pub fn _mm512_setr4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
30063    _mm512_set_epi64(a, b, c, d, a, b, c, d)
30064}
30065
30066/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in mask vector k.
30067///
30068/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_ps_mask&expand=1074)
30069#[inline]
30070#[target_feature(enable = "avx512f")]
30071#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30072#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30073pub fn _mm512_cmplt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30074    _mm512_cmp_ps_mask::<_CMP_LT_OS>(a, b)
30075}
30076
30077/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30078///
30079/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_ps_mask&expand=1075)
30080#[inline]
30081#[target_feature(enable = "avx512f")]
30082#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30083#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30084pub fn _mm512_mask_cmplt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30085    _mm512_mask_cmp_ps_mask::<_CMP_LT_OS>(k1, a, b)
30086}
30087
30088/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k.
30089///
30090/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnlt_ps_mask&expand=1154)
30091#[inline]
30092#[target_feature(enable = "avx512f")]
30093#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30094#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30095pub fn _mm512_cmpnlt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30096    _mm512_cmp_ps_mask::<_CMP_NLT_US>(a, b)
30097}
30098
30099/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30100///
30101/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnlt_ps_mask&expand=1155)
30102#[inline]
30103#[target_feature(enable = "avx512f")]
30104#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30105#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30106pub fn _mm512_mask_cmpnlt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30107    _mm512_mask_cmp_ps_mask::<_CMP_NLT_US>(k1, a, b)
30108}
30109
30110/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k.
30111///
30112/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_ps_mask&expand=1013)
30113#[inline]
30114#[target_feature(enable = "avx512f")]
30115#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30116#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30117pub fn _mm512_cmple_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30118    _mm512_cmp_ps_mask::<_CMP_LE_OS>(a, b)
30119}
30120
30121/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30122///
30123/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_ps_mask&expand=1014)
30124#[inline]
30125#[target_feature(enable = "avx512f")]
30126#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30127#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30128pub fn _mm512_mask_cmple_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30129    _mm512_mask_cmp_ps_mask::<_CMP_LE_OS>(k1, a, b)
30130}
30131
30132/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k.
30133///
30134/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnle_ps_mask&expand=1146)
30135#[inline]
30136#[target_feature(enable = "avx512f")]
30137#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30138#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30139pub fn _mm512_cmpnle_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30140    _mm512_cmp_ps_mask::<_CMP_NLE_US>(a, b)
30141}
30142
30143/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30144///
30145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnle_ps_mask&expand=1147)
30146#[inline]
30147#[target_feature(enable = "avx512f")]
30148#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30149#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30150pub fn _mm512_mask_cmpnle_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30151    _mm512_mask_cmp_ps_mask::<_CMP_NLE_US>(k1, a, b)
30152}
30153
30154/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in mask vector k.
30155///
30156/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_ps_mask&expand=828)
30157#[inline]
30158#[target_feature(enable = "avx512f")]
30159#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30160#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30161pub fn _mm512_cmpeq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30162    _mm512_cmp_ps_mask::<_CMP_EQ_OQ>(a, b)
30163}
30164
30165/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30166///
30167/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_ps_mask&expand=829)
30168#[inline]
30169#[target_feature(enable = "avx512f")]
30170#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30171#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30172pub fn _mm512_mask_cmpeq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30173    _mm512_mask_cmp_ps_mask::<_CMP_EQ_OQ>(k1, a, b)
30174}
30175
30176/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k.
30177///
30178/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_ps_mask&expand=1130)
30179#[inline]
30180#[target_feature(enable = "avx512f")]
30181#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30182#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30183pub fn _mm512_cmpneq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30184    _mm512_cmp_ps_mask::<_CMP_NEQ_UQ>(a, b)
30185}
30186
30187/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30188///
30189/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_ps_mask&expand=1131)
30190#[inline]
30191#[target_feature(enable = "avx512f")]
30192#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30193#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30194pub fn _mm512_mask_cmpneq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30195    _mm512_mask_cmp_ps_mask::<_CMP_NEQ_UQ>(k1, a, b)
30196}
30197
30198/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30199///
30200/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_ps_mask&expand=749)
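///
/// # Examples
///
/// A minimal sketch using one of the predefined comparison predicates (not part of the
/// upstream docs), assuming nightly Rust with the unstable `stdarch_x86_avx512` feature and
/// an AVX-512F capable CPU; the doctest is marked `ignore` because the feature is unstable:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_ps(1.0);
///     let b = _mm512_set1_ps(2.0);
///     // `_CMP_LT_OS` selects an ordered, signaling less-than comparison; every lane
///     // of `a` is less than the corresponding lane of `b`.
///     let m = _mm512_cmp_ps_mask::<_CMP_LT_OS>(a, b);
///     assert_eq!(m, 0xFFFF);
/// }
/// ```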
30201#[inline]
30202#[target_feature(enable = "avx512f")]
30203#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30204#[rustc_legacy_const_generics(2)]
30205#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30206pub fn _mm512_cmp_ps_mask<const IMM8: i32>(a: __m512, b: __m512) -> __mmask16 {
30207    unsafe {
30208        static_assert_uimm_bits!(IMM8, 5);
30209        let neg_one = -1;
30210        let a = a.as_f32x16();
30211        let b = b.as_f32x16();
30212        let r = vcmpps(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
30213        r.cast_unsigned()
30214    }
30215}
30216
30217/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30218///
30219/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_ps_mask&expand=750)
30220#[inline]
30221#[target_feature(enable = "avx512f")]
30222#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30223#[rustc_legacy_const_generics(3)]
30224#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30225pub fn _mm512_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30226    unsafe {
30227        static_assert_uimm_bits!(IMM8, 5);
30228        let a = a.as_f32x16();
30229        let b = b.as_f32x16();
30230        let r = vcmpps(a, b, IMM8, k1 as i16, _MM_FROUND_CUR_DIRECTION);
30231        r.cast_unsigned()
30232    }
30233}
30234
30235/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30236///
30237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_ps_mask&expand=747)
30238#[inline]
30239#[target_feature(enable = "avx512f,avx512vl")]
30240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30241#[rustc_legacy_const_generics(2)]
30242#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30243pub fn _mm256_cmp_ps_mask<const IMM8: i32>(a: __m256, b: __m256) -> __mmask8 {
30244    unsafe {
30245        static_assert_uimm_bits!(IMM8, 5);
30246        let neg_one = -1;
30247        let a = a.as_f32x8();
30248        let b = b.as_f32x8();
30249        let r = vcmpps256(a, b, IMM8, neg_one);
30250        r.cast_unsigned()
30251    }
30252}
30253
30254/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30255///
30256/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_ps_mask&expand=748)
30257#[inline]
30258#[target_feature(enable = "avx512f,avx512vl")]
30259#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30260#[rustc_legacy_const_generics(3)]
30261#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30262pub fn _mm256_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m256, b: __m256) -> __mmask8 {
30263    unsafe {
30264        static_assert_uimm_bits!(IMM8, 5);
30265        let a = a.as_f32x8();
30266        let b = b.as_f32x8();
30267        let r = vcmpps256(a, b, IMM8, k1 as i8);
30268        r.cast_unsigned()
30269    }
30270}
30271
30272/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30273///
30274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ps_mask&expand=745)
30275#[inline]
30276#[target_feature(enable = "avx512f,avx512vl")]
30277#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30278#[rustc_legacy_const_generics(2)]
30279#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30280pub fn _mm_cmp_ps_mask<const IMM8: i32>(a: __m128, b: __m128) -> __mmask8 {
30281    unsafe {
30282        static_assert_uimm_bits!(IMM8, 5);
30283        let neg_one = -1;
30284        let a = a.as_f32x4();
30285        let b = b.as_f32x4();
30286        let r = vcmpps128(a, b, IMM8, neg_one);
30287        r.cast_unsigned()
30288    }
30289}
30290
30291/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30292///
30293/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_ps_mask&expand=746)
30294#[inline]
30295#[target_feature(enable = "avx512f,avx512vl")]
30296#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30297#[rustc_legacy_const_generics(3)]
30298#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30299pub fn _mm_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m128, b: __m128) -> __mmask8 {
30300    unsafe {
30301        static_assert_uimm_bits!(IMM8, 5);
30302        let a = a.as_f32x4();
30303        let b = b.as_f32x4();
30304        let r = vcmpps128(a, b, IMM8, k1 as i8);
30305        r.cast_unsigned()
30306    }
30307}
30308
30309/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.\
30310/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30311///
30312/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_round_ps_mask&expand=753)
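///
/// # Examples
///
/// A minimal sketch of exception suppression (not part of the upstream docs), assuming
/// nightly Rust with the unstable `stdarch_x86_avx512` feature and an AVX-512F capable CPU;
/// the doctest is marked `ignore` because the feature is unstable:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_ps(f32::NAN);
///     let b = _mm512_set1_ps(1.0);
///     // `_CMP_LT_OS` is a signaling predicate, so NaN inputs would normally record an
///     // invalid-operation exception; `_MM_FROUND_NO_EXC` suppresses that. NaN never
///     // compares as less-than, so the resulting mask is empty.
///     let m = _mm512_cmp_round_ps_mask::<_CMP_LT_OS, _MM_FROUND_NO_EXC>(a, b);
///     assert_eq!(m, 0);
/// }
/// ```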
30313#[inline]
30314#[target_feature(enable = "avx512f")]
30315#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30316#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30317#[rustc_legacy_const_generics(2, 3)]
30318pub fn _mm512_cmp_round_ps_mask<const IMM5: i32, const SAE: i32>(
30319    a: __m512,
30320    b: __m512,
30321) -> __mmask16 {
30322    unsafe {
30323        static_assert_uimm_bits!(IMM5, 5);
30324        static_assert_mantissas_sae!(SAE);
30325        let neg_one = -1;
30326        let a = a.as_f32x16();
30327        let b = b.as_f32x16();
30328        let r = vcmpps(a, b, IMM5, neg_one, SAE);
30329        r.cast_unsigned()
30330    }
30331}
30332
30333/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).\
30334/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30335///
30336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_round_ps_mask&expand=754)
30337#[inline]
30338#[target_feature(enable = "avx512f")]
30339#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30340#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30341#[rustc_legacy_const_generics(3, 4)]
30342pub fn _mm512_mask_cmp_round_ps_mask<const IMM5: i32, const SAE: i32>(
30343    m: __mmask16,
30344    a: __m512,
30345    b: __m512,
30346) -> __mmask16 {
30347    unsafe {
30348        static_assert_uimm_bits!(IMM5, 5);
30349        static_assert_mantissas_sae!(SAE);
30350        let a = a.as_f32x16();
30351        let b = b.as_f32x16();
30352        let r = vcmpps(a, b, IMM5, m as i16, SAE);
30353        r.cast_unsigned()
30354    }
30355}
30356
30357/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k.
30358///
30359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpord_ps_mask&expand=1162)
30360#[inline]
30361#[target_feature(enable = "avx512f")]
30362#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30364pub fn _mm512_cmpord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30365    _mm512_cmp_ps_mask::<_CMP_ORD_Q>(a, b)
30366}
30367
30368/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30369///
30370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpord_ps_mask&expand=1163)
30371#[inline]
30372#[target_feature(enable = "avx512f")]
30373#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30374#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30375pub fn _mm512_mask_cmpord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30376    _mm512_mask_cmp_ps_mask::<_CMP_ORD_Q>(k1, a, b)
30377}
30378
30379/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k.
30380///
30381/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpunord_ps_mask&expand=1170)
30382#[inline]
30383#[target_feature(enable = "avx512f")]
30384#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30385#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30386pub fn _mm512_cmpunord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30387    _mm512_cmp_ps_mask::<_CMP_UNORD_Q>(a, b)
30388}
30389
30390/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30391///
30392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpunord_ps_mask&expand=1171)
30393#[inline]
30394#[target_feature(enable = "avx512f")]
30395#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30396#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30397pub fn _mm512_mask_cmpunord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30398    _mm512_mask_cmp_ps_mask::<_CMP_UNORD_Q>(k1, a, b)
30399}
30400
30401/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in mask vector k.
30402///
30403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_pd_mask&expand=1071)
30404#[inline]
30405#[target_feature(enable = "avx512f")]
30406#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30407#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30408pub fn _mm512_cmplt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30409    _mm512_cmp_pd_mask::<_CMP_LT_OS>(a, b)
30410}
30411
30412/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30413///
30414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_pd_mask&expand=1072)
30415#[inline]
30416#[target_feature(enable = "avx512f")]
30417#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30418#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30419pub fn _mm512_mask_cmplt_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30420    _mm512_mask_cmp_pd_mask::<_CMP_LT_OS>(k1, a, b)
30421}
30422
30423/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k.
30424///
30425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnlt_pd_mask&expand=1151)
30426#[inline]
30427#[target_feature(enable = "avx512f")]
30428#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30429#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30430pub fn _mm512_cmpnlt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30431    _mm512_cmp_pd_mask::<_CMP_NLT_US>(a, b)
30432}
30433
30434/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30435///
30436/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnlt_pd_mask&expand=1152)
30437#[inline]
30438#[target_feature(enable = "avx512f")]
30439#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30440#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30441pub fn _mm512_mask_cmpnlt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30442    _mm512_mask_cmp_pd_mask::<_CMP_NLT_US>(m, a, b)
30443}
30444
30445/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k.
30446///
30447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_pd_mask&expand=1010)
30448#[inline]
30449#[target_feature(enable = "avx512f")]
30450#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30451#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30452pub fn _mm512_cmple_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30453    _mm512_cmp_pd_mask::<_CMP_LE_OS>(a, b)
30454}
30455
30456/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30457///
30458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_pd_mask&expand=1011)
30459#[inline]
30460#[target_feature(enable = "avx512f")]
30461#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30462#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30463pub fn _mm512_mask_cmple_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30464    _mm512_mask_cmp_pd_mask::<_CMP_LE_OS>(k1, a, b)
30465}
30466
30467/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k.
30468///
30469/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnle_pd_mask&expand=1143)
30470#[inline]
30471#[target_feature(enable = "avx512f")]
30472#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30473#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30474pub fn _mm512_cmpnle_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30475    _mm512_cmp_pd_mask::<_CMP_NLE_US>(a, b)
30476}
30477
30478/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30479///
30480/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnle_pd_mask&expand=1144)
30481#[inline]
30482#[target_feature(enable = "avx512f")]
30483#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30484#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30485pub fn _mm512_mask_cmpnle_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30486    _mm512_mask_cmp_pd_mask::<_CMP_NLE_US>(k1, a, b)
30487}
30488
30489/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in mask vector k.
30490///
30491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_pd_mask&expand=822)
30492#[inline]
30493#[target_feature(enable = "avx512f")]
30494#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30495#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30496pub fn _mm512_cmpeq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30497    _mm512_cmp_pd_mask::<_CMP_EQ_OQ>(a, b)
30498}
30499
30500/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30501///
30502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_pd_mask&expand=823)
30503#[inline]
30504#[target_feature(enable = "avx512f")]
30505#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30506#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30507pub fn _mm512_mask_cmpeq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30508    _mm512_mask_cmp_pd_mask::<_CMP_EQ_OQ>(k1, a, b)
30509}
30510
30511/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k.
30512///
30513/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_pd_mask&expand=1127)
30514#[inline]
30515#[target_feature(enable = "avx512f")]
30516#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30517#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30518pub fn _mm512_cmpneq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30519    _mm512_cmp_pd_mask::<_CMP_NEQ_UQ>(a, b)
30520}
30521
30522/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30523///
30524/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_pd_mask&expand=1128)
30525#[inline]
30526#[target_feature(enable = "avx512f")]
30527#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30528#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30529pub fn _mm512_mask_cmpneq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30530    _mm512_mask_cmp_pd_mask::<_CMP_NEQ_UQ>(k1, a, b)
30531}
30532
30533/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30534///
30535/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_pd_mask&expand=741)
30536#[inline]
30537#[target_feature(enable = "avx512f")]
30538#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30539#[rustc_legacy_const_generics(2)]
30540#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30541pub fn _mm512_cmp_pd_mask<const IMM8: i32>(a: __m512d, b: __m512d) -> __mmask8 {
30542    unsafe {
30543        static_assert_uimm_bits!(IMM8, 5);
30544        let neg_one = -1;
30545        let a = a.as_f64x8();
30546        let b = b.as_f64x8();
30547        let r = vcmppd(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
30548        r.cast_unsigned()
30549    }
30550}
30551
30552/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30553///
30554/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_pd_mask&expand=742)
30555#[inline]
30556#[target_feature(enable = "avx512f")]
30557#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30558#[rustc_legacy_const_generics(3)]
30559#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30560pub fn _mm512_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30561    unsafe {
30562        static_assert_uimm_bits!(IMM8, 5);
30563        let a = a.as_f64x8();
30564        let b = b.as_f64x8();
30565        let r = vcmppd(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
30566        r.cast_unsigned()
30567    }
30568}
30569
30570/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30571///
30572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_pd_mask&expand=739)
30573#[inline]
30574#[target_feature(enable = "avx512f,avx512vl")]
30575#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30576#[rustc_legacy_const_generics(2)]
30577#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30578pub fn _mm256_cmp_pd_mask<const IMM8: i32>(a: __m256d, b: __m256d) -> __mmask8 {
30579    unsafe {
30580        static_assert_uimm_bits!(IMM8, 5);
30581        let neg_one = -1;
30582        let a = a.as_f64x4();
30583        let b = b.as_f64x4();
30584        let r = vcmppd256(a, b, IMM8, neg_one);
30585        r.cast_unsigned()
30586    }
30587}
30588
30589/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30590///
30591/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_pd_mask&expand=740)
30592#[inline]
30593#[target_feature(enable = "avx512f,avx512vl")]
30594#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30595#[rustc_legacy_const_generics(3)]
30596#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30597pub fn _mm256_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m256d, b: __m256d) -> __mmask8 {
30598    unsafe {
30599        static_assert_uimm_bits!(IMM8, 5);
30600        let a = a.as_f64x4();
30601        let b = b.as_f64x4();
30602        let r = vcmppd256(a, b, IMM8, k1 as i8);
30603        r.cast_unsigned()
30604    }
30605}
30606
30607/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30608///
30609/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_pd_mask&expand=737)
30610#[inline]
30611#[target_feature(enable = "avx512f,avx512vl")]
30612#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30613#[rustc_legacy_const_generics(2)]
30614#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30615pub fn _mm_cmp_pd_mask<const IMM8: i32>(a: __m128d, b: __m128d) -> __mmask8 {
30616    unsafe {
30617        static_assert_uimm_bits!(IMM8, 5);
30618        let neg_one = -1;
30619        let a = a.as_f64x2();
30620        let b = b.as_f64x2();
30621        let r = vcmppd128(a, b, IMM8, neg_one);
30622        r.cast_unsigned()
30623    }
30624}
30625
30626/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30627///
30628/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_pd_mask&expand=738)
30629#[inline]
30630#[target_feature(enable = "avx512f,avx512vl")]
30631#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30632#[rustc_legacy_const_generics(3)]
30633#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30634pub fn _mm_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d, b: __m128d) -> __mmask8 {
30635    unsafe {
30636        static_assert_uimm_bits!(IMM8, 5);
30637        let a = a.as_f64x2();
30638        let b = b.as_f64x2();
30639        let r = vcmppd128(a, b, IMM8, k1 as i8);
30640        r.cast_unsigned()
30641    }
30642}
30643
30644/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.\
30645/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30646///
30647/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_round_pd_mask&expand=751)
30648#[inline]
30649#[target_feature(enable = "avx512f")]
30650#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30651#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30652#[rustc_legacy_const_generics(2, 3)]
30653pub fn _mm512_cmp_round_pd_mask<const IMM5: i32, const SAE: i32>(
30654    a: __m512d,
30655    b: __m512d,
30656) -> __mmask8 {
30657    unsafe {
30658        static_assert_uimm_bits!(IMM5, 5);
30659        static_assert_mantissas_sae!(SAE);
30660        let neg_one = -1;
30661        let a = a.as_f64x8();
30662        let b = b.as_f64x8();
30663        let r = vcmppd(a, b, IMM5, neg_one, SAE);
30664        r.cast_unsigned()
30665    }
30666}
30667
30668/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).\
30669/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30670///
30671/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_round_pd_mask&expand=752)
30672#[inline]
30673#[target_feature(enable = "avx512f")]
30674#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30675#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30676#[rustc_legacy_const_generics(3, 4)]
30677pub fn _mm512_mask_cmp_round_pd_mask<const IMM5: i32, const SAE: i32>(
30678    k1: __mmask8,
30679    a: __m512d,
30680    b: __m512d,
30681) -> __mmask8 {
30682    unsafe {
30683        static_assert_uimm_bits!(IMM5, 5);
30684        static_assert_mantissas_sae!(SAE);
30685        let a = a.as_f64x8();
30686        let b = b.as_f64x8();
30687        let r = vcmppd(a, b, IMM5, k1 as i8, SAE);
30688        r.cast_unsigned()
30689    }
30690}
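
// Illustrative sketch (not part of the upstream source): the `_round` variants take a second
// const parameter that must be either _MM_FROUND_CUR_DIRECTION or _MM_FROUND_NO_EXC; the
// latter suppresses floating-point exception reporting for the compare. The helper name is
// made up for this example.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _demo_cmp_round_pd_mask_sae(a: __m512d, b: __m512d) -> __mmask8 {
    // Same predicate as _mm512_cmp_pd_mask::<_CMP_NLT_UQ>, but with exceptions suppressed.
    _mm512_cmp_round_pd_mask::<_CMP_NLT_UQ, _MM_FROUND_NO_EXC>(a, b)
}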
30691
30692/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k.
30693///
30694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpord_pd_mask&expand=1159)
30695#[inline]
30696#[target_feature(enable = "avx512f")]
30697#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30698#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30699pub fn _mm512_cmpord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30700    _mm512_cmp_pd_mask::<_CMP_ORD_Q>(a, b)
30701}
30702
30703/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30704///
30705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpord_pd_mask&expand=1160)
30706#[inline]
30707#[target_feature(enable = "avx512f")]
30708#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30709#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30710pub fn _mm512_mask_cmpord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30711    _mm512_mask_cmp_pd_mask::<_CMP_ORD_Q>(k1, a, b)
30712}
30713
30714/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k.
30715///
30716/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpunord_pd_mask&expand=1167)
30717#[inline]
30718#[target_feature(enable = "avx512f")]
30719#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30720#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30721pub fn _mm512_cmpunord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30722    _mm512_cmp_pd_mask::<_CMP_UNORD_Q>(a, b)
30723}
30724
30725/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30726///
30727/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpunord_pd_mask&expand=1168)
30728#[inline]
30729#[target_feature(enable = "avx512f")]
30730#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30731#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30732pub fn _mm512_mask_cmpunord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30733    _mm512_mask_cmp_pd_mask::<_CMP_UNORD_Q>(k1, a, b)
30734}
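
// Illustrative sketch (not part of the upstream source): cmpord/cmpunord are shorthands for
// the _CMP_ORD_Q and _CMP_UNORD_Q predicates, so the two masks below partition the lanes
// into "both operands numeric" and "at least one operand NaN". The helper is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _demo_split_nan_lanes_pd(a: __m512d, b: __m512d) -> (__mmask8, __mmask8) {
    let ordered = _mm512_cmpord_pd_mask(a, b);
    let unordered = _mm512_cmpunord_pd_mask(a, b);
    // Every lane lands in exactly one of the two masks.
    debug_assert_eq!(ordered ^ unordered, 0xFF);
    (ordered, unordered)
}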
30735
30736/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.
30737///
30738/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ss_mask&expand=763)
30739#[inline]
30740#[target_feature(enable = "avx512f")]
30741#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30742#[rustc_legacy_const_generics(2)]
30743#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30744pub fn _mm_cmp_ss_mask<const IMM8: i32>(a: __m128, b: __m128) -> __mmask8 {
30745    unsafe {
30746        static_assert_uimm_bits!(IMM8, 5);
30747        let neg_one = -1;
30748        let r = vcmpss(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
30749        r.cast_unsigned()
30750    }
30751}
30752
30753/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).
30754///
30755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_ss_mask&expand=764)
30756#[inline]
30757#[target_feature(enable = "avx512f")]
30758#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30759#[rustc_legacy_const_generics(3)]
30760#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30761pub fn _mm_mask_cmp_ss_mask<const IMM8: i32>(k1: __mmask8, a: __m128, b: __m128) -> __mmask8 {
30762    unsafe {
30763        static_assert_uimm_bits!(IMM8, 5);
30764        let r = vcmpss(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
30765        r.cast_unsigned()
30766    }
30767}
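
// Illustrative sketch (not part of the upstream source): the scalar (`_ss`) forms compare
// only the lowest element, so only bit 0 of the returned mask is meaningful. The helper name
// is made up for this example.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _demo_scalar_ss_compare(a: __m128, b: __m128) -> bool {
    // True when the low f32 of `a` equals (ordered, quiet) the low f32 of `b`.
    (_mm_cmp_ss_mask::<_CMP_EQ_OQ>(a, b) & 1) != 0
}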
30768
30769/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.\
30770/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30771///
30772/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_round_ss_mask&expand=757)
30773#[inline]
30774#[target_feature(enable = "avx512f")]
30775#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30776#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30777#[rustc_legacy_const_generics(2, 3)]
30778pub fn _mm_cmp_round_ss_mask<const IMM5: i32, const SAE: i32>(a: __m128, b: __m128) -> __mmask8 {
30779    unsafe {
30780        static_assert_uimm_bits!(IMM5, 5);
30781        static_assert_mantissas_sae!(SAE);
30782        let neg_one = -1;
30783        let r = vcmpss(a, b, IMM5, neg_one, SAE);
30784        r.cast_unsigned()
30785    }
30786}
30787
30788/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).\
30789/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30790///
30791/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_round_ss_mask&expand=758)
30792#[inline]
30793#[target_feature(enable = "avx512f")]
30794#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30795#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30796#[rustc_legacy_const_generics(3, 4)]
30797pub fn _mm_mask_cmp_round_ss_mask<const IMM5: i32, const SAE: i32>(
30798    k1: __mmask8,
30799    a: __m128,
30800    b: __m128,
30801) -> __mmask8 {
30802    unsafe {
30803        static_assert_uimm_bits!(IMM5, 5);
30804        static_assert_mantissas_sae!(SAE);
30805        let r = vcmpss(a, b, IMM5, k1 as i8, SAE);
30806        r.cast_unsigned()
30807    }
30808}
30809
30810/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.
30811///
30812/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_sd_mask&expand=760)
30813#[inline]
30814#[target_feature(enable = "avx512f")]
30815#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30816#[rustc_legacy_const_generics(2)]
30817#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30818pub fn _mm_cmp_sd_mask<const IMM8: i32>(a: __m128d, b: __m128d) -> __mmask8 {
30819    unsafe {
30820        static_assert_uimm_bits!(IMM8, 5);
30821        let neg_one = -1;
30822        let r = vcmpsd(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
30823        r.cast_unsigned()
30824    }
30825}
30826
30827/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).
30828///
30829/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_sd_mask&expand=761)
30830#[inline]
30831#[target_feature(enable = "avx512f")]
30832#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30833#[rustc_legacy_const_generics(3)]
30834#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30835pub fn _mm_mask_cmp_sd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d, b: __m128d) -> __mmask8 {
30836    unsafe {
30837        static_assert_uimm_bits!(IMM8, 5);
30838        let r = vcmpsd(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
30839        r.cast_unsigned()
30840    }
30841}
30842
30843/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.\
30844/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30845///
30846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_round_sd_mask&expand=755)
30847#[inline]
30848#[target_feature(enable = "avx512f")]
30849#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30850#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30851#[rustc_legacy_const_generics(2, 3)]
30852pub fn _mm_cmp_round_sd_mask<const IMM5: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __mmask8 {
30853    unsafe {
30854        static_assert_uimm_bits!(IMM5, 5);
30855        static_assert_mantissas_sae!(SAE);
30856        let neg_one = -1;
30857        let r = vcmpsd(a, b, IMM5, neg_one, SAE);
30858        r.cast_unsigned()
30859    }
30860}
30861
30862/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).\
30863/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30864///
30865/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_round_sd_mask&expand=756)
30866#[inline]
30867#[target_feature(enable = "avx512f")]
30868#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30869#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30870#[rustc_legacy_const_generics(3, 4)]
30871pub fn _mm_mask_cmp_round_sd_mask<const IMM5: i32, const SAE: i32>(
30872    k1: __mmask8,
30873    a: __m128d,
30874    b: __m128d,
30875) -> __mmask8 {
30876    unsafe {
30877        static_assert_uimm_bits!(IMM5, 5);
30878        static_assert_mantissas_sae!(SAE);
30879        let r = vcmpsd(a, b, IMM5, k1 as i8, SAE);
30880        r.cast_unsigned()
30881    }
30882}
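
// Illustrative sketch (not part of the upstream source): combining a const predicate with
// SAE for the scalar double compare; as with the `_ss` forms, only bit 0 of the result is
// meaningful. The helper name is made up for this example.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _demo_scalar_sd_compare_no_exc(a: __m128d, b: __m128d) -> bool {
    // "Not-equal, unordered, quiet" on the low lanes, with exception reporting suppressed.
    (_mm_cmp_round_sd_mask::<_CMP_NEQ_UQ, _MM_FROUND_NO_EXC>(a, b) & 1) != 0
}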
30883
30884/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
30885///
30886/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu32_mask&expand=1056)
30887#[inline]
30888#[target_feature(enable = "avx512f")]
30889#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30890#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30891pub fn _mm512_cmplt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
30892    unsafe { simd_bitmask::<u32x16, _>(simd_lt(a.as_u32x16(), b.as_u32x16())) }
30893}
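
// Illustrative sketch (not part of the upstream source): the _epu32 comparisons are
// unsigned, so a lane holding -1 (i.e. 0xFFFF_FFFF) compares as the largest value rather
// than the smallest. The helper name is made up for this example.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _demo_unsigned_vs_signed_lt(a: __m512i, b: __m512i) -> (__mmask16, __mmask16) {
    // The two masks differ exactly in lanes where the sign bit of either operand is set.
    (_mm512_cmplt_epu32_mask(a, b), _mm512_cmplt_epi32_mask(a, b))
}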
30894
30895/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30896///
30897/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu32_mask&expand=1057)
30898#[inline]
30899#[target_feature(enable = "avx512f")]
30900#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30901#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30902pub fn _mm512_mask_cmplt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
30903    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
30904}
30905
30906/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
30907///
30908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu32_mask&expand=1054)
30909#[inline]
30910#[target_feature(enable = "avx512f,avx512vl")]
30911#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30912#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30913pub fn _mm256_cmplt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
30914    unsafe { simd_bitmask::<u32x8, _>(simd_lt(a.as_u32x8(), b.as_u32x8())) }
30915}
30916
30917/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30918///
30919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu32_mask&expand=1055)
30920#[inline]
30921#[target_feature(enable = "avx512f,avx512vl")]
30922#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30923#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30924pub fn _mm256_mask_cmplt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30925    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
30926}
30927
30928/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
30929///
30930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu32_mask&expand=1052)
30931#[inline]
30932#[target_feature(enable = "avx512f,avx512vl")]
30933#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30934#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30935pub fn _mm_cmplt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
30936    unsafe { simd_bitmask::<u32x4, _>(simd_lt(a.as_u32x4(), b.as_u32x4())) }
30937}
30938
30939/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30940///
30941/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu32_mask&expand=1053)
30942#[inline]
30943#[target_feature(enable = "avx512f,avx512vl")]
30944#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30945#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30946pub fn _mm_mask_cmplt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30947    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
30948}
30949
30950/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
30951///
30952/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu32_mask&expand=933)
30953#[inline]
30954#[target_feature(enable = "avx512f")]
30955#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30956#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30957pub fn _mm512_cmpgt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
30958    unsafe { simd_bitmask::<u32x16, _>(simd_gt(a.as_u32x16(), b.as_u32x16())) }
30959}
30960
30961/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30962///
30963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu32_mask&expand=934)
30964#[inline]
30965#[target_feature(enable = "avx512f")]
30966#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30967#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30968pub fn _mm512_mask_cmpgt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
30969    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
30970}
30971
30972/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
30973///
30974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu32_mask&expand=931)
30975#[inline]
30976#[target_feature(enable = "avx512f,avx512vl")]
30977#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30978#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30979pub fn _mm256_cmpgt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
30980    unsafe { simd_bitmask::<u32x8, _>(simd_gt(a.as_u32x8(), b.as_u32x8())) }
30981}
30982
30983/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30984///
30985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu32_mask&expand=932)
30986#[inline]
30987#[target_feature(enable = "avx512f,avx512vl")]
30988#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30989#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30990pub fn _mm256_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30991    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
30992}
30993
30994/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
30995///
30996/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu32_mask&expand=929)
30997#[inline]
30998#[target_feature(enable = "avx512f,avx512vl")]
30999#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31000#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31001pub fn _mm_cmpgt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31002    unsafe { simd_bitmask::<u32x4, _>(simd_gt(a.as_u32x4(), b.as_u32x4())) }
31003}
31004
31005/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31006///
31007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu32_mask&expand=930)
31008#[inline]
31009#[target_feature(enable = "avx512f,avx512vl")]
31010#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31011#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31012pub fn _mm_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31013    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
31014}
31015
31016/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31017///
31018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu32_mask&expand=995)
31019#[inline]
31020#[target_feature(enable = "avx512f")]
31021#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31022#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31023pub fn _mm512_cmple_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31024    unsafe { simd_bitmask::<u32x16, _>(simd_le(a.as_u32x16(), b.as_u32x16())) }
31025}
31026
31027/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31028///
31029/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu32_mask&expand=996)
31030#[inline]
31031#[target_feature(enable = "avx512f")]
31032#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31033#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31034pub fn _mm512_mask_cmple_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31035    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
31036}
31037
31038/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31039///
31040/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu32_mask&expand=993)
31041#[inline]
31042#[target_feature(enable = "avx512f,avx512vl")]
31043#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31044#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31045pub fn _mm256_cmple_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31046    unsafe { simd_bitmask::<u32x8, _>(simd_le(a.as_u32x8(), b.as_u32x8())) }
31047}
31048
31049/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31050///
31051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu32_mask&expand=994)
31052#[inline]
31053#[target_feature(enable = "avx512f,avx512vl")]
31054#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31055#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31056pub fn _mm256_mask_cmple_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31057    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
31058}
31059
31060/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31061///
31062/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu32_mask&expand=991)
31063#[inline]
31064#[target_feature(enable = "avx512f,avx512vl")]
31065#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31066#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31067pub fn _mm_cmple_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31068    unsafe { simd_bitmask::<u32x4, _>(simd_le(a.as_u32x4(), b.as_u32x4())) }
31069}
31070
31071/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31072///
31073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu32_mask&expand=992)
31074#[inline]
31075#[target_feature(enable = "avx512f,avx512vl")]
31076#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31077#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31078pub fn _mm_mask_cmple_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31079    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
31080}
31081
31082/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31083///
31084/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu32_mask&expand=873)
31085#[inline]
31086#[target_feature(enable = "avx512f")]
31087#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31088#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31089pub fn _mm512_cmpge_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31090    unsafe { simd_bitmask::<u32x16, _>(simd_ge(a.as_u32x16(), b.as_u32x16())) }
31091}
31092
31093/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31094///
31095/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu32_mask&expand=874)
31096#[inline]
31097#[target_feature(enable = "avx512f")]
31098#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31099#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31100pub fn _mm512_mask_cmpge_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31101    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31102}
31103
31104/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31105///
31106/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu32_mask&expand=871)
31107#[inline]
31108#[target_feature(enable = "avx512f,avx512vl")]
31109#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31110#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31111pub fn _mm256_cmpge_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31112    unsafe { simd_bitmask::<u32x8, _>(simd_ge(a.as_u32x8(), b.as_u32x8())) }
31113}
31114
31115/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31116///
31117/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu32_mask&expand=872)
31118#[inline]
31119#[target_feature(enable = "avx512f,avx512vl")]
31120#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31121#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31122pub fn _mm256_mask_cmpge_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31123    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31124}
31125
31126/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31127///
31128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu32_mask&expand=869)
31129#[inline]
31130#[target_feature(enable = "avx512f,avx512vl")]
31131#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31132#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31133pub fn _mm_cmpge_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31134    unsafe { simd_bitmask::<u32x4, _>(simd_ge(a.as_u32x4(), b.as_u32x4())) }
31135}
31136
31137/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31138///
31139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu32_mask&expand=870)
31140#[inline]
31141#[target_feature(enable = "avx512f,avx512vl")]
31142#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31143#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31144pub fn _mm_mask_cmpge_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31145    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31146}
31147
31148/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
31149///
31150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu32_mask&expand=807)
31151#[inline]
31152#[target_feature(enable = "avx512f")]
31153#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31154#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31155pub fn _mm512_cmpeq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31156    unsafe { simd_bitmask::<u32x16, _>(simd_eq(a.as_u32x16(), b.as_u32x16())) }
31157}
31158
31159/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31160///
31161/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu32_mask&expand=808)
31162#[inline]
31163#[target_feature(enable = "avx512f")]
31164#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31165#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31166pub fn _mm512_mask_cmpeq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31167    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
31168}
31169
31170/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
31171///
31172/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu32_mask&expand=805)
31173#[inline]
31174#[target_feature(enable = "avx512f,avx512vl")]
31175#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31176#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31177pub fn _mm256_cmpeq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31178    unsafe { simd_bitmask::<u32x8, _>(simd_eq(a.as_u32x8(), b.as_u32x8())) }
31179}
31180
31181/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31182///
31183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu32_mask&expand=806)
31184#[inline]
31185#[target_feature(enable = "avx512f,avx512vl")]
31186#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31187#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31188pub fn _mm256_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31189    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
31190}
31191
31192/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
31193///
31194/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu32_mask&expand=803)
31195#[inline]
31196#[target_feature(enable = "avx512f,avx512vl")]
31197#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31198#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31199pub fn _mm_cmpeq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31200    unsafe { simd_bitmask::<u32x4, _>(simd_eq(a.as_u32x4(), b.as_u32x4())) }
31201}
31202
31203/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31204///
31205/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu32_mask&expand=804)
31206#[inline]
31207#[target_feature(enable = "avx512f,avx512vl")]
31208#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31209#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31210pub fn _mm_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31211    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
31212}
31213
31214/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
31215///
31216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu32_mask&expand=1112)
31217#[inline]
31218#[target_feature(enable = "avx512f")]
31219#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31220#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31221pub fn _mm512_cmpneq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31222    unsafe { simd_bitmask::<u32x16, _>(simd_ne(a.as_u32x16(), b.as_u32x16())) }
31223}
31224
31225/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31226///
31227/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu32_mask&expand=1113)
31228#[inline]
31229#[target_feature(enable = "avx512f")]
31230#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31231#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31232pub fn _mm512_mask_cmpneq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31233    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
31234}
31235
31236/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
31237///
31238/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu32_mask&expand=1110)
31239#[inline]
31240#[target_feature(enable = "avx512f,avx512vl")]
31241#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31242#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31243pub fn _mm256_cmpneq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31244    unsafe { simd_bitmask::<u32x8, _>(simd_ne(a.as_u32x8(), b.as_u32x8())) }
31245}
31246
31247/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31248///
31249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu32_mask&expand=1111)
31250#[inline]
31251#[target_feature(enable = "avx512f,avx512vl")]
31252#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31253#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31254pub fn _mm256_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31255    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
31256}
31257
31258/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
31259///
31260/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu32_mask&expand=1108)
31261#[inline]
31262#[target_feature(enable = "avx512f,avx512vl")]
31263#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31264#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31265pub fn _mm_cmpneq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31266    unsafe { simd_bitmask::<u32x4, _>(simd_ne(a.as_u32x4(), b.as_u32x4())) }
31267}
31268
31269/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31270///
31271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu32_mask&expand=1109)
31272#[inline]
31273#[target_feature(enable = "avx512f,avx512vl")]
31274#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31275#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31276pub fn _mm_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31277    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
31278}
31279
31280/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31281///
31282/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu32_mask&expand=721)
31283#[inline]
31284#[target_feature(enable = "avx512f")]
31285#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31286#[rustc_legacy_const_generics(2)]
31287#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31288pub fn _mm512_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask16 {
31289    unsafe {
31290        static_assert_uimm_bits!(IMM3, 3);
31291        let a = a.as_u32x16();
31292        let b = b.as_u32x16();
31293        let r = match IMM3 {
31294            0 => simd_eq(a, b),
31295            1 => simd_lt(a, b),
31296            2 => simd_le(a, b),
31297            3 => i32x16::ZERO,
31298            4 => simd_ne(a, b),
31299            5 => simd_ge(a, b),
31300            6 => simd_gt(a, b),
31301            _ => i32x16::splat(-1),
31302        };
31303        simd_bitmask(r)
31304    }
31305}
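
// Illustrative sketch (not part of the upstream source): two of the eight _MM_CMPINT_*
// operands ignore the inputs entirely, which is visible in the match above (arm 3 yields
// all zeros, the catch-all arm yields all ones). The helper name is made up for this example.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _demo_trivial_epu32_predicates(a: __m512i, b: __m512i) -> (__mmask16, __mmask16) {
    // _MM_CMPINT_FALSE always yields 0; _MM_CMPINT_TRUE always yields all ones.
    (
        _mm512_cmp_epu32_mask::<_MM_CMPINT_FALSE>(a, b),
        _mm512_cmp_epu32_mask::<_MM_CMPINT_TRUE>(a, b),
    )
}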
31306
31307/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31308///
31309/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu32_mask&expand=722)
31310#[inline]
31311#[target_feature(enable = "avx512f")]
31312#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31313#[rustc_legacy_const_generics(3)]
31314#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31315pub fn _mm512_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
31316    k1: __mmask16,
31317    a: __m512i,
31318    b: __m512i,
31319) -> __mmask16 {
31320    unsafe {
31321        static_assert_uimm_bits!(IMM3, 3);
31322        let a = a.as_u32x16();
31323        let b = b.as_u32x16();
31324        let k1 = simd_select_bitmask(k1, i32x16::splat(-1), i32x16::ZERO);
31325        let r = match IMM3 {
31326            0 => simd_and(k1, simd_eq(a, b)),
31327            1 => simd_and(k1, simd_lt(a, b)),
31328            2 => simd_and(k1, simd_le(a, b)),
31329            3 => i32x16::ZERO,
31330            4 => simd_and(k1, simd_ne(a, b)),
31331            5 => simd_and(k1, simd_ge(a, b)),
31332            6 => simd_and(k1, simd_gt(a, b)),
31333            _ => k1,
31334        };
31335        simd_bitmask(r)
31336    }
31337}
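
// Illustrative sketch (not part of the upstream source): the fixed-predicate helpers above
// are thin wrappers over _mm512_cmp_epu32_mask / _mm512_mask_cmp_epu32_mask with one of the
// _MM_CMPINT_* operands, so the two calls below always agree. The helper is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _demo_generic_epu32_compare(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    let via_enum = _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b);
    let via_wrapper = _mm512_mask_cmple_epu32_mask(k1, a, b);
    debug_assert_eq!(via_enum, via_wrapper);
    via_enum
}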
31338
31339/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31340///
31341/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu32_mask&expand=719)
31342#[inline]
31343#[target_feature(enable = "avx512f,avx512vl")]
31344#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31345#[rustc_legacy_const_generics(2)]
31346#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31347pub fn _mm256_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
31348    unsafe {
31349        static_assert_uimm_bits!(IMM3, 3);
31350        let a = a.as_u32x8();
31351        let b = b.as_u32x8();
31352        let r = match IMM3 {
31353            0 => simd_eq(a, b),
31354            1 => simd_lt(a, b),
31355            2 => simd_le(a, b),
31356            3 => i32x8::ZERO,
31357            4 => simd_ne(a, b),
31358            5 => simd_ge(a, b),
31359            6 => simd_gt(a, b),
31360            _ => i32x8::splat(-1),
31361        };
31362        simd_bitmask(r)
31363    }
31364}
31365
31366/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31367///
31368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu32_mask&expand=720)
31369#[inline]
31370#[target_feature(enable = "avx512f,avx512vl")]
31371#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31372#[rustc_legacy_const_generics(3)]
31373#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31374pub fn _mm256_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
31375    k1: __mmask8,
31376    a: __m256i,
31377    b: __m256i,
31378) -> __mmask8 {
31379    unsafe {
31380        static_assert_uimm_bits!(IMM3, 3);
31381        let a = a.as_u32x8();
31382        let b = b.as_u32x8();
31383        let k1 = simd_select_bitmask(k1, i32x8::splat(-1), i32x8::ZERO);
31384        let r = match IMM3 {
31385            0 => simd_and(k1, simd_eq(a, b)),
31386            1 => simd_and(k1, simd_lt(a, b)),
31387            2 => simd_and(k1, simd_le(a, b)),
31388            3 => i32x8::ZERO,
31389            4 => simd_and(k1, simd_ne(a, b)),
31390            5 => simd_and(k1, simd_ge(a, b)),
31391            6 => simd_and(k1, simd_gt(a, b)),
31392            _ => k1,
31393        };
31394        simd_bitmask(r)
31395    }
31396}
31397
31398/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31399///
31400/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu32_mask&expand=717)
31401#[inline]
31402#[target_feature(enable = "avx512f,avx512vl")]
31403#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31404#[rustc_legacy_const_generics(2)]
31405#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31406pub fn _mm_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
31407    unsafe {
31408        static_assert_uimm_bits!(IMM3, 3);
31409        let a = a.as_u32x4();
31410        let b = b.as_u32x4();
31411        let r = match IMM3 {
31412            0 => simd_eq(a, b),
31413            1 => simd_lt(a, b),
31414            2 => simd_le(a, b),
31415            3 => i32x4::ZERO,
31416            4 => simd_ne(a, b),
31417            5 => simd_ge(a, b),
31418            6 => simd_gt(a, b),
31419            _ => i32x4::splat(-1),
31420        };
31421        simd_bitmask(r)
31422    }
31423}
31424
31425/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31426///
31427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu32_mask&expand=718)
31428#[inline]
31429#[target_feature(enable = "avx512f,avx512vl")]
31430#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31431#[rustc_legacy_const_generics(3)]
31432#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31433pub fn _mm_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
31434    k1: __mmask8,
31435    a: __m128i,
31436    b: __m128i,
31437) -> __mmask8 {
31438    unsafe {
31439        static_assert_uimm_bits!(IMM3, 3);
31440        let a = a.as_u32x4();
31441        let b = b.as_u32x4();
31442        let k1 = simd_select_bitmask(k1, i32x4::splat(-1), i32x4::ZERO);
31443        let r = match IMM3 {
31444            0 => simd_and(k1, simd_eq(a, b)),
31445            1 => simd_and(k1, simd_lt(a, b)),
31446            2 => simd_and(k1, simd_le(a, b)),
31447            3 => i32x4::ZERO,
31448            4 => simd_and(k1, simd_ne(a, b)),
31449            5 => simd_and(k1, simd_ge(a, b)),
31450            6 => simd_and(k1, simd_gt(a, b)),
31451            _ => k1,
31452        };
31453        simd_bitmask(r)
31454    }
31455}
31456
31457/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
31458///
31459/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi32_mask&expand=1029)
31460#[inline]
31461#[target_feature(enable = "avx512f")]
31462#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31463#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31464pub fn _mm512_cmplt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31465    unsafe { simd_bitmask::<i32x16, _>(simd_lt(a.as_i32x16(), b.as_i32x16())) }
31466}
31467
31468/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31469///
31470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi32_mask&expand=1031)
31471#[inline]
31472#[target_feature(enable = "avx512f")]
31473#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31474#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31475pub fn _mm512_mask_cmplt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31476    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
31477}
31478
31479/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
31480///
31481/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi32_mask&expand=1027)
31482#[inline]
31483#[target_feature(enable = "avx512f,avx512vl")]
31484#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31485#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31486pub fn _mm256_cmplt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31487    unsafe { simd_bitmask::<i32x8, _>(simd_lt(a.as_i32x8(), b.as_i32x8())) }
31488}
31489
31490/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31491///
31492/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi32_mask&expand=1028)
31493#[inline]
31494#[target_feature(enable = "avx512f,avx512vl")]
31495#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31496#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31497pub fn _mm256_mask_cmplt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31498    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
31499}
31500
31501/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
31502///
31503/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi32_mask&expand=1025)
31504#[inline]
31505#[target_feature(enable = "avx512f,avx512vl")]
31506#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31507#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31508pub fn _mm_cmplt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31509    unsafe { simd_bitmask::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4())) }
31510}
31511
31512/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31513///
31514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi32_mask&expand=1026)
31515#[inline]
31516#[target_feature(enable = "avx512f,avx512vl")]
31517#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31518#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31519pub fn _mm_mask_cmplt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31520    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
31521}
31522
31523/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
31524///
31525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi32_mask&expand=905)
31526#[inline]
31527#[target_feature(enable = "avx512f")]
31528#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31529#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31530pub fn _mm512_cmpgt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31531    unsafe { simd_bitmask::<i32x16, _>(simd_gt(a.as_i32x16(), b.as_i32x16())) }
31532}
31533
31534/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31535///
31536/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi32_mask&expand=906)
31537#[inline]
31538#[target_feature(enable = "avx512f")]
31539#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31540#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31541pub fn _mm512_mask_cmpgt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31542    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
31543}
31544
31545/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
31546///
31547/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi32_mask&expand=903)
31548#[inline]
31549#[target_feature(enable = "avx512f,avx512vl")]
31550#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31551#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31552pub fn _mm256_cmpgt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31553    unsafe { simd_bitmask::<i32x8, _>(simd_gt(a.as_i32x8(), b.as_i32x8())) }
31554}
31555
31556/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31557///
31558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi32_mask&expand=904)
31559#[inline]
31560#[target_feature(enable = "avx512f,avx512vl")]
31561#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31562#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31563pub fn _mm256_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31564    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
31565}
31566
31567/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
31568///
31569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi32_mask&expand=901)
31570#[inline]
31571#[target_feature(enable = "avx512f,avx512vl")]
31572#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31573#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31574pub fn _mm_cmpgt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31575    unsafe { simd_bitmask::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4())) }
31576}
31577
31578/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31579///
31580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi32_mask&expand=902)
31581#[inline]
31582#[target_feature(enable = "avx512f,avx512vl")]
31583#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31584#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31585pub fn _mm_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31586    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
31587}
31588
31589/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31590///
31591/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi32_mask&expand=971)
31592#[inline]
31593#[target_feature(enable = "avx512f")]
31594#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31595#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31596pub fn _mm512_cmple_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31597    unsafe { simd_bitmask::<i32x16, _>(simd_le(a.as_i32x16(), b.as_i32x16())) }
31598}
31599
31600/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31601///
31602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi32_mask&expand=972)
31603#[inline]
31604#[target_feature(enable = "avx512f")]
31605#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31606#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31607pub fn _mm512_mask_cmple_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31608    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
31609}
31610
31611/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31612///
31613/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi32_mask&expand=969)
31614#[inline]
31615#[target_feature(enable = "avx512f,avx512vl")]
31616#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31617#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31618pub fn _mm256_cmple_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31619    unsafe { simd_bitmask::<i32x8, _>(simd_le(a.as_i32x8(), b.as_i32x8())) }
31620}
31621
31622/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31623///
31624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi32_mask&expand=970)
31625#[inline]
31626#[target_feature(enable = "avx512f,avx512vl")]
31627#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31628#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31629pub fn _mm256_mask_cmple_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31630    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
31631}
31632
31633/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31634///
31635/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi32_mask&expand=967)
31636#[inline]
31637#[target_feature(enable = "avx512f,avx512vl")]
31638#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31639#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31640pub fn _mm_cmple_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31641    unsafe { simd_bitmask::<i32x4, _>(simd_le(a.as_i32x4(), b.as_i32x4())) }
31642}
31643
31644/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31645///
31646/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi32_mask&expand=968)
31647#[inline]
31648#[target_feature(enable = "avx512f,avx512vl")]
31649#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31650#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31651pub fn _mm_mask_cmple_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31652    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
31653}
31654
31655/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31656///
31657/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi32_mask&expand=849)
31658#[inline]
31659#[target_feature(enable = "avx512f")]
31660#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31661#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31662pub fn _mm512_cmpge_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31663    unsafe { simd_bitmask::<i32x16, _>(simd_ge(a.as_i32x16(), b.as_i32x16())) }
31664}
31665
31666/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31667///
31668/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi32_mask&expand=850)
31669#[inline]
31670#[target_feature(enable = "avx512f")]
31671#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31672#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31673pub fn _mm512_mask_cmpge_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31674    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31675}
31676
31677/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31678///
31679/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi32_mask&expand=847)
31680#[inline]
31681#[target_feature(enable = "avx512f,avx512vl")]
31682#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31683#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31684pub fn _mm256_cmpge_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31685    unsafe { simd_bitmask::<i32x8, _>(simd_ge(a.as_i32x8(), b.as_i32x8())) }
31686}
31687
31688/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31689///
31690/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi32_mask&expand=848)
31691#[inline]
31692#[target_feature(enable = "avx512f,avx512vl")]
31693#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31694#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31695pub fn _mm256_mask_cmpge_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31696    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31697}
31698
31699/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31700///
31701/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi32_mask&expand=845)
31702#[inline]
31703#[target_feature(enable = "avx512f,avx512vl")]
31704#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31705#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31706pub fn _mm_cmpge_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31707    unsafe { simd_bitmask::<i32x4, _>(simd_ge(a.as_i32x4(), b.as_i32x4())) }
31708}
31709
31710/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31711///
31712/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi32_mask&expand=846)
31713#[inline]
31714#[target_feature(enable = "avx512f,avx512vl")]
31715#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31716#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31717pub fn _mm_mask_cmpge_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31718    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31719}
31720
31721/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
31722///
31723/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi32_mask&expand=779)
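///
/// A minimal usage sketch (illustrative only, not from the original source; it assumes
/// AVX512F support has been verified at runtime, e.g. with `is_x86_feature_detected!`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///     let b = _mm512_set1_epi32(3);
///     // Only lane 3 compares equal, so only bit 3 of the mask is set.
///     assert_eq!(_mm512_cmpeq_epi32_mask(a, b), 1 << 3);
/// }
/// ```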
31724#[inline]
31725#[target_feature(enable = "avx512f")]
31726#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31727#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31728pub fn _mm512_cmpeq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31729    unsafe { simd_bitmask::<i32x16, _>(simd_eq(a.as_i32x16(), b.as_i32x16())) }
31730}
31731
31732/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31733///
31734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi32_mask&expand=780)
31735#[inline]
31736#[target_feature(enable = "avx512f")]
31737#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31738#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31739pub fn _mm512_mask_cmpeq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31740    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
31741}
31742
31743/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
31744///
31745/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi32_mask&expand=777)
31746#[inline]
31747#[target_feature(enable = "avx512f,avx512vl")]
31748#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31749#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31750pub fn _mm256_cmpeq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31751    unsafe { simd_bitmask::<i32x8, _>(simd_eq(a.as_i32x8(), b.as_i32x8())) }
31752}
31753
31754/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31755///
31756/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi32_mask&expand=778)
31757#[inline]
31758#[target_feature(enable = "avx512f,avx512vl")]
31759#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31760#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31761pub fn _mm256_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31762    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
31763}
31764
31765/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
31766///
31767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi32_mask&expand=775)
31768#[inline]
31769#[target_feature(enable = "avx512f,avx512vl")]
31770#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31771#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31772pub fn _mm_cmpeq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31773    unsafe { simd_bitmask::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4())) }
31774}
31775
31776/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31777///
31778/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi32_mask&expand=776)
31779#[inline]
31780#[target_feature(enable = "avx512f,avx512vl")]
31781#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31782#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31783pub fn _mm_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31784    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
31785}
31786
31787/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
31788///
31789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi32_mask&expand=1088)
31790#[inline]
31791#[target_feature(enable = "avx512f")]
31792#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31793#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31794pub fn _mm512_cmpneq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31795    unsafe { simd_bitmask::<i32x16, _>(simd_ne(a.as_i32x16(), b.as_i32x16())) }
31796}
31797
31798/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31799///
31800/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi32_mask&expand=1089)
31801#[inline]
31802#[target_feature(enable = "avx512f")]
31803#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31804#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31805pub fn _mm512_mask_cmpneq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31806    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
31807}
31808
31809/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
31810///
31811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi32_mask&expand=1086)
31812#[inline]
31813#[target_feature(enable = "avx512f,avx512vl")]
31814#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31815#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31816pub fn _mm256_cmpneq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31817    unsafe { simd_bitmask::<i32x8, _>(simd_ne(a.as_i32x8(), b.as_i32x8())) }
31818}
31819
31820/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31821///
31822/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi32_mask&expand=1087)
31823#[inline]
31824#[target_feature(enable = "avx512f,avx512vl")]
31825#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31826#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31827pub fn _mm256_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31828    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
31829}
31830
31831/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
31832///
31833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi32_mask&expand=1084)
31834#[inline]
31835#[target_feature(enable = "avx512f,avx512vl")]
31836#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31837#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31838pub fn _mm_cmpneq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31839    unsafe { simd_bitmask::<i32x4, _>(simd_ne(a.as_i32x4(), b.as_i32x4())) }
31840}
31841
31842/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31843///
31844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi32_mask&expand=1085)
31845#[inline]
31846#[target_feature(enable = "avx512f,avx512vl")]
31847#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31848#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31849pub fn _mm_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31850    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
31851}
31852
31853/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31854///
31855/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi32_mask&expand=697)
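///
/// A minimal usage sketch (illustrative only, not from the original source; it assumes
/// AVX512F support has been verified at runtime, e.g. with `is_x86_feature_detected!`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///     let b = _mm512_set1_epi32(3);
///     // `_MM_CMPINT_LE` selects the less-than-or-equal predicate, so lanes 0..=3 match.
///     let m = _mm512_cmp_epi32_mask::<_MM_CMPINT_LE>(a, b);
///     assert_eq!(m, 0b0000_0000_0000_1111);
/// }
/// ```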
31856#[inline]
31857#[target_feature(enable = "avx512f")]
31858#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31859#[rustc_legacy_const_generics(2)]
31860#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31861pub fn _mm512_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask16 {
31862    unsafe {
31863        static_assert_uimm_bits!(IMM3, 3);
31864        let a = a.as_i32x16();
31865        let b = b.as_i32x16();
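        // _MM_CMPINT_* predicate encoding: 0 = EQ, 1 = LT, 2 = LE, 3 = FALSE,
        // 4 = NE, 5 = NLT (>=), 6 = NLE (>), 7 = TRUE.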
31866        let r = match IMM3 {
31867            0 => simd_eq(a, b),
31868            1 => simd_lt(a, b),
31869            2 => simd_le(a, b),
31870            3 => i32x16::ZERO,
31871            4 => simd_ne(a, b),
31872            5 => simd_ge(a, b),
31873            6 => simd_gt(a, b),
31874            _ => i32x16::splat(-1),
31875        };
31876        simd_bitmask(r)
31877    }
31878}
31879
31880/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31881///
31882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi32_mask&expand=698)
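///
/// A minimal usage sketch (illustrative only, not from the original source; it assumes
/// AVX512F support has been verified at runtime, e.g. with `is_x86_feature_detected!`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///     let b = _mm512_set1_epi32(3);
///     // Without the mask, lanes 0..=3 satisfy `a <= b` (result 0b1111).
///     // The zeromask 0b0101 keeps only lanes 0 and 2 of that result.
///     let m = _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(0b0101, a, b);
///     assert_eq!(m, 0b0101);
/// }
/// ```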
31883#[inline]
31884#[target_feature(enable = "avx512f")]
31885#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31886#[rustc_legacy_const_generics(3)]
31887#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31888pub fn _mm512_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
31889    k1: __mmask16,
31890    a: __m512i,
31891    b: __m512i,
31892) -> __mmask16 {
31893    unsafe {
31894        static_assert_uimm_bits!(IMM3, 3);
31895        let a = a.as_i32x16();
31896        let b = b.as_i32x16();
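        // Expand the bitmask `k1` into all-ones / all-zeros lanes so it can be
        // ANDed with the comparison result below.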
31897        let k1 = simd_select_bitmask(k1, i32x16::splat(-1), i32x16::ZERO);
31898        let r = match IMM3 {
31899            0 => simd_and(k1, simd_eq(a, b)),
31900            1 => simd_and(k1, simd_lt(a, b)),
31901            2 => simd_and(k1, simd_le(a, b)),
31902            3 => i32x16::ZERO,
31903            4 => simd_and(k1, simd_ne(a, b)),
31904            5 => simd_and(k1, simd_ge(a, b)),
31905            6 => simd_and(k1, simd_gt(a, b)),
31906            _ => k1,
31907        };
31908        simd_bitmask(r)
31909    }
31910}
31911
31912/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31913///
31914/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi32_mask&expand=695)
31915#[inline]
31916#[target_feature(enable = "avx512f,avx512vl")]
31917#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31918#[rustc_legacy_const_generics(2)]
31919#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31920pub fn _mm256_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
31921    unsafe {
31922        static_assert_uimm_bits!(IMM3, 3);
31923        let a = a.as_i32x8();
31924        let b = b.as_i32x8();
31925        let r = match IMM3 {
31926            0 => simd_eq(a, b),
31927            1 => simd_lt(a, b),
31928            2 => simd_le(a, b),
31929            3 => i32x8::ZERO,
31930            4 => simd_ne(a, b),
31931            5 => simd_ge(a, b),
31932            6 => simd_gt(a, b),
31933            _ => i32x8::splat(-1),
31934        };
31935        simd_bitmask(r)
31936    }
31937}
31938
31939/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31940///
31941/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi32_mask&expand=696)
31942#[inline]
31943#[target_feature(enable = "avx512f,avx512vl")]
31944#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31945#[rustc_legacy_const_generics(3)]
31946#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31947pub fn _mm256_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
31948    k1: __mmask8,
31949    a: __m256i,
31950    b: __m256i,
31951) -> __mmask8 {
31952    unsafe {
31953        static_assert_uimm_bits!(IMM3, 3);
31954        let a = a.as_i32x8();
31955        let b = b.as_i32x8();
31956        let k1 = simd_select_bitmask(k1, i32x8::splat(-1), i32x8::ZERO);
31957        let r = match IMM3 {
31958            0 => simd_and(k1, simd_eq(a, b)),
31959            1 => simd_and(k1, simd_lt(a, b)),
31960            2 => simd_and(k1, simd_le(a, b)),
31961            3 => i32x8::ZERO,
31962            4 => simd_and(k1, simd_ne(a, b)),
31963            5 => simd_and(k1, simd_ge(a, b)),
31964            6 => simd_and(k1, simd_gt(a, b)),
31965            _ => k1,
31966        };
31967        simd_bitmask(r)
31968    }
31969}
31970
31971/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31972///
31973/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi32_mask&expand=693)
31974#[inline]
31975#[target_feature(enable = "avx512f,avx512vl")]
31976#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31977#[rustc_legacy_const_generics(2)]
31978#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31979pub fn _mm_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
31980    unsafe {
31981        static_assert_uimm_bits!(IMM3, 3);
31982        let a = a.as_i32x4();
31983        let b = b.as_i32x4();
31984        let r = match IMM3 {
31985            0 => simd_eq(a, b),
31986            1 => simd_lt(a, b),
31987            2 => simd_le(a, b),
31988            3 => i32x4::ZERO,
31989            4 => simd_ne(a, b),
31990            5 => simd_ge(a, b),
31991            6 => simd_gt(a, b),
31992            _ => i32x4::splat(-1),
31993        };
31994        simd_bitmask(r)
31995    }
31996}
31997
31998/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31999///
32000/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi32_mask&expand=694)
32001#[inline]
32002#[target_feature(enable = "avx512f,avx512vl")]
32003#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32004#[rustc_legacy_const_generics(3)]
32005#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32006pub fn _mm_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
32007    k1: __mmask8,
32008    a: __m128i,
32009    b: __m128i,
32010) -> __mmask8 {
32011    unsafe {
32012        static_assert_uimm_bits!(IMM3, 3);
32013        let a = a.as_i32x4();
32014        let b = b.as_i32x4();
32015        let k1 = simd_select_bitmask(k1, i32x4::splat(-1), i32x4::ZERO);
32016        let r = match IMM3 {
32017            0 => simd_and(k1, simd_eq(a, b)),
32018            1 => simd_and(k1, simd_lt(a, b)),
32019            2 => simd_and(k1, simd_le(a, b)),
32020            3 => i32x4::ZERO,
32021            4 => simd_and(k1, simd_ne(a, b)),
32022            5 => simd_and(k1, simd_ge(a, b)),
32023            6 => simd_and(k1, simd_gt(a, b)),
32024            _ => k1,
32025        };
32026        simd_bitmask(r)
32027    }
32028}
32029
32030/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
32031///
32032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu64_mask&expand=1062)
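///
/// A minimal usage sketch (illustrative only, not from the original source; it assumes
/// AVX512F support has been verified at runtime, e.g. with `is_x86_feature_detected!`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi64(-1); // every lane is u64::MAX when read as unsigned
///     let b = _mm512_set1_epi64(0);
///     // As unsigned values, u64::MAX is not less than 0, so no mask bits are set.
///     assert_eq!(_mm512_cmplt_epu64_mask(a, b), 0);
///     // The reverse comparison sets all eight mask bits.
///     assert_eq!(_mm512_cmplt_epu64_mask(b, a), 0xFF);
/// }
/// ```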
32033#[inline]
32034#[target_feature(enable = "avx512f")]
32035#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32036#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32037pub fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32038    unsafe { simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8())) }
32039}
32040
32041/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32042///
32043/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu64_mask&expand=1063)
32044#[inline]
32045#[target_feature(enable = "avx512f")]
32046#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32047#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32048pub fn _mm512_mask_cmplt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32049    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
32050}
32051
32052/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
32053///
32054/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu64_mask&expand=1060)
32055#[inline]
32056#[target_feature(enable = "avx512f,avx512vl")]
32057#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32058#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32059pub fn _mm256_cmplt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32060    unsafe { simd_bitmask::<__m256i, _>(simd_lt(a.as_u64x4(), b.as_u64x4())) }
32061}
32062
32063/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32064///
32065/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu64_mask&expand=1061)
32066#[inline]
32067#[target_feature(enable = "avx512f,avx512vl")]
32068#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32069#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32070pub fn _mm256_mask_cmplt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32071    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
32072}
32073
32074/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
32075///
32076/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu64_mask&expand=1058)
32077#[inline]
32078#[target_feature(enable = "avx512f,avx512vl")]
32079#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32080#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32081pub fn _mm_cmplt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32082    unsafe { simd_bitmask::<__m128i, _>(simd_lt(a.as_u64x2(), b.as_u64x2())) }
32083}
32084
32085/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32086///
32087/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu64_mask&expand=1059)
32088#[inline]
32089#[target_feature(enable = "avx512f,avx512vl")]
32090#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32091#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32092pub fn _mm_mask_cmplt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32093    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
32094}
32095
32096/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32097///
32098/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu64_mask&expand=939)
32099#[inline]
32100#[target_feature(enable = "avx512f")]
32101#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32102#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32103pub fn _mm512_cmpgt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32104    unsafe { simd_bitmask::<__m512i, _>(simd_gt(a.as_u64x8(), b.as_u64x8())) }
32105}
32106
32107/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32108///
32109/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu64_mask&expand=940)
32110#[inline]
32111#[target_feature(enable = "avx512f")]
32112#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32113#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32114pub fn _mm512_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32115    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32116}
32117
32118/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32119///
32120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu64_mask&expand=937)
32121#[inline]
32122#[target_feature(enable = "avx512f,avx512vl")]
32123#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32124#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32125pub fn _mm256_cmpgt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32126    unsafe { simd_bitmask::<__m256i, _>(simd_gt(a.as_u64x4(), b.as_u64x4())) }
32127}
32128
32129/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32130///
32131/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu64_mask&expand=938)
32132#[inline]
32133#[target_feature(enable = "avx512f,avx512vl")]
32134#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32135#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32136pub fn _mm256_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32137    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32138}
32139
32140/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32141///
32142/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu64_mask&expand=935)
32143#[inline]
32144#[target_feature(enable = "avx512f,avx512vl")]
32145#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32146#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32147pub fn _mm_cmpgt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32148    unsafe { simd_bitmask::<__m128i, _>(simd_gt(a.as_u64x2(), b.as_u64x2())) }
32149}
32150
32151/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32152///
32153/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu64_mask&expand=936)
32154#[inline]
32155#[target_feature(enable = "avx512f,avx512vl")]
32156#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32157#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32158pub fn _mm_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32159    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32160}
32161
32162/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32163///
32164/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu64_mask&expand=1001)
32165#[inline]
32166#[target_feature(enable = "avx512f")]
32167#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32168#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32169pub fn _mm512_cmple_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32170    unsafe { simd_bitmask::<__m512i, _>(simd_le(a.as_u64x8(), b.as_u64x8())) }
32171}
32172
32173/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32174///
32175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu64_mask&expand=1002)
32176#[inline]
32177#[target_feature(enable = "avx512f")]
32178#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32179#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32180pub fn _mm512_mask_cmple_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32181    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
32182}
32183
32184/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32185///
32186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu64_mask&expand=999)
32187#[inline]
32188#[target_feature(enable = "avx512f,avx512vl")]
32189#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32190#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32191pub fn _mm256_cmple_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32192    unsafe { simd_bitmask::<__m256i, _>(simd_le(a.as_u64x4(), b.as_u64x4())) }
32193}
32194
32195/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32196///
32197/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu64_mask&expand=1000)
32198#[inline]
32199#[target_feature(enable = "avx512f,avx512vl")]
32200#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32201#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32202pub fn _mm256_mask_cmple_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32203    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
32204}
32205
32206/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32207///
32208/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu64_mask&expand=997)
32209#[inline]
32210#[target_feature(enable = "avx512f,avx512vl")]
32211#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32212#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32213pub fn _mm_cmple_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32214    unsafe { simd_bitmask::<__m128i, _>(simd_le(a.as_u64x2(), b.as_u64x2())) }
32215}
32216
32217/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32218///
32219/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu64_mask&expand=998)
32220#[inline]
32221#[target_feature(enable = "avx512f,avx512vl")]
32222#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32223#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32224pub fn _mm_mask_cmple_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32225    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
32226}
32227
32228/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32229///
32230/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu64_mask&expand=879)
32231#[inline]
32232#[target_feature(enable = "avx512f")]
32233#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32234#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32235pub fn _mm512_cmpge_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32236    unsafe { simd_bitmask::<__m512i, _>(simd_ge(a.as_u64x8(), b.as_u64x8())) }
32237}
32238
32239/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32240///
32241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu64_mask&expand=880)
32242#[inline]
32243#[target_feature(enable = "avx512f")]
32244#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32245#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32246pub fn _mm512_mask_cmpge_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32247    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32248}
32249
32250/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32251///
32252/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu64_mask&expand=877)
32253#[inline]
32254#[target_feature(enable = "avx512f,avx512vl")]
32255#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32256#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32257pub fn _mm256_cmpge_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32258    unsafe { simd_bitmask::<__m256i, _>(simd_ge(a.as_u64x4(), b.as_u64x4())) }
32259}
32260
32261/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32262///
32263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu64_mask&expand=878)
32264#[inline]
32265#[target_feature(enable = "avx512f,avx512vl")]
32266#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32267#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32268pub fn _mm256_mask_cmpge_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32269    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32270}
32271
32272/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32273///
32274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu64_mask&expand=875)
32275#[inline]
32276#[target_feature(enable = "avx512f,avx512vl")]
32277#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32278#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32279pub fn _mm_cmpge_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32280    unsafe { simd_bitmask::<__m128i, _>(simd_ge(a.as_u64x2(), b.as_u64x2())) }
32281}
32282
32283/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32284///
32285/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu64_mask&expand=876)
32286#[inline]
32287#[target_feature(enable = "avx512f,avx512vl")]
32288#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32289#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32290pub fn _mm_mask_cmpge_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32291    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32292}
32293
32294/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
32295///
32296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu64_mask&expand=813)
32297#[inline]
32298#[target_feature(enable = "avx512f")]
32299#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32300#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32301pub fn _mm512_cmpeq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32302    unsafe { simd_bitmask::<__m512i, _>(simd_eq(a.as_u64x8(), b.as_u64x8())) }
32303}
32304
32305/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32306///
32307/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu64_mask&expand=814)
32308#[inline]
32309#[target_feature(enable = "avx512f")]
32310#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32311#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32312pub fn _mm512_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32313    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32314}
32315
32316/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
32317///
32318/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu64_mask&expand=811)
32319#[inline]
32320#[target_feature(enable = "avx512f,avx512vl")]
32321#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32322#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32323pub fn _mm256_cmpeq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32324    unsafe { simd_bitmask::<__m256i, _>(simd_eq(a.as_u64x4(), b.as_u64x4())) }
32325}
32326
32327/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32328///
32329/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu64_mask&expand=812)
32330#[inline]
32331#[target_feature(enable = "avx512f,avx512vl")]
32332#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32333#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32334pub fn _mm256_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32335    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32336}
32337
32338/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
32339///
32340/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu64_mask&expand=809)
32341#[inline]
32342#[target_feature(enable = "avx512f,avx512vl")]
32343#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32344#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32345pub fn _mm_cmpeq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32346    unsafe { simd_bitmask::<__m128i, _>(simd_eq(a.as_u64x2(), b.as_u64x2())) }
32347}
32348
32349/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32350///
32351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu64_mask&expand=810)
32352#[inline]
32353#[target_feature(enable = "avx512f,avx512vl")]
32354#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32355#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32356pub fn _mm_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32357    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32358}
32359
32360/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32361///
32362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu64_mask&expand=1118)
32363#[inline]
32364#[target_feature(enable = "avx512f")]
32365#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32366#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32367pub fn _mm512_cmpneq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32368    unsafe { simd_bitmask::<__m512i, _>(simd_ne(a.as_u64x8(), b.as_u64x8())) }
32369}
32370
32371/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32372///
32373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu64_mask&expand=1119)
32374#[inline]
32375#[target_feature(enable = "avx512f")]
32376#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32377#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32378pub fn _mm512_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32379    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
32380}
32381
32382/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32383///
32384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu64_mask&expand=1116)
32385#[inline]
32386#[target_feature(enable = "avx512f,avx512vl")]
32387#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32388#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32389pub fn _mm256_cmpneq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32390    unsafe { simd_bitmask::<__m256i, _>(simd_ne(a.as_u64x4(), b.as_u64x4())) }
32391}
32392
32393/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32394///
32395/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu64_mask&expand=1117)
32396#[inline]
32397#[target_feature(enable = "avx512f,avx512vl")]
32398#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32399#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32400pub fn _mm256_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32401    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
32402}
32403
32404/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32405///
32406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu64_mask&expand=1114)
32407#[inline]
32408#[target_feature(enable = "avx512f,avx512vl")]
32409#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32410#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32411pub fn _mm_cmpneq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32412    unsafe { simd_bitmask::<__m128i, _>(simd_ne(a.as_u64x2(), b.as_u64x2())) }
32413}
32414
32415/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32416///
32417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu64_mask&expand=1115)
32418#[inline]
32419#[target_feature(enable = "avx512f,avx512vl")]
32420#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32421#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32422pub fn _mm_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32423    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
32424}
32425
32426/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
32427///
32428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu64_mask&expand=727)
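///
/// A minimal usage sketch (illustrative only, not from the original source; it assumes
/// AVX512F support has been verified at runtime, e.g. with `is_x86_feature_detected!`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
///     let b = _mm512_set1_epi64(4);
///     // `_MM_CMPINT_NLT` is the unsigned greater-than-or-equal predicate,
///     // so lanes 4..=7 match.
///     let m = _mm512_cmp_epu64_mask::<_MM_CMPINT_NLT>(a, b);
///     assert_eq!(m, 0b1111_0000);
/// }
/// ```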
32429#[inline]
32430#[target_feature(enable = "avx512f")]
32431#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32432#[rustc_legacy_const_generics(2)]
32433#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32434pub fn _mm512_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask8 {
32435    unsafe {
32436        static_assert_uimm_bits!(IMM3, 3);
32437        let a = a.as_u64x8();
32438        let b = b.as_u64x8();
32439        let r = match IMM3 {
32440            0 => simd_eq(a, b),
32441            1 => simd_lt(a, b),
32442            2 => simd_le(a, b),
32443            3 => i64x8::ZERO,
32444            4 => simd_ne(a, b),
32445            5 => simd_ge(a, b),
32446            6 => simd_gt(a, b),
32447            _ => i64x8::splat(-1),
32448        };
32449        simd_bitmask(r)
32450    }
32451}
32452
32453/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32454///
32455/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu64_mask&expand=728)
32456#[inline]
32457#[target_feature(enable = "avx512f")]
32458#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32459#[rustc_legacy_const_generics(3)]
32460#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32461pub fn _mm512_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
32462    k1: __mmask8,
32463    a: __m512i,
32464    b: __m512i,
32465) -> __mmask8 {
32466    unsafe {
32467        static_assert_uimm_bits!(IMM3, 3);
32468        let a = a.as_u64x8();
32469        let b = b.as_u64x8();
32470        let k1 = simd_select_bitmask(k1, i64x8::splat(-1), i64x8::ZERO);
32471        let r = match IMM3 {
32472            0 => simd_and(k1, simd_eq(a, b)),
32473            1 => simd_and(k1, simd_lt(a, b)),
32474            2 => simd_and(k1, simd_le(a, b)),
32475            3 => i64x8::ZERO,
32476            4 => simd_and(k1, simd_ne(a, b)),
32477            5 => simd_and(k1, simd_ge(a, b)),
32478            6 => simd_and(k1, simd_gt(a, b)),
32479            _ => k1,
32480        };
32481        simd_bitmask(r)
32482    }
32483}
32484
32485/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
32486///
32487/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu64_mask&expand=725)
32488#[inline]
32489#[target_feature(enable = "avx512f,avx512vl")]
32490#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32491#[rustc_legacy_const_generics(2)]
32492#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32493pub fn _mm256_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
32494    unsafe {
32495        static_assert_uimm_bits!(IMM3, 3);
32496        let a = a.as_u64x4();
32497        let b = b.as_u64x4();
32498        let r = match IMM3 {
32499            0 => simd_eq(a, b),
32500            1 => simd_lt(a, b),
32501            2 => simd_le(a, b),
32502            3 => i64x4::ZERO,
32503            4 => simd_ne(a, b),
32504            5 => simd_ge(a, b),
32505            6 => simd_gt(a, b),
32506            _ => i64x4::splat(-1),
32507        };
32508        simd_bitmask(r)
32509    }
32510}
32511
32512/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32513///
32514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu64_mask&expand=726)
32515#[inline]
32516#[target_feature(enable = "avx512f,avx512vl")]
32517#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32518#[rustc_legacy_const_generics(3)]
32519#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32520pub fn _mm256_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
32521    k1: __mmask8,
32522    a: __m256i,
32523    b: __m256i,
32524) -> __mmask8 {
32525    unsafe {
32526        static_assert_uimm_bits!(IMM3, 3);
32527        let a = a.as_u64x4();
32528        let b = b.as_u64x4();
32529        let k1 = simd_select_bitmask(k1, i64x4::splat(-1), i64x4::ZERO);
32530        let r = match IMM3 {
32531            0 => simd_and(k1, simd_eq(a, b)),
32532            1 => simd_and(k1, simd_lt(a, b)),
32533            2 => simd_and(k1, simd_le(a, b)),
32534            3 => i64x4::ZERO,
32535            4 => simd_and(k1, simd_ne(a, b)),
32536            5 => simd_and(k1, simd_ge(a, b)),
32537            6 => simd_and(k1, simd_gt(a, b)),
32538            _ => k1,
32539        };
32540        simd_bitmask(r)
32541    }
32542}
32543
32544/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
32545///
32546/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu64_mask&expand=723)
32547#[inline]
32548#[target_feature(enable = "avx512f,avx512vl")]
32549#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32550#[rustc_legacy_const_generics(2)]
32551#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32552pub fn _mm_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
32553    unsafe {
32554        static_assert_uimm_bits!(IMM3, 3);
32555        let a = a.as_u64x2();
32556        let b = b.as_u64x2();
32557        let r = match IMM3 {
32558            0 => simd_eq(a, b),
32559            1 => simd_lt(a, b),
32560            2 => simd_le(a, b),
32561            3 => i64x2::ZERO,
32562            4 => simd_ne(a, b),
32563            5 => simd_ge(a, b),
32564            6 => simd_gt(a, b),
32565            _ => i64x2::splat(-1),
32566        };
32567        simd_bitmask(r)
32568    }
32569}
32570
32571/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32572///
32573/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu64_mask&expand=724)
32574#[inline]
32575#[target_feature(enable = "avx512f,avx512vl")]
32576#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32577#[rustc_legacy_const_generics(3)]
32578#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32579pub fn _mm_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
32580    k1: __mmask8,
32581    a: __m128i,
32582    b: __m128i,
32583) -> __mmask8 {
32584    unsafe {
32585        static_assert_uimm_bits!(IMM3, 3);
32586        let a = a.as_u64x2();
32587        let b = b.as_u64x2();
32588        let k1 = simd_select_bitmask(k1, i64x2::splat(-1), i64x2::ZERO);
32589        let r = match IMM3 {
32590            0 => simd_and(k1, simd_eq(a, b)),
32591            1 => simd_and(k1, simd_lt(a, b)),
32592            2 => simd_and(k1, simd_le(a, b)),
32593            3 => i64x2::ZERO,
32594            4 => simd_and(k1, simd_ne(a, b)),
32595            5 => simd_and(k1, simd_ge(a, b)),
32596            6 => simd_and(k1, simd_gt(a, b)),
32597            _ => k1,
32598        };
32599        simd_bitmask(r)
32600    }
32601}
32602
32603/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
32604///
32605/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi64_mask&expand=1037)
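///
/// A minimal usage sketch (illustrative only, not from the original source; it assumes
/// AVX512F support has been verified at runtime, e.g. with `is_x86_feature_detected!`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi64(-1);
///     let b = _mm512_set1_epi64(0);
///     // As signed values, -1 < 0 in every lane, so all eight mask bits are set
///     // (contrast this with the unsigned `_mm512_cmplt_epu64_mask` above).
///     assert_eq!(_mm512_cmplt_epi64_mask(a, b), 0xFF);
/// }
/// ```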
32606#[inline]
32607#[target_feature(enable = "avx512f")]
32608#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32609#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32610pub fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32611    unsafe { simd_bitmask::<__m512i, _>(simd_lt(a.as_i64x8(), b.as_i64x8())) }
32612}
32613
32614/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32615///
32616/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi64_mask&expand=1038)
32617#[inline]
32618#[target_feature(enable = "avx512f")]
32619#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32620#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32621pub fn _mm512_mask_cmplt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32622    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
32623}
32624
32625/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
32626///
32627/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi64_mask&expand=1035)
32628#[inline]
32629#[target_feature(enable = "avx512f,avx512vl")]
32630#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32631#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32632pub fn _mm256_cmplt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32633    unsafe { simd_bitmask::<__m256i, _>(simd_lt(a.as_i64x4(), b.as_i64x4())) }
32634}
32635
32636/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32637///
32638/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi64_mask&expand=1036)
32639#[inline]
32640#[target_feature(enable = "avx512f,avx512vl")]
32641#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32642#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32643pub fn _mm256_mask_cmplt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32644    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
32645}
32646
32647/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
32648///
32649/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi64_mask&expand=1033)
32650#[inline]
32651#[target_feature(enable = "avx512f,avx512vl")]
32652#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32653#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32654pub fn _mm_cmplt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32655    unsafe { simd_bitmask::<__m128i, _>(simd_lt(a.as_i64x2(), b.as_i64x2())) }
32656}
32657
32658/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32659///
32660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi64_mask&expand=1034)
32661#[inline]
32662#[target_feature(enable = "avx512f,avx512vl")]
32663#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32664#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32665pub fn _mm_mask_cmplt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32666    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
32667}
32668
32669/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32670///
32671/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi64_mask&expand=913)
32672#[inline]
32673#[target_feature(enable = "avx512f")]
32674#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32675#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32676pub fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32677    unsafe { simd_bitmask::<__m512i, _>(simd_gt(a.as_i64x8(), b.as_i64x8())) }
32678}
32679
32680/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32681///
32682/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi64_mask&expand=914)
32683#[inline]
32684#[target_feature(enable = "avx512f")]
32685#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32686#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32687pub fn _mm512_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32688    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32689}
32690
32691/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32692///
32693/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi64_mask&expand=911)
32694#[inline]
32695#[target_feature(enable = "avx512f,avx512vl")]
32696#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32697#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32698pub fn _mm256_cmpgt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32699    unsafe { simd_bitmask::<__m256i, _>(simd_gt(a.as_i64x4(), b.as_i64x4())) }
32700}
32701
32702/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32703///
32704/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi64_mask&expand=912)
32705#[inline]
32706#[target_feature(enable = "avx512f,avx512vl")]
32707#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32708#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32709pub fn _mm256_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32710    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32711}
32712
32713/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32714///
32715/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi64_mask&expand=909)
32716#[inline]
32717#[target_feature(enable = "avx512f,avx512vl")]
32718#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32719#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32720pub fn _mm_cmpgt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32721    unsafe { simd_bitmask::<__m128i, _>(simd_gt(a.as_i64x2(), b.as_i64x2())) }
32722}
32723
32724/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32725///
32726/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi64_mask&expand=910)
32727#[inline]
32728#[target_feature(enable = "avx512f,avx512vl")]
32729#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32730#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32731pub fn _mm_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32732    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32733}
32734
32735/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32736///
32737/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi64_mask&expand=977)
32738#[inline]
32739#[target_feature(enable = "avx512f")]
32740#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32741#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32742pub fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32743    unsafe { simd_bitmask::<__m512i, _>(simd_le(a.as_i64x8(), b.as_i64x8())) }
32744}
32745
32746/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32747///
32748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi64_mask&expand=978)
32749#[inline]
32750#[target_feature(enable = "avx512f")]
32751#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32752#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32753pub fn _mm512_mask_cmple_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32754    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
32755}
32756
32757/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32758///
32759/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi64_mask&expand=975)
32760#[inline]
32761#[target_feature(enable = "avx512f,avx512vl")]
32762#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32763#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32764pub fn _mm256_cmple_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32765    unsafe { simd_bitmask::<__m256i, _>(simd_le(a.as_i64x4(), b.as_i64x4())) }
32766}
32767
32768/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32769///
32770/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi64_mask&expand=976)
32771#[inline]
32772#[target_feature(enable = "avx512f,avx512vl")]
32773#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32774#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32775pub fn _mm256_mask_cmple_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32776    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
32777}
32778
32779/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32780///
32781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi64_mask&expand=973)
32782#[inline]
32783#[target_feature(enable = "avx512f,avx512vl")]
32784#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32785#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32786pub fn _mm_cmple_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32787    unsafe { simd_bitmask::<__m128i, _>(simd_le(a.as_i64x2(), b.as_i64x2())) }
32788}
32789
32790/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32791///
32792/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi64_mask&expand=974)
32793#[inline]
32794#[target_feature(enable = "avx512f,avx512vl")]
32795#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32796#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32797pub fn _mm_mask_cmple_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32798    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
32799}
32800
32801/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32802///
32803/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi64_mask&expand=855)
32804#[inline]
32805#[target_feature(enable = "avx512f")]
32806#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32807#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32808pub fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32809    unsafe { simd_bitmask::<__m512i, _>(simd_ge(a.as_i64x8(), b.as_i64x8())) }
32810}
32811
32812/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32813///
32814/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi64_mask&expand=856)
32815#[inline]
32816#[target_feature(enable = "avx512f")]
32817#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32818#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32819pub fn _mm512_mask_cmpge_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32820    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32821}
32822
32823/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32824///
32825/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi64_mask&expand=853)
32826#[inline]
32827#[target_feature(enable = "avx512f,avx512vl")]
32828#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32829#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32830pub fn _mm256_cmpge_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32831    unsafe { simd_bitmask::<__m256i, _>(simd_ge(a.as_i64x4(), b.as_i64x4())) }
32832}
32833
32834/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32835///
32836/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi64_mask&expand=854)
32837#[inline]
32838#[target_feature(enable = "avx512f,avx512vl")]
32839#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32840#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32841pub fn _mm256_mask_cmpge_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32842    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32843}
32844
32845/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32846///
32847/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi64_mask&expand=851)
32848#[inline]
32849#[target_feature(enable = "avx512f,avx512vl")]
32850#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32851#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32852pub fn _mm_cmpge_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32853    unsafe { simd_bitmask::<__m128i, _>(simd_ge(a.as_i64x2(), b.as_i64x2())) }
32854}
32855
32856/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32857///
32858/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi64_mask&expand=852)
32859#[inline]
32860#[target_feature(enable = "avx512f,avx512vl")]
32861#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32862#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32863pub fn _mm_mask_cmpge_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32864    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32865}
32866
32867/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
32868///
32869/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi64_mask&expand=787)
32870#[inline]
32871#[target_feature(enable = "avx512f")]
32872#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32873#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32874pub fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32875    unsafe { simd_bitmask::<__m512i, _>(simd_eq(a.as_i64x8(), b.as_i64x8())) }
32876}
32877
32878/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32879///
32880/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi64_mask&expand=788)
32881#[inline]
32882#[target_feature(enable = "avx512f")]
32883#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32884#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32885pub fn _mm512_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32886    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32887}
32888
32889/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
32890///
32891/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi64_mask&expand=785)
32892#[inline]
32893#[target_feature(enable = "avx512f,avx512vl")]
32894#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32895#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32896pub fn _mm256_cmpeq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32897    unsafe { simd_bitmask::<__m256i, _>(simd_eq(a.as_i64x4(), b.as_i64x4())) }
32898}
32899
32900/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32901///
32902/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi64_mask&expand=786)
32903#[inline]
32904#[target_feature(enable = "avx512f,avx512vl")]
32905#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32906#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32907pub fn _mm256_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32908    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32909}
32910
32911/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
32912///
32913/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi64_mask&expand=783)
32914#[inline]
32915#[target_feature(enable = "avx512f,avx512vl")]
32916#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32917#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32918pub fn _mm_cmpeq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32919    unsafe { simd_bitmask::<__m128i, _>(simd_eq(a.as_i64x2(), b.as_i64x2())) }
32920}
32921
32922/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32923///
32924/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi64_mask&expand=784)
32925#[inline]
32926#[target_feature(enable = "avx512f,avx512vl")]
32927#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32928#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32929pub fn _mm_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32930    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32931}
32932
32933/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32934///
32935/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi64_mask&expand=1094)
32936#[inline]
32937#[target_feature(enable = "avx512f")]
32938#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32939#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32940pub fn _mm512_cmpneq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32941    unsafe { simd_bitmask::<__m512i, _>(simd_ne(a.as_i64x8(), b.as_i64x8())) }
32942}
32943
32944/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32945///
32946/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi64_mask&expand=1095)
32947#[inline]
32948#[target_feature(enable = "avx512f")]
32949#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32950#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32951pub fn _mm512_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32952    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
32953}
32954
32955/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32956///
32957/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi64_mask&expand=1092)
32958#[inline]
32959#[target_feature(enable = "avx512f,avx512vl")]
32960#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32961#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32962pub fn _mm256_cmpneq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32963    unsafe { simd_bitmask::<__m256i, _>(simd_ne(a.as_i64x4(), b.as_i64x4())) }
32964}
32965
32966/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32967///
32968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi64_mask&expand=1093)
32969#[inline]
32970#[target_feature(enable = "avx512f,avx512vl")]
32971#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32972#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32973pub fn _mm256_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32974    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
32975}
32976
32977/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32978///
32979/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi64_mask&expand=1090)
32980#[inline]
32981#[target_feature(enable = "avx512f,avx512vl")]
32982#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32983#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32984pub fn _mm_cmpneq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32985    unsafe { simd_bitmask::<__m128i, _>(simd_ne(a.as_i64x2(), b.as_i64x2())) }
32986}
32987
32988/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32989///
32990/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi64_mask&expand=1091)
32991#[inline]
32992#[target_feature(enable = "avx512f,avx512vl")]
32993#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32994#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32995pub fn _mm_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32996    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
32997}
32998
32999/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
33000///
33001/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi64_mask&expand=703)
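///
/// # Examples
///
/// A minimal illustrative sketch (arbitrary example values) showing how the
/// comparison predicate is passed as a const generic:
///
/// ```ignore
/// // Assumes an #[target_feature(enable = "avx512f")] context on AVX-512F hardware.
/// let a = _mm512_set1_epi64(1);
/// let b = _mm512_set1_epi64(2);
/// // _MM_CMPINT_LE selects the less-than-or-equal predicate; every lane matches.
/// let k = _mm512_cmp_epi64_mask::<_MM_CMPINT_LE>(a, b);
/// assert_eq!(k, 0xff);
/// ```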
33002#[inline]
33003#[target_feature(enable = "avx512f")]
33004#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33005#[rustc_legacy_const_generics(2)]
33006#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33007pub fn _mm512_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask8 {
33008    unsafe {
33009        static_assert_uimm_bits!(IMM3, 3);
33010        let a = a.as_i64x8();
33011        let b = b.as_i64x8();
33012        let r = match IMM3 {
33013            0 => simd_eq(a, b),
33014            1 => simd_lt(a, b),
33015            2 => simd_le(a, b),
33016            3 => i64x8::ZERO,
33017            4 => simd_ne(a, b),
33018            5 => simd_ge(a, b),
33019            6 => simd_gt(a, b),
33020            _ => i64x8::splat(-1),
33021        };
33022        simd_bitmask(r)
33023    }
33024}
33025
33026/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33027///
33028/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi64_mask&expand=704)
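///
/// # Examples
///
/// A minimal illustrative sketch (arbitrary example values) showing that only
/// bits set in `k1` can appear in the result:
///
/// ```ignore
/// // Assumes an #[target_feature(enable = "avx512f")] context on AVX-512F hardware.
/// let a = _mm512_set1_epi64(0);
/// let b = _mm512_set1_epi64(1);
/// // Every lane compares not-equal, but the zeromask keeps only half the bits.
/// let k = _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(0b1010_1010, a, b);
/// assert_eq!(k, 0b1010_1010);
/// ```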
33029#[inline]
33030#[target_feature(enable = "avx512f")]
33031#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33032#[rustc_legacy_const_generics(3)]
33033#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33034pub fn _mm512_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
33035    k1: __mmask8,
33036    a: __m512i,
33037    b: __m512i,
33038) -> __mmask8 {
33039    unsafe {
33040        static_assert_uimm_bits!(IMM3, 3);
33041        let a = a.as_i64x8();
33042        let b = b.as_i64x8();
33043        let k1 = simd_select_bitmask(k1, i64x8::splat(-1), i64x8::ZERO);
33044        let r = match IMM3 {
33045            0 => simd_and(k1, simd_eq(a, b)),
33046            1 => simd_and(k1, simd_lt(a, b)),
33047            2 => simd_and(k1, simd_le(a, b)),
33048            3 => i64x8::ZERO,
33049            4 => simd_and(k1, simd_ne(a, b)),
33050            5 => simd_and(k1, simd_ge(a, b)),
33051            6 => simd_and(k1, simd_gt(a, b)),
33052            _ => k1,
33053        };
33054        simd_bitmask(r)
33055    }
33056}
33057
33058/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
33059///
33060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi64_mask&expand=701)
33061#[inline]
33062#[target_feature(enable = "avx512f,avx512vl")]
33063#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33064#[rustc_legacy_const_generics(2)]
33065#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33066pub fn _mm256_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
33067    unsafe {
33068        static_assert_uimm_bits!(IMM3, 3);
33069        let a = a.as_i64x4();
33070        let b = b.as_i64x4();
33071        let r = match IMM3 {
33072            0 => simd_eq(a, b),
33073            1 => simd_lt(a, b),
33074            2 => simd_le(a, b),
33075            3 => i64x4::ZERO,
33076            4 => simd_ne(a, b),
33077            5 => simd_ge(a, b),
33078            6 => simd_gt(a, b),
33079            _ => i64x4::splat(-1),
33080        };
33081        simd_bitmask(r)
33082    }
33083}
33084
33085/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33086///
33087/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi64_mask&expand=702)
33088#[inline]
33089#[target_feature(enable = "avx512f,avx512vl")]
33090#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33091#[rustc_legacy_const_generics(3)]
33092#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33093pub fn _mm256_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
33094    k1: __mmask8,
33095    a: __m256i,
33096    b: __m256i,
33097) -> __mmask8 {
33098    unsafe {
33099        static_assert_uimm_bits!(IMM3, 3);
33100        let a = a.as_i64x4();
33101        let b = b.as_i64x4();
33102        let k1 = simd_select_bitmask(k1, i64x4::splat(-1), i64x4::ZERO);
33103        let r = match IMM3 {
33104            0 => simd_and(k1, simd_eq(a, b)),
33105            1 => simd_and(k1, simd_lt(a, b)),
33106            2 => simd_and(k1, simd_le(a, b)),
33107            3 => i64x4::ZERO,
33108            4 => simd_and(k1, simd_ne(a, b)),
33109            5 => simd_and(k1, simd_ge(a, b)),
33110            6 => simd_and(k1, simd_gt(a, b)),
33111            _ => k1,
33112        };
33113        simd_bitmask(r)
33114    }
33115}
33116
33117/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
33118///
33119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi64_mask&expand=699)
33120#[inline]
33121#[target_feature(enable = "avx512f,avx512vl")]
33122#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33123#[rustc_legacy_const_generics(2)]
33124#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33125pub fn _mm_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
33126    unsafe {
33127        static_assert_uimm_bits!(IMM3, 3);
33128        let a = a.as_i64x2();
33129        let b = b.as_i64x2();
33130        let r = match IMM3 {
33131            0 => simd_eq(a, b),
33132            1 => simd_lt(a, b),
33133            2 => simd_le(a, b),
33134            3 => i64x2::ZERO,
33135            4 => simd_ne(a, b),
33136            5 => simd_ge(a, b),
33137            6 => simd_gt(a, b),
33138            _ => i64x2::splat(-1),
33139        };
33140        simd_bitmask(r)
33141    }
33142}
33143
33144/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33145///
33146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi64_mask&expand=700)
33147#[inline]
33148#[target_feature(enable = "avx512f,avx512vl")]
33149#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33150#[rustc_legacy_const_generics(3)]
33151#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33152pub fn _mm_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
33153    k1: __mmask8,
33154    a: __m128i,
33155    b: __m128i,
33156) -> __mmask8 {
33157    unsafe {
33158        static_assert_uimm_bits!(IMM3, 3);
33159        let a = a.as_i64x2();
33160        let b = b.as_i64x2();
33161        let k1 = simd_select_bitmask(k1, i64x2::splat(-1), i64x2::ZERO);
33162        let r = match IMM3 {
33163            0 => simd_and(k1, simd_eq(a, b)),
33164            1 => simd_and(k1, simd_lt(a, b)),
33165            2 => simd_and(k1, simd_le(a, b)),
33166            3 => i64x2::ZERO,
33167            4 => simd_and(k1, simd_ne(a, b)),
33168            5 => simd_and(k1, simd_ge(a, b)),
33169            6 => simd_and(k1, simd_gt(a, b)),
33170            _ => k1,
33171        };
33172        simd_bitmask(r)
33173    }
33174}
33175
33176/// Reduce the packed 32-bit integers in a by addition. Returns the sum of all elements in a.
33177///
33178/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_epi32&expand=4556)
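///
/// # Examples
///
/// A minimal illustrative sketch (arbitrary example values):
///
/// ```ignore
/// // Assumes an #[target_feature(enable = "avx512f")] context on AVX-512F hardware.
/// let a = _mm512_set1_epi32(3);
/// // Sixteen lanes of 3 sum to 48.
/// assert_eq!(_mm512_reduce_add_epi32(a), 48);
/// ```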
33179#[inline]
33180#[target_feature(enable = "avx512f")]
33181#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33182pub fn _mm512_reduce_add_epi32(a: __m512i) -> i32 {
33183    unsafe { simd_reduce_add_unordered(a.as_i32x16()) }
33184}
33185
33186/// Reduce the packed 32-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
33187///
33188/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_epi32&expand=4555)
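///
/// # Examples
///
/// A minimal illustrative sketch (arbitrary example values); inactive lanes
/// contribute nothing to the sum:
///
/// ```ignore
/// // Assumes an #[target_feature(enable = "avx512f")] context on AVX-512F hardware.
/// let a = _mm512_set1_epi32(3);
/// // Only the four lanes selected by the mask are added: 4 * 3 = 12.
/// assert_eq!(_mm512_mask_reduce_add_epi32(0b0000_0000_0000_1111, a), 12);
/// ```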
33189#[inline]
33190#[target_feature(enable = "avx512f")]
33191#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33192pub fn _mm512_mask_reduce_add_epi32(k: __mmask16, a: __m512i) -> i32 {
33193    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO)) }
33194}
33195
33196/// Reduce the packed 64-bit integers in a by addition. Returns the sum of all elements in a.
33197///
33198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_epi64&expand=4558)
33199#[inline]
33200#[target_feature(enable = "avx512f")]
33201#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33202pub fn _mm512_reduce_add_epi64(a: __m512i) -> i64 {
33203    unsafe { simd_reduce_add_unordered(a.as_i64x8()) }
33204}
33205
33206/// Reduce the packed 64-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
33207///
33208/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_epi64&expand=4557)
33209#[inline]
33210#[target_feature(enable = "avx512f")]
33211#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33212pub fn _mm512_mask_reduce_add_epi64(k: __mmask8, a: __m512i) -> i64 {
33213    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO)) }
33214}
33215
33216/// Reduce the packed single-precision (32-bit) floating-point elements in a by addition. Returns the sum of all elements in a.
33217///
33218/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_ps&expand=4562)
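///
/// # Examples
///
/// A minimal illustrative sketch (arbitrary example values):
///
/// ```ignore
/// // Assumes an #[target_feature(enable = "avx512f")] context on AVX-512F hardware.
/// let a = _mm512_set1_ps(0.5);
/// // Sixteen lanes of 0.5 sum to 8.0 (exact in this case).
/// assert_eq!(_mm512_reduce_add_ps(a), 8.0);
/// ```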
33219#[inline]
33220#[target_feature(enable = "avx512f")]
33221#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33222pub fn _mm512_reduce_add_ps(a: __m512) -> f32 {
33223    unsafe {
33224        // we have to use `simd_shuffle` here because `_mm512_extractf32x8_ps` is in AVX512DQ
33225        let a = _mm256_add_ps(
33226            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
33227            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
33228        );
33229        let a = _mm_add_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
33230        let a = _mm_add_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
33231        simd_extract::<_, f32>(a, 0) + simd_extract::<_, f32>(a, 1)
33232    }
33233}
33234
33235/// Reduce the packed single-precision (32-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a.
33236///
33237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_ps&expand=4561)
33238#[inline]
33239#[target_feature(enable = "avx512f")]
33240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33241pub fn _mm512_mask_reduce_add_ps(k: __mmask16, a: __m512) -> f32 {
33242    unsafe { _mm512_reduce_add_ps(simd_select_bitmask(k, a, _mm512_setzero_ps())) }
33243}
33244
33245/// Reduce the packed double-precision (64-bit) floating-point elements in a by addition. Returns the sum of all elements in a.
33246///
33247/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_pd&expand=4560)
33248#[inline]
33249#[target_feature(enable = "avx512f")]
33250#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33251pub fn _mm512_reduce_add_pd(a: __m512d) -> f64 {
33252    unsafe {
33253        let a = _mm256_add_pd(
33254            _mm512_extractf64x4_pd::<0>(a),
33255            _mm512_extractf64x4_pd::<1>(a),
33256        );
33257        let a = _mm_add_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
33258        simd_extract::<_, f64>(a, 0) + simd_extract::<_, f64>(a, 1)
33259    }
33260}
33261
33262/// Reduce the packed double-precision (64-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a.
33263///
33264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_pd&expand=4559)
33265#[inline]
33266#[target_feature(enable = "avx512f")]
33267#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33268pub fn _mm512_mask_reduce_add_pd(k: __mmask8, a: __m512d) -> f64 {
33269    unsafe { _mm512_reduce_add_pd(simd_select_bitmask(k, a, _mm512_setzero_pd())) }
33270}
33271
33272/// Reduce the packed 32-bit integers in a by multiplication. Returns the product of all elements in a.
33273///
33274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_epi32&expand=4600)
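///
/// # Examples
///
/// A minimal illustrative sketch (arbitrary example values):
///
/// ```ignore
/// // Assumes an #[target_feature(enable = "avx512f")] context on AVX-512F hardware.
/// let a = _mm512_set1_epi32(2);
/// // Sixteen lanes of 2 multiply to 2^16 = 65536.
/// assert_eq!(_mm512_reduce_mul_epi32(a), 65536);
/// ```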
33275#[inline]
33276#[target_feature(enable = "avx512f")]
33277#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33278pub fn _mm512_reduce_mul_epi32(a: __m512i) -> i32 {
33279    unsafe { simd_reduce_mul_unordered(a.as_i32x16()) }
33280}
33281
33282/// Reduce the packed 32-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
33283///
33284/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_epi32&expand=4599)
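///
/// # Examples
///
/// A minimal illustrative sketch (arbitrary example values); inactive lanes
/// are treated as the multiplicative identity 1:
///
/// ```ignore
/// // Assumes an #[target_feature(enable = "avx512f")] context on AVX-512F hardware.
/// let a = _mm512_set1_epi32(2);
/// // Only the eight active lanes are multiplied: 2^8 = 256.
/// assert_eq!(_mm512_mask_reduce_mul_epi32(0b0000_0000_1111_1111, a), 256);
/// ```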
33285#[inline]
33286#[target_feature(enable = "avx512f")]
33287#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33288pub fn _mm512_mask_reduce_mul_epi32(k: __mmask16, a: __m512i) -> i32 {
33289    unsafe {
33290        simd_reduce_mul_unordered(simd_select_bitmask(
33291            k,
33292            a.as_i32x16(),
33293            _mm512_set1_epi32(1).as_i32x16(),
33294        ))
33295    }
33296}
33297
33298/// Reduce the packed 64-bit integers in a by multiplication. Returns the product of all elements in a.
33299///
33300/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_epi64&expand=4602)
33301#[inline]
33302#[target_feature(enable = "avx512f")]
33303#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33304pub fn _mm512_reduce_mul_epi64(a: __m512i) -> i64 {
33305    unsafe { simd_reduce_mul_unordered(a.as_i64x8()) }
33306}
33307
33308/// Reduce the packed 64-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
33309///
33310/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_epi64&expand=4601)
33311#[inline]
33312#[target_feature(enable = "avx512f")]
33313#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33314pub fn _mm512_mask_reduce_mul_epi64(k: __mmask8, a: __m512i) -> i64 {
33315    unsafe {
33316        simd_reduce_mul_unordered(simd_select_bitmask(
33317            k,
33318            a.as_i64x8(),
33319            _mm512_set1_epi64(1).as_i64x8(),
33320        ))
33321    }
33322}
33323
33324/// Reduce the packed single-precision (32-bit) floating-point elements in a by multiplication. Returns the product of all elements in a.
33325///
33326/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_ps&expand=4606)
33327#[inline]
33328#[target_feature(enable = "avx512f")]
33329#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33330pub fn _mm512_reduce_mul_ps(a: __m512) -> f32 {
33331    unsafe {
33332        // we have to use `simd_shuffle` here because `_mm512_extractf32x8_ps` is in AVX512DQ
33333        let a = _mm256_mul_ps(
33334            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
33335            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
33336        );
33337        let a = _mm_mul_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
33338        let a = _mm_mul_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
33339        simd_extract::<_, f32>(a, 0) * simd_extract::<_, f32>(a, 1)
33340    }
33341}
33342
33343/// Reduce the packed single-precision (32-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a.
33344///
33345/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_ps&expand=4605)
33346#[inline]
33347#[target_feature(enable = "avx512f")]
33348#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33349pub fn _mm512_mask_reduce_mul_ps(k: __mmask16, a: __m512) -> f32 {
33350    unsafe { _mm512_reduce_mul_ps(simd_select_bitmask(k, a, _mm512_set1_ps(1.))) }
33351}
33352
33353/// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication. Returns the product of all elements in a.
33354///
33355/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_pd&expand=4604)
33356#[inline]
33357#[target_feature(enable = "avx512f")]
33358#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33359pub fn _mm512_reduce_mul_pd(a: __m512d) -> f64 {
33360    unsafe {
33361        let a = _mm256_mul_pd(
33362            _mm512_extractf64x4_pd::<0>(a),
33363            _mm512_extractf64x4_pd::<1>(a),
33364        );
33365        let a = _mm_mul_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
33366        simd_extract::<_, f64>(a, 0) * simd_extract::<_, f64>(a, 1)
33367    }
33368}
33369
33370/// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a.
33371///
33372/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_pd&expand=4603)
33373#[inline]
33374#[target_feature(enable = "avx512f")]
33375#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33376pub fn _mm512_mask_reduce_mul_pd(k: __mmask8, a: __m512d) -> f64 {
33377    unsafe { _mm512_reduce_mul_pd(simd_select_bitmask(k, a, _mm512_set1_pd(1.))) }
33378}
33379
33380/// Reduce the packed signed 32-bit integers in a by maximum. Returns the maximum of all elements in a.
33381///
33382/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epi32&expand=4576)
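///
/// # Examples
///
/// A minimal illustrative sketch (arbitrary example values):
///
/// ```ignore
/// // Assumes an #[target_feature(enable = "avx512f")] context on AVX-512F hardware.
/// let a = _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
/// assert_eq!(_mm512_reduce_max_epi32(a), 15);
/// ```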
33383#[inline]
33384#[target_feature(enable = "avx512f")]
33385#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33386pub fn _mm512_reduce_max_epi32(a: __m512i) -> i32 {
33387    unsafe { simd_reduce_max(a.as_i32x16()) }
33388}
33389
33390/// Reduce the packed signed 32-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
33391///
33392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epi32&expand=4575)
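///
/// # Examples
///
/// A minimal illustrative sketch (arbitrary example values); inactive lanes
/// fall back to `i32::MIN` and therefore never win:
///
/// ```ignore
/// // Assumes an #[target_feature(enable = "avx512f")] context on AVX-512F hardware.
/// let a = _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
/// // Only the low eight lanes (values 0..=7) are active.
/// assert_eq!(_mm512_mask_reduce_max_epi32(0b0000_0000_1111_1111, a), 7);
/// ```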
33393#[inline]
33394#[target_feature(enable = "avx512f")]
33395#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33396pub fn _mm512_mask_reduce_max_epi32(k: __mmask16, a: __m512i) -> i32 {
33397    unsafe {
33398        simd_reduce_max(simd_select_bitmask(
33399            k,
33400            a.as_i32x16(),
33401            i32x16::splat(i32::MIN),
33402        ))
33403    }
33404}
33405
33406/// Reduce the packed signed 64-bit integers in a by maximum. Returns the maximum of all elements in a.
33407///
33408/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epi64&expand=4578)
33409#[inline]
33410#[target_feature(enable = "avx512f")]
33411#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33412pub fn _mm512_reduce_max_epi64(a: __m512i) -> i64 {
33413    unsafe { simd_reduce_max(a.as_i64x8()) }
33414}
33415
33416/// Reduce the packed signed 64-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
33417///
33418/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epi64&expand=4577)
33419#[inline]
33420#[target_feature(enable = "avx512f")]
33421#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33422pub fn _mm512_mask_reduce_max_epi64(k: __mmask8, a: __m512i) -> i64 {
33423    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(i64::MIN))) }
33424}
33425
33426/// Reduce the packed unsigned 32-bit integers in a by maximum. Returns the maximum of all elements in a.
33427///
33428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epu32&expand=4580)
33429#[inline]
33430#[target_feature(enable = "avx512f")]
33431#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33432pub fn _mm512_reduce_max_epu32(a: __m512i) -> u32 {
33433    unsafe { simd_reduce_max(a.as_u32x16()) }
33434}
33435
33436/// Reduce the packed unsigned 32-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
33437///
33438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epu32&expand=4579)
33439#[inline]
33440#[target_feature(enable = "avx512f")]
33441#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33442pub fn _mm512_mask_reduce_max_epu32(k: __mmask16, a: __m512i) -> u32 {
33443    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u32x16(), u32x16::ZERO)) }
33444}
33445
33446/// Reduce the packed unsigned 64-bit integers in a by maximum. Returns the maximum of all elements in a.
33447///
33448/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epu64&expand=4582)
33449#[inline]
33450#[target_feature(enable = "avx512f")]
33451#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33452pub fn _mm512_reduce_max_epu64(a: __m512i) -> u64 {
33453    unsafe { simd_reduce_max(a.as_u64x8()) }
33454}
33455
33456/// Reduce the packed unsigned 64-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
33457///
33458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epu64&expand=4581)
33459#[inline]
33460#[target_feature(enable = "avx512f")]
33461#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33462pub fn _mm512_mask_reduce_max_epu64(k: __mmask8, a: __m512i) -> u64 {
33463    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u64x8(), u64x8::ZERO)) }
33464}
33465
33466/// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum. Returns the maximum of all elements in a.
33467///
33468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_ps&expand=4586)
33469#[inline]
33470#[target_feature(enable = "avx512f")]
33471#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33472pub fn _mm512_reduce_max_ps(a: __m512) -> f32 {
33473    unsafe {
33474        let a = _mm256_max_ps(
33475            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
33476            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
33477        );
33478        let a = _mm_max_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
33479        let a = _mm_max_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
33480        _mm_cvtss_f32(_mm_max_ss(a, _mm_movehdup_ps(a)))
33481    }
33482}
33483
33484/// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum using mask k. Returns the maximum of all active elements in a.
33485///
33486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_ps&expand=4585)
33487#[inline]
33488#[target_feature(enable = "avx512f")]
33489#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33490pub fn _mm512_mask_reduce_max_ps(k: __mmask16, a: __m512) -> f32 {
33491    _mm512_reduce_max_ps(_mm512_mask_mov_ps(_mm512_set1_ps(f32::MIN), k, a))
33492}
33493
33494/// Reduce the packed double-precision (64-bit) floating-point elements in a by maximum. Returns the maximum of all elements in a.
33495///
33496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_pd&expand=4584)
33497#[inline]
33498#[target_feature(enable = "avx512f")]
33499#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33500pub fn _mm512_reduce_max_pd(a: __m512d) -> f64 {
33501    unsafe {
33502        let a = _mm256_max_pd(
33503            _mm512_extractf64x4_pd::<0>(a),
33504            _mm512_extractf64x4_pd::<1>(a),
33505        );
33506        let a = _mm_max_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
33507        _mm_cvtsd_f64(_mm_max_sd(a, simd_shuffle!(a, a, [1, 0])))
33508    }
33509}
33510
33511/// Reduce the packed double-precision (64-bit) floating-point elements in a by maximum using mask k. Returns the maximum of all active elements in a.
33512///
33513/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_pd&expand=4583)
33514#[inline]
33515#[target_feature(enable = "avx512f")]
33516#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33517pub fn _mm512_mask_reduce_max_pd(k: __mmask8, a: __m512d) -> f64 {
33518    _mm512_reduce_max_pd(_mm512_mask_mov_pd(_mm512_set1_pd(f64::MIN), k, a))
33519}
33520
33521/// Reduce the packed signed 32-bit integers in a by minimum. Returns the minimum of all elements in a.
33522///
33523/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epi32&expand=4588)
33524#[inline]
33525#[target_feature(enable = "avx512f")]
33526#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33527pub fn _mm512_reduce_min_epi32(a: __m512i) -> i32 {
33528    unsafe { simd_reduce_min(a.as_i32x16()) }
33529}
33530
33531/// Reduce the packed signed 32-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
33532///
33533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epi32&expand=4587)
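///
/// # Examples
///
/// A minimal illustrative sketch (arbitrary example values); inactive lanes
/// fall back to `i32::MAX` and therefore never win:
///
/// ```ignore
/// // Assumes an #[target_feature(enable = "avx512f")] context on AVX-512F hardware.
/// let a = _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
/// // Only the high eight lanes (values 8..=15) are active.
/// assert_eq!(_mm512_mask_reduce_min_epi32(0b1111_1111_0000_0000, a), 8);
/// ```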
33534#[inline]
33535#[target_feature(enable = "avx512f")]
33536#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33537pub fn _mm512_mask_reduce_min_epi32(k: __mmask16, a: __m512i) -> i32 {
33538    unsafe {
33539        simd_reduce_min(simd_select_bitmask(
33540            k,
33541            a.as_i32x16(),
33542            i32x16::splat(i32::MAX),
33543        ))
33544    }
33545}
33546
33547/// Reduce the packed signed 64-bit integers in a by minimum. Returns the minimum of all elements in a.
33548///
33549/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epi64&expand=4590)
33550#[inline]
33551#[target_feature(enable = "avx512f")]
33552#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33553pub fn _mm512_reduce_min_epi64(a: __m512i) -> i64 {
33554    unsafe { simd_reduce_min(a.as_i64x8()) }
33555}
33556
33557/// Reduce the packed signed 64-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
33558///
33559/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epi64&expand=4589)
33560#[inline]
33561#[target_feature(enable = "avx512f")]
33562#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33563pub fn _mm512_mask_reduce_min_epi64(k: __mmask8, a: __m512i) -> i64 {
33564    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(i64::MAX))) }
33565}
33566
33567/// Reduce the packed unsigned 32-bit integers in a by minimum. Returns the minimum of all elements in a.
33568///
33569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epu32&expand=4592)
33570#[inline]
33571#[target_feature(enable = "avx512f")]
33572#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33573pub fn _mm512_reduce_min_epu32(a: __m512i) -> u32 {
33574    unsafe { simd_reduce_min(a.as_u32x16()) }
33575}
33576
33577/// Reduce the packed unsigned 32-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
33578///
33579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epu32&expand=4591)
33580#[inline]
33581#[target_feature(enable = "avx512f")]
33582#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33583pub fn _mm512_mask_reduce_min_epu32(k: __mmask16, a: __m512i) -> u32 {
33584    unsafe {
33585        simd_reduce_min(simd_select_bitmask(
33586            k,
33587            a.as_u32x16(),
33588            u32x16::splat(u32::MAX),
33589        ))
33590    }
33591}
33592
33593/// Reduce the packed unsigned 64-bit integers in a by minimum. Returns the minimum of all elements in a.
33594///
33595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epu64&expand=4594)
33596#[inline]
33597#[target_feature(enable = "avx512f")]
33598#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33599pub fn _mm512_reduce_min_epu64(a: __m512i) -> u64 {
33600    unsafe { simd_reduce_min(a.as_u64x8()) }
33601}
33602
33603/// Reduce the packed unsigned 64-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
33604///
33605/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epu64&expand=4593)
33606#[inline]
33607#[target_feature(enable = "avx512f")]
33608#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33609pub fn _mm512_mask_reduce_min_epu64(k: __mmask8, a: __m512i) -> u64 {
33610    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u64x8(), u64x8::splat(u64::MAX))) }
33611}
33612
33613/// Reduce the packed single-precision (32-bit) floating-point elements in a by minimum. Returns the minimum of all elements in a.
33614///
33615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_ps&expand=4598)
33616#[inline]
33617#[target_feature(enable = "avx512f")]
33618#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33619pub fn _mm512_reduce_min_ps(a: __m512) -> f32 {
33620    unsafe {
33621        let a = _mm256_min_ps(
33622            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
33623            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
33624        );
33625        let a = _mm_min_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
33626        let a = _mm_min_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
33627        _mm_cvtss_f32(_mm_min_ss(a, _mm_movehdup_ps(a)))
33628    }
33629}
33630
33631/// Reduce the packed single-precision (32-bit) floating-point elements in a by minimum using mask k. Returns the minimum of all active elements in a.
33632///
33633/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_ps&expand=4597)
33634#[inline]
33635#[target_feature(enable = "avx512f")]
33636#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33637pub fn _mm512_mask_reduce_min_ps(k: __mmask16, a: __m512) -> f32 {
33638    _mm512_reduce_min_ps(_mm512_mask_mov_ps(_mm512_set1_ps(f32::MAX), k, a))
33639}
33640
33641/// Reduce the packed double-precision (64-bit) floating-point elements in a by minimum. Returns the minimum of all elements in a.
33642///
33643/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_pd&expand=4596)
33644#[inline]
33645#[target_feature(enable = "avx512f")]
33646#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33647pub fn _mm512_reduce_min_pd(a: __m512d) -> f64 {
33648    unsafe {
33649        let a = _mm256_min_pd(
33650            _mm512_extractf64x4_pd::<0>(a),
33651            _mm512_extractf64x4_pd::<1>(a),
33652        );
33653        let a = _mm_min_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
33654        _mm_cvtsd_f64(_mm_min_sd(a, simd_shuffle!(a, a, [1, 0])))
33655    }
33656}
33657
33658/// Reduce the packed double-precision (64-bit) floating-point elements in a by minimum using mask k. Returns the minimum of all active elements in a.
33659///
33660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_pd&expand=4595)
33661#[inline]
33662#[target_feature(enable = "avx512f")]
33663#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33664pub fn _mm512_mask_reduce_min_pd(k: __mmask8, a: __m512d) -> f64 {
33665    _mm512_reduce_min_pd(_mm512_mask_mov_pd(_mm512_set1_pd(f64::MAX), k, a))
33666}
33667
33668/// Reduce the packed 32-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
33669///
33670/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_and_epi32&expand=4564)
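///
/// # Examples
///
/// A minimal illustrative sketch (arbitrary example values):
///
/// ```ignore
/// // Assumes an #[target_feature(enable = "avx512f")] context on AVX-512F hardware.
/// let a = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0b0101);
/// // AND-ing fifteen all-ones lanes with 0b0101 leaves 0b0101.
/// assert_eq!(_mm512_reduce_and_epi32(a), 0b0101);
/// ```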
33671#[inline]
33672#[target_feature(enable = "avx512f")]
33673#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33674pub fn _mm512_reduce_and_epi32(a: __m512i) -> i32 {
33675    unsafe { simd_reduce_and(a.as_i32x16()) }
33676}
33677
33678/// Reduce the packed 32-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
33679///
33680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_and_epi32&expand=4563)
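///
/// # Examples
///
/// A minimal illustrative sketch (arbitrary example values); inactive lanes
/// contribute the all-ones identity, so they never clear any bit:
///
/// ```ignore
/// // Assumes an #[target_feature(enable = "avx512f")] context on AVX-512F hardware.
/// let a = _mm512_set1_epi32(0b0101);
/// // Only lanes 0 and 1 are active; the result is their bitwise AND.
/// assert_eq!(_mm512_mask_reduce_and_epi32(0b0000_0000_0000_0011, a), 0b0101);
/// ```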
33681#[inline]
33682#[target_feature(enable = "avx512f")]
33683#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33684pub fn _mm512_mask_reduce_and_epi32(k: __mmask16, a: __m512i) -> i32 {
33685    unsafe { simd_reduce_and(simd_select_bitmask(k, a.as_i32x16(), i32x16::splat(-1))) }
33686}
33687
33688/// Reduce the packed 64-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
33689///
33690/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_and_epi64&expand=4566)
33691#[inline]
33692#[target_feature(enable = "avx512f")]
33693#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33694pub fn _mm512_reduce_and_epi64(a: __m512i) -> i64 {
33695    unsafe { simd_reduce_and(a.as_i64x8()) }
33696}
33697
33698/// Reduce the packed 64-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
33699///
33700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_and_epi64&expand=4565)
33701#[inline]
33702#[target_feature(enable = "avx512f")]
33703#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33704pub fn _mm512_mask_reduce_and_epi64(k: __mmask8, a: __m512i) -> i64 {
33705    unsafe { simd_reduce_and(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(-1))) }
33706}
33707
33708/// Reduce the packed 32-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
33709///
33710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_or_epi32&expand=4608)
33711#[inline]
33712#[target_feature(enable = "avx512f")]
33713#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33714pub fn _mm512_reduce_or_epi32(a: __m512i) -> i32 {
33715    unsafe { simd_reduce_or(a.as_i32x16()) }
33716}
33717
33718/// Reduce the packed 32-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
33719///
33720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_or_epi32&expand=4607)
33721#[inline]
33722#[target_feature(enable = "avx512f")]
33723#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33724pub fn _mm512_mask_reduce_or_epi32(k: __mmask16, a: __m512i) -> i32 {
33725    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO)) }
33726}
33727
33728/// Reduce the packed 64-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
33729///
33730/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_or_epi64&expand=4610)
33731#[inline]
33732#[target_feature(enable = "avx512f")]
33733#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33734pub fn _mm512_reduce_or_epi64(a: __m512i) -> i64 {
33735    unsafe { simd_reduce_or(a.as_i64x8()) }
33736}
33737
33738/// Reduce the packed 64-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
33739///
33740/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_or_epi64&expand=4609)
33741#[inline]
33742#[target_feature(enable = "avx512f")]
33743#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33744pub fn _mm512_mask_reduce_or_epi64(k: __mmask8, a: __m512i) -> i64 {
33745    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO)) }
33746}
33747
33748/// Returns vector of type `__m512d` with indeterminate elements.
33749/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
33750/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
33751/// In practice, this is typically equivalent to [`mem::zeroed`].
33752///
33753/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_pd)
33754#[inline]
33755#[target_feature(enable = "avx512f")]
33756#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33757// This intrinsic has no corresponding instruction.
33758pub fn _mm512_undefined_pd() -> __m512d {
33759    unsafe { const { mem::zeroed() } }
33760}
33761
33762/// Returns vector of type `__m512` with indeterminate elements.
33763/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
33764/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
33765/// In practice, this is typically equivalent to [`mem::zeroed`].
33766///
33767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_ps)
33768#[inline]
33769#[target_feature(enable = "avx512f")]
33770#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33771// This intrinsic has no corresponding instruction.
33772pub fn _mm512_undefined_ps() -> __m512 {
33773    unsafe { const { mem::zeroed() } }
33774}
33775
33776/// Returns vector of type `__m512i` with indeterminate elements.
33777/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
33778/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
33779/// In practice, this is typically equivalent to [`mem::zeroed`].
33780///
33781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_epi32&expand=5995)
33782#[inline]
33783#[target_feature(enable = "avx512f")]
33784#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33785// This intrinsic has no corresponding instruction.
33786pub fn _mm512_undefined_epi32() -> __m512i {
33787    unsafe { const { mem::zeroed() } }
33788}
33789
33790/// Returns vector of type `__m512` with indeterminate elements.
33791/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
33792/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
33793/// In practice, this is typically equivalent to [`mem::zeroed`].
33794///
33795/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined&expand=5994)
33796#[inline]
33797#[target_feature(enable = "avx512f")]
33798#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33799// This intrinsic has no corresponding instruction.
33800pub fn _mm512_undefined() -> __m512 {
33801    unsafe { const { mem::zeroed() } }
33802}
33803
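// Editor's usage sketch (not part of the upstream source): an "undefined" vector is
// only a placeholder holding some valid value; every element must be overwritten
// before anything meaningful is read from it.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_undefined_ps() {
    let acc = _mm512_undefined_ps(); // indeterminate contents, in practice usually zero
    // Fully overwrite the placeholder before relying on its value.
    let acc = _mm512_mask_mov_ps(acc, 0xffff, _mm512_set1_ps(1.0));
    assert_eq!(_mm512_reduce_add_ps(acc), 16.0);
}
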
33804/// Load 512-bits (composed of 16 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
33805///
33806/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi32&expand=3377)
33807#[inline]
33808#[target_feature(enable = "avx512f")]
33809#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33810#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
33811pub unsafe fn _mm512_loadu_epi32(mem_addr: *const i32) -> __m512i {
33812    ptr::read_unaligned(mem_addr as *const __m512i)
33813}
33814
33815/// Load 256-bits (composed of 8 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
33816///
33817/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi32&expand=3374)
33818#[inline]
33819#[target_feature(enable = "avx512f,avx512vl")]
33820#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33821#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
33822pub unsafe fn _mm256_loadu_epi32(mem_addr: *const i32) -> __m256i {
33823    ptr::read_unaligned(mem_addr as *const __m256i)
33824}
33825
33826/// Load 128-bits (composed of 4 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
33827///
33828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi32&expand=3371)
33829#[inline]
33830#[target_feature(enable = "avx512f,avx512vl")]
33831#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33832#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
33833pub unsafe fn _mm_loadu_epi32(mem_addr: *const i32) -> __m128i {
33834    ptr::read_unaligned(mem_addr as *const __m128i)
33835}
33836
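// Editor's usage sketch (not part of the upstream source): any readable run of 16
// consecutive i32 values can be loaded; no 64-byte alignment is required.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_loadu_epi32() {
    let data: [i32; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
    let v = _mm512_loadu_epi32(data.as_ptr());
    assert_eq!(_mm512_reduce_add_epi32(v), 136); // 1 + 2 + ... + 16
}
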
33837/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33838///
33839/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_storeu_epi16&expand=1460)
33840#[inline]
33841#[target_feature(enable = "avx512f")]
33842#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33843#[cfg_attr(test, assert_instr(vpmovdw))]
33844pub unsafe fn _mm512_mask_cvtepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
33845    vpmovdwmem(mem_addr, a.as_i32x16(), k);
33846}
33847
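// Editor's usage sketch (not part of the upstream source): the masked store form
// writes only the 16-bit results of active lanes; memory behind inactive lanes is
// left untouched. Values and mask are arbitrary example inputs.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_mask_cvtepi32_storeu_epi16() {
    let a = _mm512_set1_epi32(0x0001_ffff); // low 16 bits are 0xffff
    let mut out = [0i16; 16];
    _mm512_mask_cvtepi32_storeu_epi16(out.as_mut_ptr() as *mut i8, 0b11, a);
    assert_eq!(out[0], -1); // truncated low half of lane 0
    assert_eq!(out[1], -1); // truncated low half of lane 1
    assert_eq!(out[2], 0); // lane 2 was masked off, so out[2] is untouched
}
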
33848/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33849///
33850/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_storeu_epi16&expand=1462)
33851#[inline]
33852#[target_feature(enable = "avx512f,avx512vl")]
33853#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33854#[cfg_attr(test, assert_instr(vpmovdw))]
33855pub unsafe fn _mm256_mask_cvtepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
33856    vpmovdwmem256(mem_addr, a.as_i32x8(), k);
33857}
33858
33859/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33860///
33861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_storeu_epi16&expand=1461)
33862#[inline]
33863#[target_feature(enable = "avx512f,avx512vl")]
33864#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33865#[cfg_attr(test, assert_instr(vpmovdw))]
33866pub unsafe fn _mm_mask_cvtepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
33867    vpmovdwmem128(mem_addr, a.as_i32x4(), k);
33868}
33869
33870/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33871///
33872/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_storeu_epi16&expand=1833)
33873#[inline]
33874#[target_feature(enable = "avx512f")]
33875#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33876#[cfg_attr(test, assert_instr(vpmovsdw))]
33877pub unsafe fn _mm512_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
33878    vpmovsdwmem(mem_addr, a.as_i32x16(), k);
33879}
33880
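// Editor's usage sketch (not part of the upstream source): unlike the plain
// truncating store above, the signed-saturating form clamps out-of-range values to
// i16::MIN / i16::MAX instead of keeping only the low bits.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_mask_cvtsepi32_storeu_epi16() {
    let a = _mm512_set1_epi32(100_000); // does not fit in an i16
    let mut out = [0i16; 16];
    _mm512_mask_cvtsepi32_storeu_epi16(out.as_mut_ptr() as *mut i8, 0b1, a);
    assert_eq!(out[0], i16::MAX); // saturated rather than wrapped
    assert_eq!(out[1], 0); // inactive lane, memory untouched
}
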
33881/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33882///
33883/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_storeu_epi16&expand=1832)
33884#[inline]
33885#[target_feature(enable = "avx512f,avx512vl")]
33886#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33887#[cfg_attr(test, assert_instr(vpmovsdw))]
33888pub unsafe fn _mm256_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
33889    vpmovsdwmem256(mem_addr, a.as_i32x8(), k);
33890}
33891
33892/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33893///
33894/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_storeu_epi16&expand=1831)
33895#[inline]
33896#[target_feature(enable = "avx512f,avx512vl")]
33897#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33898#[cfg_attr(test, assert_instr(vpmovsdw))]
33899pub unsafe fn _mm_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
33900    vpmovsdwmem128(mem_addr, a.as_i32x4(), k);
33901}
33902
33903/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33904///
33905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_storeu_epi16&expand=2068)
33906#[inline]
33907#[target_feature(enable = "avx512f")]
33908#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33909#[cfg_attr(test, assert_instr(vpmovusdw))]
33910pub unsafe fn _mm512_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
33911    vpmovusdwmem(mem_addr, a.as_i32x16(), k);
33912}
33913
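// Editor's usage sketch (not part of the upstream source): the source lanes are
// interpreted as unsigned, so an all-ones bit pattern (4294967295) clamps to
// u16::MAX rather than being treated as a negative value.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_mask_cvtusepi32_storeu_epi16() {
    let a = _mm512_set1_epi32(-1); // 0xFFFF_FFFF when viewed as unsigned
    let mut out = [0u16; 16];
    _mm512_mask_cvtusepi32_storeu_epi16(out.as_mut_ptr() as *mut i8, 0b1, a);
    assert_eq!(out[0], u16::MAX);
    assert_eq!(out[1], 0); // inactive lane, memory untouched
}
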
33914/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33915///
33916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_storeu_epi16&expand=2067)
33917#[inline]
33918#[target_feature(enable = "avx512f,avx512vl")]
33919#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33920#[cfg_attr(test, assert_instr(vpmovusdw))]
33921pub unsafe fn _mm256_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
33922    vpmovusdwmem256(mem_addr, a.as_i32x8(), k);
33923}
33924
33925/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33926///
33927/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_storeu_epi16&expand=2066)
33928#[inline]
33929#[target_feature(enable = "avx512f,avx512vl")]
33930#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33931#[cfg_attr(test, assert_instr(vpmovusdw))]
33932pub unsafe fn _mm_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
33933    vpmovusdwmem128(mem_addr, a.as_i32x4(), k);
33934}
33935
33936/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33937///
33938/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_storeu_epi8&expand=1463)
33939#[inline]
33940#[target_feature(enable = "avx512f")]
33941#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33942#[cfg_attr(test, assert_instr(vpmovdb))]
33943pub unsafe fn _mm512_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
33944    vpmovdbmem(mem_addr, a.as_i32x16(), k);
33945}
33946
33947/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33948///
33949/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_storeu_epi8&expand=1462)
33950#[inline]
33951#[target_feature(enable = "avx512f,avx512vl")]
33952#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33953#[cfg_attr(test, assert_instr(vpmovdb))]
33954pub unsafe fn _mm256_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
33955    vpmovdbmem256(mem_addr, a.as_i32x8(), k);
33956}
33957
33958/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33959///
33960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_storeu_epi8&expand=1461)
33961#[inline]
33962#[target_feature(enable = "avx512f,avx512vl")]
33963#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33964#[cfg_attr(test, assert_instr(vpmovdb))]
33965pub unsafe fn _mm_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
33966    vpmovdbmem128(mem_addr, a.as_i32x4(), k);
33967}
33968
33969/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33970///
33971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_storeu_epi8&expand=1836)
33972#[inline]
33973#[target_feature(enable = "avx512f")]
33974#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33975#[cfg_attr(test, assert_instr(vpmovsdb))]
33976pub unsafe fn _mm512_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
33977    vpmovsdbmem(mem_addr, a.as_i32x16(), k);
33978}
33979
33980/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33981///
33982/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_storeu_epi8&expand=1835)
33983#[inline]
33984#[target_feature(enable = "avx512f,avx512vl")]
33985#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33986#[cfg_attr(test, assert_instr(vpmovsdb))]
33987pub unsafe fn _mm256_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
33988    vpmovsdbmem256(mem_addr, a.as_i32x8(), k);
33989}
33990
33991/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33992///
33993/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_storeu_epi8&expand=1834)
33994#[inline]
33995#[target_feature(enable = "avx512f,avx512vl")]
33996#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33997#[cfg_attr(test, assert_instr(vpmovsdb))]
33998pub unsafe fn _mm_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
33999    vpmovsdbmem128(mem_addr, a.as_i32x4(), k);
34000}
34001
34002/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34003///
34004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_storeu_epi8&expand=2071)
34005#[inline]
34006#[target_feature(enable = "avx512f")]
34007#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34008#[cfg_attr(test, assert_instr(vpmovusdb))]
34009pub unsafe fn _mm512_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
34010    vpmovusdbmem(mem_addr, a.as_i32x16(), k);
34011}
34012
34013/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34014///
34015/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_storeu_epi8&expand=2070)
34016#[inline]
34017#[target_feature(enable = "avx512f,avx512vl")]
34018#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34019#[cfg_attr(test, assert_instr(vpmovusdb))]
34020pub unsafe fn _mm256_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
34021    vpmovusdbmem256(mem_addr, a.as_i32x8(), k);
34022}
34023
34024/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34025///
34026/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_storeu_epi8&expand=2069)
34027#[inline]
34028#[target_feature(enable = "avx512f,avx512vl")]
34029#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34030#[cfg_attr(test, assert_instr(vpmovusdb))]
34031pub unsafe fn _mm_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
34032    vpmovusdbmem128(mem_addr, a.as_i32x4(), k);
34033}
34034
34035/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34036///
34037/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi16&expand=1513)
34038#[inline]
34039#[target_feature(enable = "avx512f")]
34040#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34041#[cfg_attr(test, assert_instr(vpmovqw))]
34042pub unsafe fn _mm512_mask_cvtepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
34043    vpmovqwmem(mem_addr, a.as_i64x8(), k);
34044}
34045
34046/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34047///
34048/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi16&expand=1512)
34049#[inline]
34050#[target_feature(enable = "avx512f,avx512vl")]
34051#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34052#[cfg_attr(test, assert_instr(vpmovqw))]
34053pub unsafe fn _mm256_mask_cvtepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
34054    vpmovqwmem256(mem_addr, a.as_i64x4(), k);
34055}
34056
34057/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34058///
34059/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi16&expand=1511)
34060#[inline]
34061#[target_feature(enable = "avx512f,avx512vl")]
34062#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34063#[cfg_attr(test, assert_instr(vpmovqw))]
34064pub unsafe fn _mm_mask_cvtepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
34065    vpmovqwmem128(mem_addr, a.as_i64x2(), k);
34066}
34067
34068/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34069///
34070/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi16&expand=1866)
34071#[inline]
34072#[target_feature(enable = "avx512f")]
34073#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34074#[cfg_attr(test, assert_instr(vpmovsqw))]
34075pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
34076    vpmovsqwmem(mem_addr, a.as_i64x8(), k);
34077}
34078
34079/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34080///
34081/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi16&expand=1865)
34082#[inline]
34083#[target_feature(enable = "avx512f,avx512vl")]
34084#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34085#[cfg_attr(test, assert_instr(vpmovsqw))]
34086pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
34087    vpmovsqwmem256(mem_addr, a.as_i64x4(), k);
34088}
34089
34090/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34091///
34092/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi16&expand=1864)
34093#[inline]
34094#[target_feature(enable = "avx512f,avx512vl")]
34095#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34096#[cfg_attr(test, assert_instr(vpmovsqw))]
34097pub unsafe fn _mm_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
34098    vpmovsqwmem128(mem_addr, a.as_i64x2(), k);
34099}
34100
34101/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34102///
34103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi16&expand=2101)
34104#[inline]
34105#[target_feature(enable = "avx512f")]
34106#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34107#[cfg_attr(test, assert_instr(vpmovusqw))]
34108pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
34109    vpmovusqwmem(mem_addr, a.as_i64x8(), k);
34110}
34111
34112/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34113///
34114/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi16&expand=2100)
34115#[inline]
34116#[target_feature(enable = "avx512f,avx512vl")]
34117#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34118#[cfg_attr(test, assert_instr(vpmovusqw))]
34119pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
34120    vpmovusqwmem256(mem_addr, a.as_i64x4(), k);
34121}
34122
34123/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34124///
34125/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi16&expand=2099)
34126#[inline]
34127#[target_feature(enable = "avx512f,avx512vl")]
34128#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34129#[cfg_attr(test, assert_instr(vpmovusqw))]
34130pub unsafe fn _mm_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
34131    vpmovusqwmem128(mem_addr, a.as_i64x2(), k);
34132}
34133
34134/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34135///
34136/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi8&expand=1519)
34137#[inline]
34138#[target_feature(enable = "avx512f")]
34139#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34140#[cfg_attr(test, assert_instr(vpmovqb))]
34141pub unsafe fn _mm512_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
34142    vpmovqbmem(mem_addr, a.as_i64x8(), k);
34143}
34144
34145/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34146///
34147/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi8&expand=1518)
34148#[inline]
34149#[target_feature(enable = "avx512f,avx512vl")]
34150#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34151#[cfg_attr(test, assert_instr(vpmovqb))]
34152pub unsafe fn _mm256_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
34153    vpmovqbmem256(mem_addr, a.as_i64x4(), k);
34154}
34155
34156/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34157///
34158/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi8&expand=1517)
34159#[inline]
34160#[target_feature(enable = "avx512f,avx512vl")]
34161#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34162#[cfg_attr(test, assert_instr(vpmovqb))]
34163pub unsafe fn _mm_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
34164    vpmovqbmem128(mem_addr, a.as_i64x2(), k);
34165}
34166
34167/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34168///
34169/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi8&expand=1872)
34170#[inline]
34171#[target_feature(enable = "avx512f")]
34172#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34173#[cfg_attr(test, assert_instr(vpmovsqb))]
34174pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
34175    vpmovsqbmem(mem_addr, a.as_i64x8(), k);
34176}
34177
34178/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34179///
34180/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi8&expand=1871)
34181#[inline]
34182#[target_feature(enable = "avx512f,avx512vl")]
34183#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34184#[cfg_attr(test, assert_instr(vpmovsqb))]
34185pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
34186    vpmovsqbmem256(mem_addr, a.as_i64x4(), k);
34187}
34188
34189/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34190///
34191/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi8&expand=1870)
34192#[inline]
34193#[target_feature(enable = "avx512f,avx512vl")]
34194#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34195#[cfg_attr(test, assert_instr(vpmovsqb))]
34196pub unsafe fn _mm_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
34197    vpmovsqbmem128(mem_addr, a.as_i64x2(), k);
34198}
34199
34200/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34201///
34202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi8&expand=2107)
34203#[inline]
34204#[target_feature(enable = "avx512f")]
34205#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34206#[cfg_attr(test, assert_instr(vpmovusqb))]
34207pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
34208    vpmovusqbmem(mem_addr, a.as_i64x8(), k);
34209}
34210
34211/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34212///
34213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi8&expand=2106)
34214#[inline]
34215#[target_feature(enable = "avx512f,avx512vl")]
34216#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34217#[cfg_attr(test, assert_instr(vpmovusqb))]
34218pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
34219    vpmovusqbmem256(mem_addr, a.as_i64x4(), k);
34220}
34221
34222/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34223///
34224/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi8&expand=2105)
34225#[inline]
34226#[target_feature(enable = "avx512f,avx512vl")]
34227#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34228#[cfg_attr(test, assert_instr(vpmovusqb))]
34229pub unsafe fn _mm_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
34230    vpmovusqbmem128(mem_addr, a.as_i64x2(), k);
34231}
34232
34233/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34234///
34235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi32&expand=1516)
34236#[inline]
34237#[target_feature(enable = "avx512f")]
34238#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34239#[cfg_attr(test, assert_instr(vpmovqd))]
34240pub unsafe fn _mm512_mask_cvtepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
34241    vpmovqdmem(mem_addr, a.as_i64x8(), k);
34242}
34243
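// Editor's usage sketch (not part of the upstream source): each active 64-bit lane
// is truncated to its low 32 bits before being written; with the mask used here
// only the first two i32 slots of the destination are touched.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_mask_cvtepi64_storeu_epi32() {
    let a = _mm512_set1_epi64(0x1_0000_0002); // low 32 bits are 2
    let mut out = [0i32; 8];
    _mm512_mask_cvtepi64_storeu_epi32(out.as_mut_ptr() as *mut i8, 0b0000_0011, a);
    assert_eq!(out[0], 2);
    assert_eq!(out[1], 2);
    assert_eq!(out[2], 0); // masked-off lane, memory untouched
}
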
34244/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34245///
34246/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi32&expand=1515)
34247#[inline]
34248#[target_feature(enable = "avx512f,avx512vl")]
34249#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34250#[cfg_attr(test, assert_instr(vpmovqd))]
34251pub unsafe fn _mm256_mask_cvtepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
34252    vpmovqdmem256(mem_addr, a.as_i64x4(), k);
34253}
34254
34255/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34256///
34257/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi32&expand=1514)
34258#[inline]
34259#[target_feature(enable = "avx512f,avx512vl")]
34260#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34261#[cfg_attr(test, assert_instr(vpmovqd))]
34262pub unsafe fn _mm_mask_cvtepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
34263    vpmovqdmem128(mem_addr, a.as_i64x2(), k);
34264}
34265
34266/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34267///
34268/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi32&expand=1869)
34269#[inline]
34270#[target_feature(enable = "avx512f")]
34271#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34272#[cfg_attr(test, assert_instr(vpmovsqd))]
34273pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
34274    vpmovsqdmem(mem_addr, a.as_i64x8(), k);
34275}
34276
34277/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34278///
34279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi32&expand=1868)
34280#[inline]
34281#[target_feature(enable = "avx512f,avx512vl")]
34282#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34283#[cfg_attr(test, assert_instr(vpmovsqd))]
34284pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
34285    vpmovsqdmem256(mem_addr, a.as_i64x4(), k);
34286}
34287
34288/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34289///
34290/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi32&expand=1867)
34291#[inline]
34292#[target_feature(enable = "avx512f,avx512vl")]
34293#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34294#[cfg_attr(test, assert_instr(vpmovsqd))]
34295pub unsafe fn _mm_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
34296    vpmovsqdmem128(mem_addr, a.as_i64x2(), k);
34297}
34298
34299/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34300///
34301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi32&expand=2104)
34302#[inline]
34303#[target_feature(enable = "avx512f")]
34304#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34305#[cfg_attr(test, assert_instr(vpmovusqd))]
34306pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
34307    vpmovusqdmem(mem_addr, a.as_i64x8(), k);
34308}
34309
34310/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34311///
34312/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi32&expand=2103)
34313#[inline]
34314#[target_feature(enable = "avx512f,avx512vl")]
34315#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34316#[cfg_attr(test, assert_instr(vpmovusqd))]
34317pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
34318    vpmovusqdmem256(mem_addr, a.as_i64x4(), k);
34319}
34320
34321/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34322///
34323/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi32&expand=2102)
34324#[inline]
34325#[target_feature(enable = "avx512f,avx512vl")]
34326#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34327#[cfg_attr(test, assert_instr(vpmovusqd))]
34328pub unsafe fn _mm_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
34329    vpmovusqdmem128(mem_addr, a.as_i64x2(), k);
34330}
34331
34332/// Store 512-bits (composed of 16 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
34333///
34334/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi32&expand=5628)
34335#[inline]
34336#[target_feature(enable = "avx512f")]
34337#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34338#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
34339pub unsafe fn _mm512_storeu_epi32(mem_addr: *mut i32, a: __m512i) {
34340    ptr::write_unaligned(mem_addr as *mut __m512i, a);
34341}
34342
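// Editor's usage sketch (not part of the upstream source): the destination only has
// to be valid for 64 bytes of writes; no particular alignment is needed.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_storeu_epi32() {
    let v = _mm512_set1_epi32(7);
    let mut out = [0i32; 16];
    _mm512_storeu_epi32(out.as_mut_ptr(), v);
    assert_eq!(out, [7i32; 16]);
}
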
34343/// Store 256-bits (composed of 8 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
34344///
34345/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi32&expand=5626)
34346#[inline]
34347#[target_feature(enable = "avx512f,avx512vl")]
34348#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34349#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
34350pub unsafe fn _mm256_storeu_epi32(mem_addr: *mut i32, a: __m256i) {
34351    ptr::write_unaligned(mem_addr as *mut __m256i, a);
34352}
34353
34354/// Store 128-bits (composed of 4 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
34355///
34356/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi32&expand=5624)
34357#[inline]
34358#[target_feature(enable = "avx512f,avx512vl")]
34359#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34360#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
34361pub unsafe fn _mm_storeu_epi32(mem_addr: *mut i32, a: __m128i) {
34362    ptr::write_unaligned(mem_addr as *mut __m128i, a);
34363}
34364
34365/// Load 512-bits (composed of 8 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
34366///
34367/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi64&expand=3386)
34368#[inline]
34369#[target_feature(enable = "avx512f")]
34370#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34371#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
34372pub unsafe fn _mm512_loadu_epi64(mem_addr: *const i64) -> __m512i {
34373    ptr::read_unaligned(mem_addr as *const __m512i)
34374}
34375
34376/// Load 256-bits (composed of 4 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
34377///
34378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi64&expand=3383)
34379#[inline]
34380#[target_feature(enable = "avx512f,avx512vl")]
34381#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34382#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
34383pub unsafe fn _mm256_loadu_epi64(mem_addr: *const i64) -> __m256i {
34384    ptr::read_unaligned(mem_addr as *const __m256i)
34385}
34386
34387/// Load 128-bits (composed of 2 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
34388///
34389/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi64&expand=3380)
34390#[inline]
34391#[target_feature(enable = "avx512f,avx512vl")]
34392#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34393#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
34394pub unsafe fn _mm_loadu_epi64(mem_addr: *const i64) -> __m128i {
34395    ptr::read_unaligned(mem_addr as *const __m128i)
34396}
34397
34398/// Store 512-bits (composed of 8 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
34399///
34400/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi64&expand=5634)
34401#[inline]
34402#[target_feature(enable = "avx512f")]
34403#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34404#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
34405pub unsafe fn _mm512_storeu_epi64(mem_addr: *mut i64, a: __m512i) {
34406    ptr::write_unaligned(mem_addr as *mut __m512i, a);
34407}
34408
34409/// Store 256-bits (composed of 4 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
34410///
34411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi64&expand=5632)
34412#[inline]
34413#[target_feature(enable = "avx512f,avx512vl")]
34414#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34415#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
34416pub unsafe fn _mm256_storeu_epi64(mem_addr: *mut i64, a: __m256i) {
34417    ptr::write_unaligned(mem_addr as *mut __m256i, a);
34418}
34419
34420/// Store 128-bits (composed of 2 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
34421///
34422/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi64&expand=5630)
34423#[inline]
34424#[target_feature(enable = "avx512f,avx512vl")]
34425#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34426#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
34427pub unsafe fn _mm_storeu_epi64(mem_addr: *mut i64, a: __m128i) {
34428    ptr::write_unaligned(mem_addr as *mut __m128i, a);
34429}
34430
34431/// Load 512-bits of integer data from memory into dst. mem_addr does not need to be aligned on any particular boundary.
34432///
34433/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_si512&expand=3420)
34434#[inline]
34435#[target_feature(enable = "avx512f")]
34436#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34437#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
34438pub unsafe fn _mm512_loadu_si512(mem_addr: *const i32) -> __m512i {
34439    ptr::read_unaligned(mem_addr as *const __m512i)
34440}
34441
34442/// Store 512-bits of integer data from a into memory. mem_addr does not need to be aligned on any particular boundary.
34443///
34444/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_si512&expand=5657)
34445#[inline]
34446#[target_feature(enable = "avx512f")]
34447#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34448#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
34449pub unsafe fn _mm512_storeu_si512(mem_addr: *mut __m512i, a: __m512i) {
34450    ptr::write_unaligned(mem_addr, a);
34451}
34452
34453/// Loads 512-bits (composed of 8 packed double-precision (64-bit)
34454/// floating-point elements) from memory into result.
34455/// `mem_addr` does not need to be aligned on any particular boundary.
34456///
34457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_pd)
34458#[inline]
34459#[target_feature(enable = "avx512f")]
34460#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34461#[cfg_attr(test, assert_instr(vmovups))]
34462pub unsafe fn _mm512_loadu_pd(mem_addr: *const f64) -> __m512d {
34463    ptr::read_unaligned(mem_addr as *const __m512d)
34464}
34465
34466/// Stores 512-bits (composed of 8 packed double-precision (64-bit)
34467/// floating-point elements) from `a` into memory.
34468/// `mem_addr` does not need to be aligned on any particular boundary.
34469///
34470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_pd)
34471#[inline]
34472#[target_feature(enable = "avx512f")]
34473#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34474#[cfg_attr(test, assert_instr(vmovups))]
34475pub unsafe fn _mm512_storeu_pd(mem_addr: *mut f64, a: __m512d) {
34476    ptr::write_unaligned(mem_addr as *mut __m512d, a);
34477}
34478
34479/// Loads 512-bits (composed of 16 packed single-precision (32-bit)
34480/// floating-point elements) from memory into result.
34481/// `mem_addr` does not need to be aligned on any particular boundary.
34482///
34483/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_ps)
34484#[inline]
34485#[target_feature(enable = "avx512f")]
34486#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34487#[cfg_attr(test, assert_instr(vmovups))]
34488pub unsafe fn _mm512_loadu_ps(mem_addr: *const f32) -> __m512 {
34489    ptr::read_unaligned(mem_addr as *const __m512)
34490}
34491
34492/// Stores 512-bits (composed of 16 packed single-precision (32-bit)
34493/// floating-point elements) from `a` into memory.
34494/// `mem_addr` does not need to be aligned on any particular boundary.
34495///
34496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_ps)
34497#[inline]
34498#[target_feature(enable = "avx512f")]
34499#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34500#[cfg_attr(test, assert_instr(vmovups))]
34501pub unsafe fn _mm512_storeu_ps(mem_addr: *mut f32, a: __m512) {
34502    ptr::write_unaligned(mem_addr as *mut __m512, a);
34503}
34504
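// Editor's usage sketch (not part of the upstream source): a typical unaligned
// load / compute / store round trip over 16 single-precision values.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_loadu_storeu_ps() {
    let input = [1.5f32; 16];
    let mut output = [0.0f32; 16];
    let v = _mm512_loadu_ps(input.as_ptr());
    let doubled = _mm512_add_ps(v, v);
    _mm512_storeu_ps(output.as_mut_ptr(), doubled);
    assert_eq!(output, [3.0f32; 16]);
}
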
34505/// Load 512-bits of integer data from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34506///
34507/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_si512&expand=3345)
34508#[inline]
34509#[target_feature(enable = "avx512f")]
34510#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34511#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
34512pub unsafe fn _mm512_load_si512(mem_addr: *const i32) -> __m512i {
34513    ptr::read(mem_addr as *const __m512i)
34514}
34515
34516/// Store 512-bits of integer data from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34517///
34518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_si512&expand=5598)
34519#[inline]
34520#[target_feature(enable = "avx512f")]
34521#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34522#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
34523pub unsafe fn _mm512_store_si512(mem_addr: *mut __m512i, a: __m512i) {
34524    ptr::write(mem_addr, a);
34525}
34526
34527/// Load 512-bits (composed of 16 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34528///
34529/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_epi32&expand=3304)
34530#[inline]
34531#[target_feature(enable = "avx512f")]
34532#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34533#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
34534pub unsafe fn _mm512_load_epi32(mem_addr: *const i32) -> __m512i {
34535    ptr::read(mem_addr as *const __m512i)
34536}
34537
34538/// Load 256-bits (composed of 8 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
34539///
34540/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_epi32&expand=3301)
34541#[inline]
34542#[target_feature(enable = "avx512f,avx512vl")]
34543#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34544#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
34545pub unsafe fn _mm256_load_epi32(mem_addr: *const i32) -> __m256i {
34546    ptr::read(mem_addr as *const __m256i)
34547}
34548
34549/// Load 128-bits (composed of 4 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
34550///
34551/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_epi32&expand=3298)
34552#[inline]
34553#[target_feature(enable = "avx512f,avx512vl")]
34554#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34555#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
34556pub unsafe fn _mm_load_epi32(mem_addr: *const i32) -> __m128i {
34557    ptr::read(mem_addr as *const __m128i)
34558}
34559
34560/// Store 512-bits (composed of 16 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34561///
34562/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_epi32&expand=5569)
34563#[inline]
34564#[target_feature(enable = "avx512f")]
34565#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34566#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
34567pub unsafe fn _mm512_store_epi32(mem_addr: *mut i32, a: __m512i) {
34568    ptr::write(mem_addr as *mut __m512i, a);
34569}
34570
34571/// Store 256-bits (composed of 8 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
34572///
34573/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_epi32&expand=5567)
34574#[inline]
34575#[target_feature(enable = "avx512f,avx512vl")]
34576#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34577#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
34578pub unsafe fn _mm256_store_epi32(mem_addr: *mut i32, a: __m256i) {
34579    ptr::write(mem_addr as *mut __m256i, a);
34580}
34581
34582/// Store 128-bits (composed of 4 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
34583///
34584/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_epi32&expand=5565)
34585#[inline]
34586#[target_feature(enable = "avx512f,avx512vl")]
34587#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34588#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
34589pub unsafe fn _mm_store_epi32(mem_addr: *mut i32, a: __m128i) {
34590    ptr::write(mem_addr as *mut __m128i, a);
34591}
34592
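// Editor's usage sketch (not part of the upstream source): the aligned load/store
// intrinsics above require a 64-byte-aligned address, which `#[repr(align(64))]`
// guarantees here; passing an insufficiently aligned pointer is undefined behaviour.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_load_store_epi32_aligned() {
    #[repr(align(64))]
    struct Aligned([i32; 16]);

    let mut buf = Aligned([0; 16]);
    _mm512_store_epi32(buf.0.as_mut_ptr(), _mm512_set1_epi32(3));
    let v = _mm512_load_epi32(buf.0.as_ptr());
    assert_eq!(_mm512_reduce_add_epi32(v), 48); // 16 lanes of 3
}
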
34593/// Load 512-bits (composed of 8 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34594///
34595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_epi64&expand=3313)
34596#[inline]
34597#[target_feature(enable = "avx512f")]
34598#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34599#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
34600pub unsafe fn _mm512_load_epi64(mem_addr: *const i64) -> __m512i {
34601    ptr::read(mem_addr as *const __m512i)
34602}
34603
34604/// Load 256-bits (composed of 4 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
34605///
34606/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_epi64&expand=3310)
34607#[inline]
34608#[target_feature(enable = "avx512f,avx512vl")]
34609#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34610#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
34611pub unsafe fn _mm256_load_epi64(mem_addr: *const i64) -> __m256i {
34612    ptr::read(mem_addr as *const __m256i)
34613}
34614
34615/// Load 128-bits (composed of 2 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
34616///
34617/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_epi64&expand=3307)
34618#[inline]
34619#[target_feature(enable = "avx512f,avx512vl")]
34620#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34621#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
34622pub unsafe fn _mm_load_epi64(mem_addr: *const i64) -> __m128i {
34623    ptr::read(mem_addr as *const __m128i)
34624}
34625
34626/// Store 512-bits (composed of 8 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34627///
34628/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_epi64&expand=5575)
34629#[inline]
34630#[target_feature(enable = "avx512f")]
34631#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34632#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
34633pub unsafe fn _mm512_store_epi64(mem_addr: *mut i64, a: __m512i) {
34634    ptr::write(mem_addr as *mut __m512i, a);
34635}
34636
34637/// Store 256-bits (composed of 4 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
34638///
34639/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_epi64&expand=5573)
34640#[inline]
34641#[target_feature(enable = "avx512f,avx512vl")]
34642#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34643#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
34644pub unsafe fn _mm256_store_epi64(mem_addr: *mut i64, a: __m256i) {
34645    ptr::write(mem_addr as *mut __m256i, a);
34646}
34647
34648/// Store 128-bits (composed of 2 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
34649///
34650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_epi64&expand=5571)
34651#[inline]
34652#[target_feature(enable = "avx512f,avx512vl")]
34653#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34654#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
34655pub unsafe fn _mm_store_epi64(mem_addr: *mut i64, a: __m128i) {
34656    ptr::write(mem_addr as *mut __m128i, a);
34657}
34658
34659/// Load 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34660///
34661/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_ps&expand=3336)
34662#[inline]
34663#[target_feature(enable = "avx512f")]
34664#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34665#[cfg_attr(test, assert_instr(vmovaps))]
34666pub unsafe fn _mm512_load_ps(mem_addr: *const f32) -> __m512 {
34667    ptr::read(mem_addr as *const __m512)
34668}
34669
34670/// Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34671///
34672/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_ps&expand=5592)
34673#[inline]
34674#[target_feature(enable = "avx512f")]
34675#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34676#[cfg_attr(test, assert_instr(vmovaps))]
34677pub unsafe fn _mm512_store_ps(mem_addr: *mut f32, a: __m512) {
34678    ptr::write(mem_addr as *mut __m512, a);
34679}
34680
34681/// Load 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34682///
34683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_pd&expand=3326)
34684#[inline]
34685#[target_feature(enable = "avx512f")]
34686#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34687#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovapd
34688pub unsafe fn _mm512_load_pd(mem_addr: *const f64) -> __m512d {
34689    ptr::read(mem_addr as *const __m512d)
34690}
34691
34692/// Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34693///
34694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_pd&expand=5585)
34695#[inline]
34696#[target_feature(enable = "avx512f")]
34697#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34698#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovapd
34699pub unsafe fn _mm512_store_pd(mem_addr: *mut f64, a: __m512d) {
34700    ptr::write(mem_addr as *mut __m512d, a);
34701}
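
// Illustrative sketch only: the floating-point forms follow the same 64-byte
// alignment contract as the integer forms above, differing only in element
// type. The wrapper types and function name are hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_aligned_float_roundtrip() {
    #[repr(align(64))]
    struct AlignedF32([f32; 16]);
    #[repr(align(64))]
    struct AlignedF64([f64; 8]);

    let src_ps = AlignedF32([1.0; 16]);
    let mut dst_ps = AlignedF32([0.0; 16]);
    _mm512_store_ps(dst_ps.0.as_mut_ptr(), _mm512_load_ps(src_ps.0.as_ptr()));
    assert_eq!(src_ps.0, dst_ps.0);

    let src_pd = AlignedF64([2.0; 8]);
    let mut dst_pd = AlignedF64([0.0; 8]);
    _mm512_store_pd(dst_pd.0.as_mut_ptr(), _mm512_load_pd(src_pd.0.as_ptr()));
    assert_eq!(src_pd.0, dst_pd.0);
}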
34702
34703/// Load packed 32-bit integers from memory into dst using writemask k
34704/// (elements are copied from src when the corresponding mask bit is not set).
34705/// mem_addr does not need to be aligned on any particular boundary.
34706///
34707/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi32)
34708#[inline]
34709#[target_feature(enable = "avx512f")]
34710#[cfg_attr(test, assert_instr(vmovdqu32))]
34711#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34712pub unsafe fn _mm512_mask_loadu_epi32(src: __m512i, k: __mmask16, mem_addr: *const i32) -> __m512i {
34713    transmute(loaddqu32_512(mem_addr, src.as_i32x16(), k))
34714}
34715
34716/// Load packed 32-bit integers from memory into dst using zeromask k
34717/// (elements are zeroed out when the corresponding mask bit is not set).
34718/// mem_addr does not need to be aligned on any particular boundary.
34719///
34720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi32)
34721#[inline]
34722#[target_feature(enable = "avx512f")]
34723#[cfg_attr(test, assert_instr(vmovdqu32))]
34724#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34725pub unsafe fn _mm512_maskz_loadu_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
34726    _mm512_mask_loadu_epi32(_mm512_setzero_si512(), k, mem_addr)
34727}
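
// Illustrative sketch only: contrasting the writemask and zeromask forms of
// the unaligned masked load. With an alternating mask, even lanes come from
// memory while odd lanes either keep `src` (mask form) or become zero
// (maskz form). The function name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_loadu_epi32() {
    let buf = [1i32; 16];
    let src = _mm512_set1_epi32(-1);
    let k: __mmask16 = 0b0101_0101_0101_0101;

    let merged: [i32; 16] = mem::transmute(_mm512_mask_loadu_epi32(src, k, buf.as_ptr()));
    let zeroed: [i32; 16] = mem::transmute(_mm512_maskz_loadu_epi32(k, buf.as_ptr()));

    assert_eq!(merged[0], 1); // mask bit set: lane loaded from memory
    assert_eq!(merged[1], -1); // mask bit clear: lane copied from `src`
    assert_eq!(zeroed[1], 0); // mask bit clear: lane zeroed
}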
34728
34729/// Load packed 64-bit integers from memory into dst using writemask k
34730/// (elements are copied from src when the corresponding mask bit is not set).
34731/// mem_addr does not need to be aligned on any particular boundary.
34732///
34733/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi64)
34734#[inline]
34735#[target_feature(enable = "avx512f")]
34736#[cfg_attr(test, assert_instr(vmovdqu64))]
34737#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34738pub unsafe fn _mm512_mask_loadu_epi64(src: __m512i, k: __mmask8, mem_addr: *const i64) -> __m512i {
34739    transmute(loaddqu64_512(mem_addr, src.as_i64x8(), k))
34740}
34741
34742/// Load packed 64-bit integers from memory into dst using zeromask k
34743/// (elements are zeroed out when the corresponding mask bit is not set).
34744/// mem_addr does not need to be aligned on any particular boundary.
34745///
34746/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi64)
34747#[inline]
34748#[target_feature(enable = "avx512f")]
34749#[cfg_attr(test, assert_instr(vmovdqu64))]
34750#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34751pub unsafe fn _mm512_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
34752    _mm512_mask_loadu_epi64(_mm512_setzero_si512(), k, mem_addr)
34753}
34754
34755/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
34756/// (elements are copied from src when the corresponding mask bit is not set).
34757/// mem_addr does not need to be aligned on any particular boundary.
34758///
34759/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_ps)
34760#[inline]
34761#[target_feature(enable = "avx512f")]
34762#[cfg_attr(test, assert_instr(vmovups))]
34763#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34764pub unsafe fn _mm512_mask_loadu_ps(src: __m512, k: __mmask16, mem_addr: *const f32) -> __m512 {
34765    transmute(loadups_512(mem_addr, src.as_f32x16(), k))
34766}
34767
34768/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
34769/// (elements are zeroed out when the corresponding mask bit is not set).
34770/// mem_addr does not need to be aligned on any particular boundary.
34771///
34772/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_ps)
34773#[inline]
34774#[target_feature(enable = "avx512f")]
34775#[cfg_attr(test, assert_instr(vmovups))]
34776#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34777pub unsafe fn _mm512_maskz_loadu_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
34778    _mm512_mask_loadu_ps(_mm512_setzero_ps(), k, mem_addr)
34779}
34780
34781/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
34782/// (elements are copied from src when the corresponding mask bit is not set).
34783/// mem_addr does not need to be aligned on any particular boundary.
34784///
34785/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_pd)
34786#[inline]
34787#[target_feature(enable = "avx512f")]
34788#[cfg_attr(test, assert_instr(vmovupd))]
34789#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34790pub unsafe fn _mm512_mask_loadu_pd(src: __m512d, k: __mmask8, mem_addr: *const f64) -> __m512d {
34791    transmute(loadupd_512(mem_addr, src.as_f64x8(), k))
34792}
34793
34794/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
34795/// (elements are zeroed out when the corresponding mask bit is not set).
34796/// mem_addr does not need to be aligned on any particular boundary.
34797///
34798/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_pd)
34799#[inline]
34800#[target_feature(enable = "avx512f")]
34801#[cfg_attr(test, assert_instr(vmovupd))]
34802#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34803pub unsafe fn _mm512_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
34804    _mm512_mask_loadu_pd(_mm512_setzero_pd(), k, mem_addr)
34805}
34806
34807/// Load packed 32-bit integers from memory into dst using writemask k
34808/// (elements are copied from src when the corresponding mask bit is not set).
34809/// mem_addr does not need to be aligned on any particular boundary.
34810///
34811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi32)
34812#[inline]
34813#[target_feature(enable = "avx512f,avx512vl")]
34814#[cfg_attr(test, assert_instr(vmovdqu32))]
34815#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34816pub unsafe fn _mm256_mask_loadu_epi32(src: __m256i, k: __mmask8, mem_addr: *const i32) -> __m256i {
34817    transmute(loaddqu32_256(mem_addr, src.as_i32x8(), k))
34818}
34819
34820/// Load packed 32-bit integers from memory into dst using zeromask k
34821/// (elements are zeroed out when the corresponding mask bit is not set).
34822/// mem_addr does not need to be aligned on any particular boundary.
34823///
34824/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi32)
34825#[inline]
34826#[target_feature(enable = "avx512f,avx512vl")]
34827#[cfg_attr(test, assert_instr(vmovdqu32))]
34828#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34829pub unsafe fn _mm256_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
34830    _mm256_mask_loadu_epi32(_mm256_setzero_si256(), k, mem_addr)
34831}
34832
34833/// Load packed 64-bit integers from memory into dst using writemask k
34834/// (elements are copied from src when the corresponding mask bit is not set).
34835/// mem_addr does not need to be aligned on any particular boundary.
34836///
34837/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi64)
34838#[inline]
34839#[target_feature(enable = "avx512f,avx512vl")]
34840#[cfg_attr(test, assert_instr(vmovdqu64))]
34841#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34842pub unsafe fn _mm256_mask_loadu_epi64(src: __m256i, k: __mmask8, mem_addr: *const i64) -> __m256i {
34843    transmute(loaddqu64_256(mem_addr, src.as_i64x4(), k))
34844}
34845
34846/// Load packed 64-bit integers from memory into dst using zeromask k
34847/// (elements are zeroed out when the corresponding mask bit is not set).
34848/// mem_addr does not need to be aligned on any particular boundary.
34849///
34850/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi64)
34851#[inline]
34852#[target_feature(enable = "avx512f,avx512vl")]
34853#[cfg_attr(test, assert_instr(vmovdqu64))]
34854#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34855pub unsafe fn _mm256_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
34856    _mm256_mask_loadu_epi64(_mm256_setzero_si256(), k, mem_addr)
34857}
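
// Illustrative sketch only: the 256-bit masked loads behave like the 512-bit
// ones but additionally require AVX512VL and take an 8-bit mask. The function
// name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_mm256_mask_loadu_epi64() {
    let buf: [i64; 4] = [10, 20, 30, 40];
    let src = _mm256_set1_epi64x(-1);

    // Only the two lowest lanes are loaded; the rest keep the `src` values.
    let r: [i64; 4] = mem::transmute(_mm256_mask_loadu_epi64(src, 0b0011, buf.as_ptr()));
    assert_eq!(r, [10, 20, -1, -1]);
}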
34858
34859/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
34860/// (elements are copied from src when the corresponding mask bit is not set).
34861/// mem_addr does not need to be aligned on any particular boundary.
34862///
34863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_ps)
34864#[inline]
34865#[target_feature(enable = "avx512f,avx512vl")]
34866#[cfg_attr(test, assert_instr(vmovups))]
34867#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34868pub unsafe fn _mm256_mask_loadu_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
34869    transmute(loadups_256(mem_addr, src.as_f32x8(), k))
34870}
34871
34872/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
34873/// (elements are zeroed out when the corresponding mask bit is not set).
34874/// mem_addr does not need to be aligned on any particular boundary.
34875///
34876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_ps)
34877#[inline]
34878#[target_feature(enable = "avx512f,avx512vl")]
34879#[cfg_attr(test, assert_instr(vmovups))]
34880#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34881pub unsafe fn _mm256_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
34882    _mm256_mask_loadu_ps(_mm256_setzero_ps(), k, mem_addr)
34883}
34884
34885/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
34886/// (elements are copied from src when the corresponding mask bit is not set).
34887/// mem_addr does not need to be aligned on any particular boundary.
34888///
34889/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_pd)
34890#[inline]
34891#[target_feature(enable = "avx512f,avx512vl")]
34892#[cfg_attr(test, assert_instr(vmovupd))]
34893#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34894pub unsafe fn _mm256_mask_loadu_pd(src: __m256d, k: __mmask8, mem_addr: *const f64) -> __m256d {
34895    transmute(loadupd_256(mem_addr, src.as_f64x4(), k))
34896}
34897
34898/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
34899/// (elements are zeroed out when the corresponding mask bit is not set).
34900/// mem_addr does not need to be aligned on any particular boundary.
34901///
34902/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_pd)
34903#[inline]
34904#[target_feature(enable = "avx512f,avx512vl")]
34905#[cfg_attr(test, assert_instr(vmovupd))]
34906#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34907pub unsafe fn _mm256_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
34908    _mm256_mask_loadu_pd(_mm256_setzero_pd(), k, mem_addr)
34909}
34910
34911/// Load packed 32-bit integers from memory into dst using writemask k
34912/// (elements are copied from src when the corresponding mask bit is not set).
34913/// mem_addr does not need to be aligned on any particular boundary.
34914///
34915/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi32)
34916#[inline]
34917#[target_feature(enable = "avx512f,avx512vl")]
34918#[cfg_attr(test, assert_instr(vmovdqu32))]
34919#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34920pub unsafe fn _mm_mask_loadu_epi32(src: __m128i, k: __mmask8, mem_addr: *const i32) -> __m128i {
34921    transmute(loaddqu32_128(mem_addr, src.as_i32x4(), k))
34922}
34923
34924/// Load packed 32-bit integers from memory into dst using zeromask k
34925/// (elements are zeroed out when the corresponding mask bit is not set).
34926/// mem_addr does not need to be aligned on any particular boundary.
34927///
34928/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi32)
34929#[inline]
34930#[target_feature(enable = "avx512f,avx512vl")]
34931#[cfg_attr(test, assert_instr(vmovdqu32))]
34932#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34933pub unsafe fn _mm_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
34934    _mm_mask_loadu_epi32(_mm_setzero_si128(), k, mem_addr)
34935}
34936
34937/// Load packed 64-bit integers from memory into dst using writemask k
34938/// (elements are copied from src when the corresponding mask bit is not set).
34939/// mem_addr does not need to be aligned on any particular boundary.
34940///
34941/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi64)
34942#[inline]
34943#[target_feature(enable = "avx512f,avx512vl")]
34944#[cfg_attr(test, assert_instr(vmovdqu64))]
34945#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34946pub unsafe fn _mm_mask_loadu_epi64(src: __m128i, k: __mmask8, mem_addr: *const i64) -> __m128i {
34947    transmute(loaddqu64_128(mem_addr, src.as_i64x2(), k))
34948}
34949
34950/// Load packed 64-bit integers from memory into dst using zeromask k
34951/// (elements are zeroed out when the corresponding mask bit is not set).
34952/// mem_addr does not need to be aligned on any particular boundary.
34953///
34954/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi64)
34955#[inline]
34956#[target_feature(enable = "avx512f,avx512vl")]
34957#[cfg_attr(test, assert_instr(vmovdqu64))]
34958#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34959pub unsafe fn _mm_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
34960    _mm_mask_loadu_epi64(_mm_setzero_si128(), k, mem_addr)
34961}
34962
34963/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
34964/// (elements are copied from src when the corresponding mask bit is not set).
34965/// mem_addr does not need to be aligned on any particular boundary.
34966///
34967/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_ps)
34968#[inline]
34969#[target_feature(enable = "avx512f,avx512vl")]
34970#[cfg_attr(test, assert_instr(vmovups))]
34971#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34972pub unsafe fn _mm_mask_loadu_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
34973    transmute(loadups_128(mem_addr, src.as_f32x4(), k))
34974}
34975
34976/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
34977/// (elements are zeroed out when the corresponding mask bit is not set).
34978/// mem_addr does not need to be aligned on any particular boundary.
34979///
34980/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_ps)
34981#[inline]
34982#[target_feature(enable = "avx512f,avx512vl")]
34983#[cfg_attr(test, assert_instr(vmovups))]
34984#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34985pub unsafe fn _mm_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
34986    _mm_mask_loadu_ps(_mm_setzero_ps(), k, mem_addr)
34987}
34988
34989/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
34990/// (elements are copied from src when the corresponding mask bit is not set).
34991/// mem_addr does not need to be aligned on any particular boundary.
34992///
34993/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_pd)
34994#[inline]
34995#[target_feature(enable = "avx512f,avx512vl")]
34996#[cfg_attr(test, assert_instr(vmovupd))]
34997#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34998pub unsafe fn _mm_mask_loadu_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
34999    transmute(loadupd_128(mem_addr, src.as_f64x2(), k))
35000}
35001
35002/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
35003/// (elements are zeroed out when the corresponding mask bit is not set).
35004/// mem_addr does not need to be aligned on any particular boundary.
35005///
35006/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_pd)
35007#[inline]
35008#[target_feature(enable = "avx512f,avx512vl")]
35009#[cfg_attr(test, assert_instr(vmovupd))]
35010#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35011pub unsafe fn _mm_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
35012    _mm_mask_loadu_pd(_mm_setzero_pd(), k, mem_addr)
35013}
35014
35015/// Load packed 32-bit integers from memory into dst using writemask k
35016/// (elements are copied from src when the corresponding mask bit is not set).
35017/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35018///
35019/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_epi32)
35020#[inline]
35021#[target_feature(enable = "avx512f")]
35022#[cfg_attr(test, assert_instr(vmovdqa32))]
35023#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35024pub unsafe fn _mm512_mask_load_epi32(src: __m512i, k: __mmask16, mem_addr: *const i32) -> __m512i {
35025    transmute(loaddqa32_512(mem_addr, src.as_i32x16(), k))
35026}
35027
35028/// Load packed 32-bit integers from memory into dst using zeromask k
35029/// (elements are zeroed out when the corresponding mask bit is not set).
35030/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35031///
35032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_epi32)
35033#[inline]
35034#[target_feature(enable = "avx512f")]
35035#[cfg_attr(test, assert_instr(vmovdqa32))]
35036#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35037pub unsafe fn _mm512_maskz_load_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
35038    _mm512_mask_load_epi32(_mm512_setzero_si512(), k, mem_addr)
35039}
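
// Illustrative sketch only: the aligned masked loads select lanes exactly like
// the unaligned forms, but the pointer must also honor the documented 64-byte
// alignment. The wrapper type and function name are hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_maskz_load_epi32_aligned() {
    #[repr(align(64))]
    struct Aligned64([i32; 16]);

    let buf = Aligned64([7; 16]);
    // The low eight lanes are loaded from the aligned buffer, the high eight are zeroed.
    let r: [i32; 16] = mem::transmute(_mm512_maskz_load_epi32(0x00ff, buf.0.as_ptr()));
    assert_eq!(r[0], 7);
    assert_eq!(r[15], 0);
}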
35040
35041/// Load packed 64-bit integers from memory into dst using writemask k
35042/// (elements are copied from src when the corresponding mask bit is not set).
35043/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35044///
35045/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_epi64)
35046#[inline]
35047#[target_feature(enable = "avx512f")]
35048#[cfg_attr(test, assert_instr(vmovdqa64))]
35049#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35050pub unsafe fn _mm512_mask_load_epi64(src: __m512i, k: __mmask8, mem_addr: *const i64) -> __m512i {
35051    transmute(loaddqa64_512(mem_addr, src.as_i64x8(), k))
35052}
35053
35054/// Load packed 64-bit integers from memory into dst using zeromask k
35055/// (elements are zeroed out when the corresponding mask bit is not set).
35056/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35057///
35058/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_epi64)
35059#[inline]
35060#[target_feature(enable = "avx512f")]
35061#[cfg_attr(test, assert_instr(vmovdqa64))]
35062#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35063pub unsafe fn _mm512_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
35064    _mm512_mask_load_epi64(_mm512_setzero_si512(), k, mem_addr)
35065}
35066
35067/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
35068/// (elements are copied from src when the corresponding mask bit is not set).
35069/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35070///
35071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_ps)
35072#[inline]
35073#[target_feature(enable = "avx512f")]
35074#[cfg_attr(test, assert_instr(vmovaps))]
35075#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35076pub unsafe fn _mm512_mask_load_ps(src: __m512, k: __mmask16, mem_addr: *const f32) -> __m512 {
35077    transmute(loadaps_512(mem_addr, src.as_f32x16(), k))
35078}
35079
35080/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
35081/// (elements are zeroed out when the corresponding mask bit is not set).
35082/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35083///
35084/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_ps)
35085#[inline]
35086#[target_feature(enable = "avx512f")]
35087#[cfg_attr(test, assert_instr(vmovaps))]
35088#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35089pub unsafe fn _mm512_maskz_load_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
35090    _mm512_mask_load_ps(_mm512_setzero_ps(), k, mem_addr)
35091}
35092
35093/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
35094/// (elements are copied from src when the corresponding mask bit is not set).
35095/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35096///
35097/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_pd)
35098#[inline]
35099#[target_feature(enable = "avx512f")]
35100#[cfg_attr(test, assert_instr(vmovapd))]
35101#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35102pub unsafe fn _mm512_mask_load_pd(src: __m512d, k: __mmask8, mem_addr: *const f64) -> __m512d {
35103    transmute(loadapd_512(mem_addr, src.as_f64x8(), k))
35104}
35105
35106/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
35107/// (elements are zeroed out when the corresponding mask bit is not set).
35108/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35109///
35110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_pd)
35111#[inline]
35112#[target_feature(enable = "avx512f")]
35113#[cfg_attr(test, assert_instr(vmovapd))]
35114#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35115pub unsafe fn _mm512_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
35116    _mm512_mask_load_pd(_mm512_setzero_pd(), k, mem_addr)
35117}
35118
35119/// Load packed 32-bit integers from memory into dst using writemask k
35120/// (elements are copied from src when the corresponding mask bit is not set).
35121/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35122///
35123/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_epi32)
35124#[inline]
35125#[target_feature(enable = "avx512f,avx512vl")]
35126#[cfg_attr(test, assert_instr(vmovdqa32))]
35127#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35128pub unsafe fn _mm256_mask_load_epi32(src: __m256i, k: __mmask8, mem_addr: *const i32) -> __m256i {
35129    transmute(loaddqa32_256(mem_addr, src.as_i32x8(), k))
35130}
35131
35132/// Load packed 32-bit integers from memory into dst using zeromask k
35133/// (elements are zeroed out when the corresponding mask bit is not set).
35134/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35135///
35136/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_epi32)
35137#[inline]
35138#[target_feature(enable = "avx512f,avx512vl")]
35139#[cfg_attr(test, assert_instr(vmovdqa32))]
35140#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35141pub unsafe fn _mm256_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
35142    _mm256_mask_load_epi32(_mm256_setzero_si256(), k, mem_addr)
35143}
35144
35145/// Load packed 64-bit integers from memory into dst using writemask k
35146/// (elements are copied from src when the corresponding mask bit is not set).
35147/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35148///
35149/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_epi64)
35150#[inline]
35151#[target_feature(enable = "avx512f,avx512vl")]
35152#[cfg_attr(test, assert_instr(vmovdqa64))]
35153#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35154pub unsafe fn _mm256_mask_load_epi64(src: __m256i, k: __mmask8, mem_addr: *const i64) -> __m256i {
35155    transmute(loaddqa64_256(mem_addr, src.as_i64x4(), k))
35156}
35157
35158/// Load packed 64-bit integers from memory into dst using zeromask k
35159/// (elements are zeroed out when the corresponding mask bit is not set).
35160/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35161///
35162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_epi64)
35163#[inline]
35164#[target_feature(enable = "avx512f,avx512vl")]
35165#[cfg_attr(test, assert_instr(vmovdqa64))]
35166#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35167pub unsafe fn _mm256_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
35168    _mm256_mask_load_epi64(_mm256_setzero_si256(), k, mem_addr)
35169}
35170
35171/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
35172/// (elements are copied from src when the corresponding mask bit is not set).
35173/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35174///
35175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_ps)
35176#[inline]
35177#[target_feature(enable = "avx512f,avx512vl")]
35178#[cfg_attr(test, assert_instr(vmovaps))]
35179#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35180pub unsafe fn _mm256_mask_load_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
35181    transmute(loadaps_256(mem_addr, src.as_f32x8(), k))
35182}
35183
35184/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
35185/// (elements are zeroed out when the corresponding mask bit is not set).
35186/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35187///
35188/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_ps)
35189#[inline]
35190#[target_feature(enable = "avx512f,avx512vl")]
35191#[cfg_attr(test, assert_instr(vmovaps))]
35192#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35193pub unsafe fn _mm256_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
35194    _mm256_mask_load_ps(_mm256_setzero_ps(), k, mem_addr)
35195}
35196
35197/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
35198/// (elements are copied from src when the corresponding mask bit is not set).
35199/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35200///
35201/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_pd)
35202#[inline]
35203#[target_feature(enable = "avx512f,avx512vl")]
35204#[cfg_attr(test, assert_instr(vmovapd))]
35205#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35206pub unsafe fn _mm256_mask_load_pd(src: __m256d, k: __mmask8, mem_addr: *const f64) -> __m256d {
35207    transmute(loadapd_256(mem_addr, src.as_f64x4(), k))
35208}
35209
35210/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
35211/// (elements are zeroed out when the corresponding mask bit is not set).
35212/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35213///
35214/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_pd)
35215#[inline]
35216#[target_feature(enable = "avx512f,avx512vl")]
35217#[cfg_attr(test, assert_instr(vmovapd))]
35218#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35219pub unsafe fn _mm256_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
35220    _mm256_mask_load_pd(_mm256_setzero_pd(), k, mem_addr)
35221}
35222
35223/// Load packed 32-bit integers from memory into dst using writemask k
35224/// (elements are copied from src when the corresponding mask bit is not set).
35225/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35226///
35227/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_epi32)
35228#[inline]
35229#[target_feature(enable = "avx512f,avx512vl")]
35230#[cfg_attr(test, assert_instr(vmovdqa32))]
35231#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35232pub unsafe fn _mm_mask_load_epi32(src: __m128i, k: __mmask8, mem_addr: *const i32) -> __m128i {
35233    transmute(loaddqa32_128(mem_addr, src.as_i32x4(), k))
35234}
35235
35236/// Load packed 32-bit integers from memory into dst using zeromask k
35237/// (elements are zeroed out when the corresponding mask bit is not set).
35238/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35239///
35240/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_epi32)
35241#[inline]
35242#[target_feature(enable = "avx512f,avx512vl")]
35243#[cfg_attr(test, assert_instr(vmovdqa32))]
35244#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35245pub unsafe fn _mm_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
35246    _mm_mask_load_epi32(_mm_setzero_si128(), k, mem_addr)
35247}
35248
35249/// Load packed 64-bit integers from memory into dst using writemask k
35250/// (elements are copied from src when the corresponding mask bit is not set).
35251/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35252///
35253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_epi64)
35254#[inline]
35255#[target_feature(enable = "avx512f,avx512vl")]
35256#[cfg_attr(test, assert_instr(vmovdqa64))]
35257#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35258pub unsafe fn _mm_mask_load_epi64(src: __m128i, k: __mmask8, mem_addr: *const i64) -> __m128i {
35259    transmute(loaddqa64_128(mem_addr, src.as_i64x2(), k))
35260}
35261
35262/// Load packed 64-bit integers from memory into dst using zeromask k
35263/// (elements are zeroed out when the corresponding mask bit is not set).
35264/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35265///
35266/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_epi64)
35267#[inline]
35268#[target_feature(enable = "avx512f,avx512vl")]
35269#[cfg_attr(test, assert_instr(vmovdqa64))]
35270#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35271pub unsafe fn _mm_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
35272    _mm_mask_load_epi64(_mm_setzero_si128(), k, mem_addr)
35273}
35274
35275/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
35276/// (elements are copied from src when the corresponding mask bit is not set).
35277/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35278///
35279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_ps)
35280#[inline]
35281#[target_feature(enable = "avx512f,avx512vl")]
35282#[cfg_attr(test, assert_instr(vmovaps))]
35283#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35284pub unsafe fn _mm_mask_load_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
35285    transmute(loadaps_128(mem_addr, src.as_f32x4(), k))
35286}
35287
35288/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
35289/// (elements are zeroed out when the corresponding mask bit is not set).
35290/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35291///
35292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_ps)
35293#[inline]
35294#[target_feature(enable = "avx512f,avx512vl")]
35295#[cfg_attr(test, assert_instr(vmovaps))]
35296#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35297pub unsafe fn _mm_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
35298    _mm_mask_load_ps(_mm_setzero_ps(), k, mem_addr)
35299}
35300
35301/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
35302/// (elements are copied from src when the corresponding mask bit is not set).
35303/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35304///
35305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_pd)
35306#[inline]
35307#[target_feature(enable = "avx512f,avx512vl")]
35308#[cfg_attr(test, assert_instr(vmovapd))]
35309#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35310pub unsafe fn _mm_mask_load_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
35311    transmute(loadapd_128(mem_addr, src.as_f64x2(), k))
35312}
35313
35314/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
35315/// (elements are zeroed out when the corresponding mask bit is not set).
35316/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35317///
35318/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_pd)
35319#[inline]
35320#[target_feature(enable = "avx512f,avx512vl")]
35321#[cfg_attr(test, assert_instr(vmovapd))]
35322#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35323pub unsafe fn _mm_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
35324    _mm_mask_load_pd(_mm_setzero_pd(), k, mem_addr)
35325}
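
// Illustrative sketch only: the 128-bit aligned masked loads need AVX512VL and
// a 16-byte aligned pointer; only the lanes selected by the mask are loaded.
// The wrapper type and function name are hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_mm_maskz_load_pd() {
    #[repr(align(16))]
    struct Aligned16([f64; 2]);

    let buf = Aligned16([1.5, 2.5]);
    // Load only lane 0; lane 1 is zeroed by the zeromask.
    let r: [f64; 2] = mem::transmute(_mm_maskz_load_pd(0b01, buf.0.as_ptr()));
    assert_eq!(r, [1.5, 0.0]);
}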
35326
35327/// Load a single-precision (32-bit) floating-point element from memory into the lower element of dst
35328/// using writemask k (the element is copied from src when mask bit 0 is not set), and set the upper
35329/// 3 packed elements of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
35330/// exception may be generated.
35331///
35332/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_ss)
35333#[inline]
35334#[cfg_attr(test, assert_instr(vmovss))]
35335#[target_feature(enable = "avx512f")]
35336#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35337pub unsafe fn _mm_mask_load_ss(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
35338    let mut dst: __m128 = src;
35339    asm!(
35340        vpl!("vmovss {dst}{{{k}}}"),
35341        p = in(reg) mem_addr,
35342        k = in(kreg) k,
35343        dst = inout(xmm_reg) dst,
35344        options(pure, readonly, nostack, preserves_flags),
35345    );
35346    dst
35347}
35348
35349/// Load a single-precision (32-bit) floating-point element from memory into the lower element of dst
35350/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and set the upper 3 packed
35351/// elements of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
35352/// exception may be generated.
35353///
35354/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_ss)
35355#[inline]
35356#[cfg_attr(test, assert_instr(vmovss))]
35357#[target_feature(enable = "avx512f")]
35358#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35359pub unsafe fn _mm_maskz_load_ss(k: __mmask8, mem_addr: *const f32) -> __m128 {
35360    let mut dst: __m128;
35361    asm!(
35362        vpl!("vmovss {dst}{{{k}}} {{z}}"),
35363        p = in(reg) mem_addr,
35364        k = in(kreg) k,
35365        dst = out(xmm_reg) dst,
35366        options(pure, readonly, nostack, preserves_flags),
35367    );
35368    dst
35369}
35370
35371/// Load a double-precision (64-bit) floating-point element from memory into the lower element of dst
35372/// using writemask k (the element is copied from src when mask bit 0 is not set), and set the upper
35373/// element of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
35374/// exception may be generated.
35375///
35376/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_sd)
35377#[inline]
35378#[cfg_attr(test, assert_instr(vmovsd))]
35379#[target_feature(enable = "avx512f")]
35380#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35381pub unsafe fn _mm_mask_load_sd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
35382    let mut dst: __m128d = src;
35383    asm!(
35384        vpl!("vmovsd {dst}{{{k}}}"),
35385        p = in(reg) mem_addr,
35386        k = in(kreg) k,
35387        dst = inout(xmm_reg) dst,
35388        options(pure, readonly, nostack, preserves_flags),
35389    );
35390    dst
35391}
35392
35393/// Load a double-precision (64-bit) floating-point element from memory into the lower element of dst
35394/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and set the upper element
35395/// of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection exception
35396/// may be generated.
35397///
35398/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_sd)
35399#[inline]
35400#[cfg_attr(test, assert_instr(vmovsd))]
35401#[target_feature(enable = "avx512f")]
35402#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35403pub unsafe fn _mm_maskz_load_sd(k: __mmask8, mem_addr: *const f64) -> __m128d {
35404    let mut dst: __m128d;
35405    asm!(
35406        vpl!("vmovsd {dst}{{{k}}} {{z}}"),
35407        p = in(reg) mem_addr,
35408        k = in(kreg) k,
35409        dst = out(xmm_reg) dst,
35410        options(pure, readonly, nostack, preserves_flags),
35411    );
35412    dst
35413}
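
// Illustrative sketch only: the scalar `load_ss`/`load_sd` forms act on lane 0
// of a 128-bit register and zero the remaining lanes, so only mask bit 0
// matters. The wrapper type and function name are hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_load_ss() {
    #[repr(align(16))]
    struct Aligned16([f32; 4]);

    let buf = Aligned16([42.0, 0.0, 0.0, 0.0]);
    let src = _mm_set_ss(-1.0);

    // Bit 0 set: lane 0 is loaded from memory (upper lanes are zeroed).
    let loaded = _mm_mask_load_ss(src, 0b1, buf.0.as_ptr());
    // Bit 0 clear: lane 0 is taken from `src` (upper lanes are zeroed).
    let kept = _mm_mask_load_ss(src, 0b0, buf.0.as_ptr());

    assert_eq!(_mm_cvtss_f32(loaded), 42.0);
    assert_eq!(_mm_cvtss_f32(kept), -1.0);
}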
35414
35415/// Store packed 32-bit integers from a into memory using writemask k.
35416/// mem_addr does not need to be aligned on any particular boundary.
35417///
35418/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi32)
35419#[inline]
35420#[target_feature(enable = "avx512f")]
35421#[cfg_attr(test, assert_instr(vmovdqu32))]
35422#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35423pub unsafe fn _mm512_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) {
35424    storedqu32_512(mem_addr, a.as_i32x16(), mask)
35425}
35426
35427/// Store packed 64-bit integers from a into memory using writemask k.
35428/// mem_addr does not need to be aligned on any particular boundary.
35429///
35430/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi64)
35431#[inline]
35432#[target_feature(enable = "avx512f")]
35433#[cfg_attr(test, assert_instr(vmovdqu64))]
35434#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35435pub unsafe fn _mm512_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) {
35436    storedqu64_512(mem_addr, a.as_i64x8(), mask)
35437}
35438
35439/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35440/// mem_addr does not need to be aligned on any particular boundary.
35441///
35442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_ps)
35443#[inline]
35444#[target_feature(enable = "avx512f")]
35445#[cfg_attr(test, assert_instr(vmovups))]
35446#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35447pub unsafe fn _mm512_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) {
35448    storeups_512(mem_addr, a.as_f32x16(), mask)
35449}
35450
35451/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35452/// mem_addr does not need to be aligned on any particular boundary.
35453///
35454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_pd)
35455#[inline]
35456#[target_feature(enable = "avx512f")]
35457#[cfg_attr(test, assert_instr(vmovupd))]
35458#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35459pub unsafe fn _mm512_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) {
35460    storeupd_512(mem_addr, a.as_f64x8(), mask)
35461}
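
// Illustrative sketch only: masked stores write only the selected lanes and
// leave the rest of the destination memory untouched; the `storeu` forms have
// no alignment requirement. The function name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_storeu_epi32() {
    let a = _mm512_set1_epi32(5);
    let mut out = [-1i32; 16];

    // Only the low four lanes are written; lanes 4..16 keep their old value.
    _mm512_mask_storeu_epi32(out.as_mut_ptr(), 0b0000_0000_0000_1111, a);
    assert_eq!(out[0], 5);
    assert_eq!(out[4], -1);
}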
35462
35463/// Store packed 32-bit integers from a into memory using writemask k.
35464/// mem_addr does not need to be aligned on any particular boundary.
35465///
35466/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi32)
35467#[inline]
35468#[target_feature(enable = "avx512f,avx512vl")]
35469#[cfg_attr(test, assert_instr(vmovdqu32))]
35470#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35471pub unsafe fn _mm256_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) {
35472    storedqu32_256(mem_addr, a.as_i32x8(), mask)
35473}
35474
35475/// Store packed 64-bit integers from a into memory using writemask k.
35476/// mem_addr does not need to be aligned on any particular boundary.
35477///
35478/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi64)
35479#[inline]
35480#[target_feature(enable = "avx512f,avx512vl")]
35481#[cfg_attr(test, assert_instr(vmovdqu64))]
35482#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35483pub unsafe fn _mm256_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) {
35484    storedqu64_256(mem_addr, a.as_i64x4(), mask)
35485}
35486
35487/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35488/// mem_addr does not need to be aligned on any particular boundary.
35489///
35490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_ps)
35491#[inline]
35492#[target_feature(enable = "avx512f,avx512vl")]
35493#[cfg_attr(test, assert_instr(vmovups))]
35494#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35495pub unsafe fn _mm256_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) {
35496    storeups_256(mem_addr, a.as_f32x8(), mask)
35497}
35498
35499/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35500/// mem_addr does not need to be aligned on any particular boundary.
35501///
35502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_pd)
35503#[inline]
35504#[target_feature(enable = "avx512f,avx512vl")]
35505#[cfg_attr(test, assert_instr(vmovupd))]
35506#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35507pub unsafe fn _mm256_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) {
35508    storeupd_256(mem_addr, a.as_f64x4(), mask)
35509}
35510
35511/// Store packed 32-bit integers from a into memory using writemask k.
35512/// mem_addr does not need to be aligned on any particular boundary.
35513///
35514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi32)
35515#[inline]
35516#[target_feature(enable = "avx512f,avx512vl")]
35517#[cfg_attr(test, assert_instr(vmovdqu32))]
35518#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35519pub unsafe fn _mm_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) {
35520    storedqu32_128(mem_addr, a.as_i32x4(), mask)
35521}
35522
35523/// Store packed 64-bit integers from a into memory using writemask k.
35524/// mem_addr does not need to be aligned on any particular boundary.
35525///
35526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi64)
35527#[inline]
35528#[target_feature(enable = "avx512f,avx512vl")]
35529#[cfg_attr(test, assert_instr(vmovdqu64))]
35530#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35531pub unsafe fn _mm_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) {
35532    storedqu64_128(mem_addr, a.as_i64x2(), mask)
35533}
35534
35535/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35536/// mem_addr does not need to be aligned on any particular boundary.
35537///
35538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_ps)
35539#[inline]
35540#[target_feature(enable = "avx512f,avx512vl")]
35541#[cfg_attr(test, assert_instr(vmovups))]
35542#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35543pub unsafe fn _mm_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
35544    storeups_128(mem_addr, a.as_f32x4(), mask)
35545}
35546
35547/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35548/// mem_addr does not need to be aligned on any particular boundary.
35549///
35550/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_pd)
35551#[inline]
35552#[target_feature(enable = "avx512f,avx512vl")]
35553#[cfg_attr(test, assert_instr(vmovupd))]
35554#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35555pub unsafe fn _mm_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) {
35556    storeupd_128(mem_addr, a.as_f64x2(), mask)
35557}
35558
35559/// Store packed 32-bit integers from a into memory using writemask k.
35560/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35561///
35562/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_epi32)
35563#[inline]
35564#[target_feature(enable = "avx512f")]
35565#[cfg_attr(test, assert_instr(vmovdqa32))]
35566#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35567pub unsafe fn _mm512_mask_store_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) {
35568    storedqa32_512(mem_addr, a.as_i32x16(), mask)
35569}
35570
35571/// Store packed 64-bit integers from a into memory using writemask k.
35572/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35573///
35574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_epi64)
35575#[inline]
35576#[target_feature(enable = "avx512f")]
35577#[cfg_attr(test, assert_instr(vmovdqa64))]
35578#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35579pub unsafe fn _mm512_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) {
35580    storedqa64_512(mem_addr, a.as_i64x8(), mask)
35581}
35582
35583/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35584/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35585///
35586/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_ps)
35587#[inline]
35588#[target_feature(enable = "avx512f")]
35589#[cfg_attr(test, assert_instr(vmovaps))]
35590#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35591pub unsafe fn _mm512_mask_store_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) {
35592    storeaps_512(mem_addr, a.as_f32x16(), mask)
35593}
35594
35595/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35596/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35597///
35598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_pd)
35599#[inline]
35600#[target_feature(enable = "avx512f")]
35601#[cfg_attr(test, assert_instr(vmovapd))]
35602#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35603pub unsafe fn _mm512_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) {
35604    storeapd_512(mem_addr, a.as_f64x8(), mask)
35605}
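
// Illustrative sketch only: the aligned masked stores have the same lane
// semantics as the unaligned ones but require the documented 64-byte
// alignment. The wrapper type and function name are hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_store_pd_aligned() {
    #[repr(align(64))]
    struct Aligned64([f64; 8]);

    let a = _mm512_set1_pd(3.5);
    let mut out = Aligned64([0.0; 8]);

    // Write only the even lanes of `a` into the 64-byte aligned buffer.
    _mm512_mask_store_pd(out.0.as_mut_ptr(), 0b0101_0101, a);
    assert_eq!(out.0[0], 3.5);
    assert_eq!(out.0[1], 0.0);
}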
35606
35607/// Store packed 32-bit integers from a into memory using writemask k.
35608/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35609///
35610/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_epi32)
35611#[inline]
35612#[target_feature(enable = "avx512f,avx512vl")]
35613#[cfg_attr(test, assert_instr(vmovdqa32))]
35614#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35615pub unsafe fn _mm256_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) {
35616    storedqa32_256(mem_addr, a.as_i32x8(), mask)
35617}
35618
35619/// Store packed 64-bit integers from a into memory using writemask k.
35620/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35621///
35622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_epi64)
35623#[inline]
35624#[target_feature(enable = "avx512f,avx512vl")]
35625#[cfg_attr(test, assert_instr(vmovdqa64))]
35626#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35627pub unsafe fn _mm256_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) {
35628    storedqa64_256(mem_addr, a.as_i64x4(), mask)
35629}
35630
35631/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35632/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35633///
35634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_ps)
35635#[inline]
35636#[target_feature(enable = "avx512f,avx512vl")]
35637#[cfg_attr(test, assert_instr(vmovaps))]
35638#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35639pub unsafe fn _mm256_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) {
35640    storeaps_256(mem_addr, a.as_f32x8(), mask)
35641}
35642
35643/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35644/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35645///
35646/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_pd)
35647#[inline]
35648#[target_feature(enable = "avx512f,avx512vl")]
35649#[cfg_attr(test, assert_instr(vmovapd))]
35650#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35651pub unsafe fn _mm256_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) {
35652    storeapd_256(mem_addr, a.as_f64x4(), mask)
35653}
35654
35655/// Store packed 32-bit integers from a into memory using writemask k.
35656/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35657///
35658/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_epi32)
35659#[inline]
35660#[target_feature(enable = "avx512f,avx512vl")]
35661#[cfg_attr(test, assert_instr(vmovdqa32))]
35662#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35663pub unsafe fn _mm_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) {
35664    storedqa32_128(mem_addr, a.as_i32x4(), mask)
35665}
35666
35667/// Store packed 64-bit integers from a into memory using writemask k.
35668/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35669///
35670/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_epi64)
35671#[inline]
35672#[target_feature(enable = "avx512f,avx512vl")]
35673#[cfg_attr(test, assert_instr(vmovdqa64))]
35674#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35675pub unsafe fn _mm_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) {
35676    storedqa64_128(mem_addr, a.as_i64x2(), mask)
35677}
35678
35679/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35680/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35681///
35682/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_ps)
35683#[inline]
35684#[target_feature(enable = "avx512f,avx512vl")]
35685#[cfg_attr(test, assert_instr(vmovaps))]
35686#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35687pub unsafe fn _mm_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
35688    storeaps_128(mem_addr, a.as_f32x4(), mask)
35689}
35690
35691/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35692/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35693///
35694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_pd)
35695#[inline]
35696#[target_feature(enable = "avx512f,avx512vl")]
35697#[cfg_attr(test, assert_instr(vmovapd))]
35698#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35699pub unsafe fn _mm_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) {
35700    storeapd_128(mem_addr, a.as_f64x2(), mask)
35701}
35702
35703/// Store a single-precision (32-bit) floating-point element from a into memory using writemask k. mem_addr
35704/// must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35705///
35706/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_ss)
35707#[inline]
35708#[cfg_attr(test, assert_instr(vmovss))]
35709#[target_feature(enable = "avx512f")]
35710#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35711pub unsafe fn _mm_mask_store_ss(mem_addr: *mut f32, k: __mmask8, a: __m128) {
35712    asm!(
35713        vps!("vmovss", "{{{k}}}, {a}"),
35714        p = in(reg) mem_addr,
35715        k = in(kreg) k,
35716        a = in(xmm_reg) a,
35717        options(nostack, preserves_flags),
35718    );
35719}
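
// Illustrative sketch (hypothetical helper): turn a bool into mask bit 0 so the scalar
// store above happens only when `do_store` is true; otherwise `*dst` is left unchanged.
// Per the documentation above, `dst` must still be 16-byte aligned.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_conditional_store_ss(dst: *mut f32, do_store: bool, v: __m128) {
    // `true as __mmask8` is 1, so only mask bit 0 is ever set here.
    _mm_mask_store_ss(dst, do_store as __mmask8, v);
}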
35720
35721/// Store a double-precision (64-bit) floating-point element from a into memory using writemask k. mem_addr
35722/// must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35723///
35724/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_sd)
35725#[inline]
35726#[cfg_attr(test, assert_instr(vmovsd))]
35727#[target_feature(enable = "avx512f")]
35728#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35729pub unsafe fn _mm_mask_store_sd(mem_addr: *mut f64, k: __mmask8, a: __m128d) {
35730    asm!(
35731        vps!("vmovsd", "{{{k}}}, {a}"),
35732        p = in(reg) mem_addr,
35733        k = in(kreg) k,
35734        a = in(xmm_reg) a,
35735        options(nostack, preserves_flags),
35736    );
35737}
35738
35739/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35740///
35741/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi32)
35742#[inline]
35743#[target_feature(enable = "avx512f")]
35744#[cfg_attr(test, assert_instr(vpexpandd))]
35745#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35746pub unsafe fn _mm512_mask_expandloadu_epi32(
35747    src: __m512i,
35748    k: __mmask16,
35749    mem_addr: *const i32,
35750) -> __m512i {
35751    transmute(expandloadd_512(mem_addr, src.as_i32x16(), k))
35752}
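
// Illustrative sketch (hypothetical helper): expand-load reads `k.count_ones()` contiguous
// i32 values starting at `packed` and places them, in order, into the lanes of `src` whose
// mask bits are set; lanes with a clear bit keep their value from `src`. `packed` does not
// need any particular alignment.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_expand_packed_epi32(src: __m512i, k: __mmask16, packed: *const i32) -> __m512i {
    _mm512_mask_expandloadu_epi32(src, k, packed)
}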
35753
35754/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35755///
35756/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi32)
35757#[inline]
35758#[target_feature(enable = "avx512f")]
35759#[cfg_attr(test, assert_instr(vpexpandd))]
35760#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35761pub unsafe fn _mm512_maskz_expandloadu_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
35762    _mm512_mask_expandloadu_epi32(_mm512_setzero_si512(), k, mem_addr)
35763}
35764
35765/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35766///
35767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi32)
35768#[inline]
35769#[target_feature(enable = "avx512f,avx512vl")]
35770#[cfg_attr(test, assert_instr(vpexpandd))]
35771#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35772pub unsafe fn _mm256_mask_expandloadu_epi32(
35773    src: __m256i,
35774    k: __mmask8,
35775    mem_addr: *const i32,
35776) -> __m256i {
35777    transmute(expandloadd_256(mem_addr, src.as_i32x8(), k))
35778}
35779
35780/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35781///
35782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi32)
35783#[inline]
35784#[target_feature(enable = "avx512f,avx512vl")]
35785#[cfg_attr(test, assert_instr(vpexpandd))]
35786#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35787pub unsafe fn _mm256_maskz_expandloadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
35788    _mm256_mask_expandloadu_epi32(_mm256_setzero_si256(), k, mem_addr)
35789}
35790
35791/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35792///
35793/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi32)
35794#[inline]
35795#[target_feature(enable = "avx512f,avx512vl")]
35796#[cfg_attr(test, assert_instr(vpexpandd))]
35797#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35798pub unsafe fn _mm_mask_expandloadu_epi32(
35799    src: __m128i,
35800    k: __mmask8,
35801    mem_addr: *const i32,
35802) -> __m128i {
35803    transmute(expandloadd_128(mem_addr, src.as_i32x4(), k))
35804}
35805
35806/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35807///
35808/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi32)
35809#[inline]
35810#[target_feature(enable = "avx512f,avx512vl")]
35811#[cfg_attr(test, assert_instr(vpexpandd))]
35812#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35813pub unsafe fn _mm_maskz_expandloadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
35814    _mm_mask_expandloadu_epi32(_mm_setzero_si128(), k, mem_addr)
35815}
35816
35817/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35818///
35819/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi64)
35820#[inline]
35821#[target_feature(enable = "avx512f")]
35822#[cfg_attr(test, assert_instr(vpexpandq))]
35823#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35824pub unsafe fn _mm512_mask_expandloadu_epi64(
35825    src: __m512i,
35826    k: __mmask8,
35827    mem_addr: *const i64,
35828) -> __m512i {
35829    transmute(expandloadq_512(mem_addr, src.as_i64x8(), k))
35830}
35831
35832/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35833///
35834/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi64)
35835#[inline]
35836#[target_feature(enable = "avx512f")]
35837#[cfg_attr(test, assert_instr(vpexpandq))]
35838#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35839pub unsafe fn _mm512_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
35840    _mm512_mask_expandloadu_epi64(_mm512_setzero_si512(), k, mem_addr)
35841}
35842
35843/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35844///
35845/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi64)
35846#[inline]
35847#[target_feature(enable = "avx512f,avx512vl")]
35848#[cfg_attr(test, assert_instr(vpexpandq))]
35849#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35850pub unsafe fn _mm256_mask_expandloadu_epi64(
35851    src: __m256i,
35852    k: __mmask8,
35853    mem_addr: *const i64,
35854) -> __m256i {
35855    transmute(expandloadq_256(mem_addr, src.as_i64x4(), k))
35856}
35857
35858/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35859///
35860/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi64)
35861#[inline]
35862#[target_feature(enable = "avx512f,avx512vl")]
35863#[cfg_attr(test, assert_instr(vpexpandq))]
35864#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35865pub unsafe fn _mm256_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
35866    _mm256_mask_expandloadu_epi64(_mm256_setzero_si256(), k, mem_addr)
35867}
35868
35869/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35870///
35871/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi64)
35872#[inline]
35873#[target_feature(enable = "avx512f,avx512vl")]
35874#[cfg_attr(test, assert_instr(vpexpandq))]
35875#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35876pub unsafe fn _mm_mask_expandloadu_epi64(
35877    src: __m128i,
35878    k: __mmask8,
35879    mem_addr: *const i64,
35880) -> __m128i {
35881    transmute(expandloadq_128(mem_addr, src.as_i64x2(), k))
35882}
35883
35884/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35885///
35886/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi64)
35887#[inline]
35888#[target_feature(enable = "avx512f,avx512vl")]
35889#[cfg_attr(test, assert_instr(vpexpandq))]
35890#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35891pub unsafe fn _mm_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
35892    _mm_mask_expandloadu_epi64(_mm_setzero_si128(), k, mem_addr)
35893}
35894
35895/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35896///
35897/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_ps)
35898#[inline]
35899#[target_feature(enable = "avx512f")]
35900#[cfg_attr(test, assert_instr(vexpandps))]
35901#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35902pub unsafe fn _mm512_mask_expandloadu_ps(
35903    src: __m512,
35904    k: __mmask16,
35905    mem_addr: *const f32,
35906) -> __m512 {
35907    transmute(expandloadps_512(mem_addr, src.as_f32x16(), k))
35908}
35909
35910/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35911///
35912/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_ps)
35913#[inline]
35914#[target_feature(enable = "avx512f")]
35915#[cfg_attr(test, assert_instr(vexpandps))]
35916#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35917pub unsafe fn _mm512_maskz_expandloadu_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
35918    _mm512_mask_expandloadu_ps(_mm512_setzero_ps(), k, mem_addr)
35919}
35920
35921/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35922///
35923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_ps)
35924#[inline]
35925#[target_feature(enable = "avx512f,avx512vl")]
35926#[cfg_attr(test, assert_instr(vexpandps))]
35927#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35928pub unsafe fn _mm256_mask_expandloadu_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
35929    transmute(expandloadps_256(mem_addr, src.as_f32x8(), k))
35930}
35931
35932/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35933///
35934/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_ps)
35935#[inline]
35936#[target_feature(enable = "avx512f,avx512vl")]
35937#[cfg_attr(test, assert_instr(vexpandps))]
35938#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35939pub unsafe fn _mm256_maskz_expandloadu_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
35940    _mm256_mask_expandloadu_ps(_mm256_setzero_ps(), k, mem_addr)
35941}
35942
35943/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35944///
35945/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_ps)
35946#[inline]
35947#[target_feature(enable = "avx512f,avx512vl")]
35948#[cfg_attr(test, assert_instr(vexpandps))]
35949#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35950pub unsafe fn _mm_mask_expandloadu_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
35951    transmute(expandloadps_128(mem_addr, src.as_f32x4(), k))
35952}
35953
35954/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35955///
35956/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_ps)
35957#[inline]
35958#[target_feature(enable = "avx512f,avx512vl")]
35959#[cfg_attr(test, assert_instr(vexpandps))]
35960#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35961pub unsafe fn _mm_maskz_expandloadu_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
35962    _mm_mask_expandloadu_ps(_mm_setzero_ps(), k, mem_addr)
35963}
35964
35965/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35966///
35967/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_pd)
35968#[inline]
35969#[target_feature(enable = "avx512f")]
35970#[cfg_attr(test, assert_instr(vexpandpd))]
35971#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35972pub unsafe fn _mm512_mask_expandloadu_pd(
35973    src: __m512d,
35974    k: __mmask8,
35975    mem_addr: *const f64,
35976) -> __m512d {
35977    transmute(expandloadpd_512(mem_addr, src.as_f64x8(), k))
35978}
35979
35980/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35981///
35982/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_pd)
35983#[inline]
35984#[target_feature(enable = "avx512f")]
35985#[cfg_attr(test, assert_instr(vexpandpd))]
35986#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35987pub unsafe fn _mm512_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
35988    _mm512_mask_expandloadu_pd(_mm512_setzero_pd(), k, mem_addr)
35989}
35990
35991/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35992///
35993/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_pd)
35994#[inline]
35995#[target_feature(enable = "avx512f,avx512vl")]
35996#[cfg_attr(test, assert_instr(vexpandpd))]
35997#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35998pub unsafe fn _mm256_mask_expandloadu_pd(
35999    src: __m256d,
36000    k: __mmask8,
36001    mem_addr: *const f64,
36002) -> __m256d {
36003    transmute(expandloadpd_256(mem_addr, src.as_f64x4(), k))
36004}
36005
36006/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
36007///
36008/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_pd)
36009#[inline]
36010#[target_feature(enable = "avx512f,avx512vl")]
36011#[cfg_attr(test, assert_instr(vexpandpd))]
36012#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36013pub unsafe fn _mm256_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
36014    _mm256_mask_expandloadu_pd(_mm256_setzero_pd(), k, mem_addr)
36015}
36016
36017/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
36018///
36019/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_pd)
36020#[inline]
36021#[target_feature(enable = "avx512f,avx512vl")]
36022#[cfg_attr(test, assert_instr(vexpandpd))]
36023#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36024pub unsafe fn _mm_mask_expandloadu_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
36025    transmute(expandloadpd_128(mem_addr, src.as_f64x2(), k))
36026}
36027
36028/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
36029///
36030/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_pd)
36031#[inline]
36032#[target_feature(enable = "avx512f,avx512vl")]
36033#[cfg_attr(test, assert_instr(vexpandpd))]
36034#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36035pub unsafe fn _mm_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
36036    _mm_mask_expandloadu_pd(_mm_setzero_pd(), k, mem_addr)
36037}
36038
36039/// Set packed double-precision (64-bit) floating-point elements in dst with the supplied values in reverse order.
36040///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_pd&expand=5002)
36042#[inline]
36043#[target_feature(enable = "avx512f")]
36044#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36045pub fn _mm512_setr_pd(
36046    e0: f64,
36047    e1: f64,
36048    e2: f64,
36049    e3: f64,
36050    e4: f64,
36051    e5: f64,
36052    e6: f64,
36053    e7: f64,
36054) -> __m512d {
36055    unsafe {
36056        let r = f64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
36057        transmute(r)
36058    }
36059}
36060
36061/// Set packed double-precision (64-bit) floating-point elements in dst with the supplied values.
36062///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_pd&expand=4924)
36064#[inline]
36065#[target_feature(enable = "avx512f")]
36066#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36067pub fn _mm512_set_pd(
36068    e0: f64,
36069    e1: f64,
36070    e2: f64,
36071    e3: f64,
36072    e4: f64,
36073    e5: f64,
36074    e6: f64,
36075    e7: f64,
36076) -> __m512d {
36077    _mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0)
36078}
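
// Illustrative sketch: `_mm512_set_pd` takes its arguments from the highest lane down to
// lane 0, while `_mm512_setr_pd` takes them in lane order, so the two calls below build
// the same vector. The helper is hypothetical and only demonstrates the ordering.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_set_vs_setr_pd() -> (__m512d, __m512d) {
    let hi_to_lo = _mm512_set_pd(7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0);
    let lo_to_hi = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
    // Both values hold 0.0 in lane 0 through 7.0 in lane 7.
    (hi_to_lo, lo_to_hi)
}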
36079
36080/// Move the lower single-precision (32-bit) floating-point element from b to the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36081///
36082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_move_ss&expand=3832)
36083#[inline]
36084#[target_feature(enable = "avx512f")]
36085#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36086#[cfg_attr(test, assert_instr(vmovss))]
36087pub fn _mm_mask_move_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36088    unsafe {
36089        let extractsrc: f32 = simd_extract!(src, 0);
36090        let mut mov: f32 = extractsrc;
36091        if (k & 0b00000001) != 0 {
36092            mov = simd_extract!(b, 0);
36093        }
36094        simd_insert!(a, 0, mov)
36095    }
36096}
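
// Illustrative sketch (hypothetical values): with mask bit 0 set the low lane is taken
// from `b`, with it clear the low lane is taken from `src`; lanes 1..=3 always come from `a`.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_mask_move_ss() -> (__m128, __m128) {
    let src = _mm_set_ps(0.0, 0.0, 0.0, -1.0);
    let a = _mm_set_ps(40.0, 30.0, 20.0, 10.0);
    let b = _mm_set_ps(0.0, 0.0, 0.0, 99.0);
    let taken = _mm_mask_move_ss(src, 0b1, a, b); // lanes 0..=3: [99.0, 20.0, 30.0, 40.0]
    let kept = _mm_mask_move_ss(src, 0b0, a, b); // lanes 0..=3: [-1.0, 20.0, 30.0, 40.0]
    (taken, kept)
}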
36097
36098/// Move the lower single-precision (32-bit) floating-point element from b to the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36099///
36100/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_move_ss&expand=3833)
36101#[inline]
36102#[target_feature(enable = "avx512f")]
36103#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36104#[cfg_attr(test, assert_instr(vmovss))]
36105pub fn _mm_maskz_move_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36106    unsafe {
36107        let mut mov: f32 = 0.;
36108        if (k & 0b00000001) != 0 {
36109            mov = simd_extract!(b, 0);
36110        }
36111        simd_insert!(a, 0, mov)
36112    }
36113}
36114
36115/// Move the lower double-precision (64-bit) floating-point element from b to the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36116///
36117/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_move_sd&expand=3829)
36118#[inline]
36119#[target_feature(enable = "avx512f")]
36120#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36121#[cfg_attr(test, assert_instr(vmovsd))]
36122pub fn _mm_mask_move_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36123    unsafe {
36124        let extractsrc: f64 = simd_extract!(src, 0);
36125        let mut mov: f64 = extractsrc;
36126        if (k & 0b00000001) != 0 {
36127            mov = simd_extract!(b, 0);
36128        }
36129        simd_insert!(a, 0, mov)
36130    }
36131}
36132
36133/// Move the lower double-precision (64-bit) floating-point element from b to the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36134///
36135/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_move_sd&expand=3830)
36136#[inline]
36137#[target_feature(enable = "avx512f")]
36138#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36139#[cfg_attr(test, assert_instr(vmovsd))]
36140pub fn _mm_maskz_move_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36141    unsafe {
36142        let mut mov: f64 = 0.;
36143        if (k & 0b00000001) != 0 {
36144            mov = simd_extract!(b, 0);
36145        }
36146        simd_insert!(a, 0, mov)
36147    }
36148}
36149
36150/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36151///
36152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_ss&expand=159)
36153#[inline]
36154#[target_feature(enable = "avx512f")]
36155#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36156#[cfg_attr(test, assert_instr(vaddss))]
36157pub fn _mm_mask_add_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36158    unsafe {
36159        let extractsrc: f32 = simd_extract!(src, 0);
36160        let mut add: f32 = extractsrc;
36161        if (k & 0b00000001) != 0 {
36162            let extracta: f32 = simd_extract!(a, 0);
36163            let extractb: f32 = simd_extract!(b, 0);
36164            add = extracta + extractb;
36165        }
36166        simd_insert!(a, 0, add)
36167    }
36168}
36169
36170/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36171///
36172/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_add_ss&expand=160)
36173#[inline]
36174#[target_feature(enable = "avx512f")]
36175#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36176#[cfg_attr(test, assert_instr(vaddss))]
36177pub fn _mm_maskz_add_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36178    unsafe {
36179        let mut add: f32 = 0.;
36180        if (k & 0b00000001) != 0 {
36181            let extracta: f32 = simd_extract!(a, 0);
36182            let extractb: f32 = simd_extract!(b, 0);
36183            add = extracta + extractb;
36184        }
36185        simd_insert!(a, 0, add)
36186    }
36187}
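
// Illustrative sketch (hypothetical values): contrast the writemask and zeromask forms
// when mask bit 0 is clear. The writemask form falls back to `src`'s low lane, the
// zeromask form produces 0.0; in both cases lanes 1..=3 come from `a`.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_masked_scalar_add() -> (__m128, __m128) {
    let a = _mm_set_ss(1.5);
    let b = _mm_set_ss(2.5);
    let src = _mm_set_ss(-8.0);
    let merged = _mm_mask_add_ss(src, 0b0, a, b); // low lane = -8.0 (copied from src)
    let zeroed = _mm_maskz_add_ss(0b0, a, b); // low lane = 0.0
    (merged, zeroed)
}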
36188
36189/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36190///
36191/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_sd&expand=155)
36192#[inline]
36193#[target_feature(enable = "avx512f")]
36194#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36195#[cfg_attr(test, assert_instr(vaddsd))]
36196pub fn _mm_mask_add_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36197    unsafe {
36198        let extractsrc: f64 = simd_extract!(src, 0);
36199        let mut add: f64 = extractsrc;
36200        if (k & 0b00000001) != 0 {
36201            let extracta: f64 = simd_extract!(a, 0);
36202            let extractb: f64 = simd_extract!(b, 0);
36203            add = extracta + extractb;
36204        }
36205        simd_insert!(a, 0, add)
36206    }
36207}
36208
36209/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36210///
36211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_add_sd&expand=156)
36212#[inline]
36213#[target_feature(enable = "avx512f")]
36214#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36215#[cfg_attr(test, assert_instr(vaddsd))]
36216pub fn _mm_maskz_add_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36217    unsafe {
36218        let mut add: f64 = 0.;
36219        if (k & 0b00000001) != 0 {
36220            let extracta: f64 = simd_extract!(a, 0);
36221            let extractb: f64 = simd_extract!(b, 0);
36222            add = extracta + extractb;
36223        }
36224        simd_insert!(a, 0, add)
36225    }
36226}
36227
36228/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36229///
36230/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_ss&expand=5750)
36231#[inline]
36232#[target_feature(enable = "avx512f")]
36233#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36234#[cfg_attr(test, assert_instr(vsubss))]
pub fn _mm_mask_sub_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let extractsrc: f32 = simd_extract!(src, 0);
        let mut sub: f32 = extractsrc;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            sub = extracta - extractb;
        }
        simd_insert!(a, 0, sub)
    }
}
36247
36248/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36249///
36250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_ss&expand=5751)
36251#[inline]
36252#[target_feature(enable = "avx512f")]
36253#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36254#[cfg_attr(test, assert_instr(vsubss))]
pub fn _mm_maskz_sub_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let mut sub: f32 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            sub = extracta - extractb;
        }
        simd_insert!(a, 0, sub)
    }
}
36266
36267/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36268///
36269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_sd&expand=5746)
36270#[inline]
36271#[target_feature(enable = "avx512f")]
36272#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36273#[cfg_attr(test, assert_instr(vsubsd))]
pub fn _mm_mask_sub_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let extractsrc: f64 = simd_extract!(src, 0);
        let mut sub: f64 = extractsrc;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            sub = extracta - extractb;
        }
        simd_insert!(a, 0, sub)
    }
}
36286
36287/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36288///
36289/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_sd&expand=5747)
36290#[inline]
36291#[target_feature(enable = "avx512f")]
36292#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36293#[cfg_attr(test, assert_instr(vsubsd))]
pub fn _mm_maskz_sub_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let mut sub: f64 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            sub = extracta - extractb;
        }
        simd_insert!(a, 0, sub)
    }
}
36305
36306/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36307///
36308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_ss&expand=3950)
36309#[inline]
36310#[target_feature(enable = "avx512f")]
36311#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36312#[cfg_attr(test, assert_instr(vmulss))]
pub fn _mm_mask_mul_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let extractsrc: f32 = simd_extract!(src, 0);
        let mut mul: f32 = extractsrc;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            mul = extracta * extractb;
        }
        simd_insert!(a, 0, mul)
    }
}
36325
36326/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36327///
36328/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_ss&expand=3951)
36329#[inline]
36330#[target_feature(enable = "avx512f")]
36331#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36332#[cfg_attr(test, assert_instr(vmulss))]
pub fn _mm_maskz_mul_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let mut mul: f32 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            mul = extracta * extractb;
        }
        simd_insert!(a, 0, mul)
    }
}
36344
36345/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36346///
36347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_sd&expand=3947)
36348#[inline]
36349#[target_feature(enable = "avx512f")]
36350#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36351#[cfg_attr(test, assert_instr(vmulsd))]
pub fn _mm_mask_mul_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let extractsrc: f64 = simd_extract!(src, 0);
        let mut mul: f64 = extractsrc;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            mul = extracta * extractb;
        }
        simd_insert!(a, 0, mul)
    }
}
36364
36365/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36366///
36367/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_sd&expand=3948)
36368#[inline]
36369#[target_feature(enable = "avx512f")]
36370#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36371#[cfg_attr(test, assert_instr(vmulsd))]
pub fn _mm_maskz_mul_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let mut mul: f64 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            mul = extracta * extractb;
        }
        simd_insert!(a, 0, mul)
    }
}
36383
36384/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36385///
36386/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_ss&expand=2181)
36387#[inline]
36388#[target_feature(enable = "avx512f")]
36389#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36390#[cfg_attr(test, assert_instr(vdivss))]
pub fn _mm_mask_div_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let extractsrc: f32 = simd_extract!(src, 0);
        let mut div: f32 = extractsrc;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            div = extracta / extractb;
        }
        simd_insert!(a, 0, div)
    }
}
36403
36404/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36405///
36406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_ss&expand=2182)
36407#[inline]
36408#[target_feature(enable = "avx512f")]
36409#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36410#[cfg_attr(test, assert_instr(vdivss))]
pub fn _mm_maskz_div_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let mut div: f32 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            div = extracta / extractb;
        }
        simd_insert!(a, 0, div)
    }
}
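
// Illustrative sketch (hypothetical helper): with the zeromask form, the division is only
// performed when mask bit 0 is set (see the implementation above), so a caller can pass a
// "divisor is non-zero" mask computed elsewhere and get 0.0 in the low lane instead of an
// infinity when the divisor's low lane is zero.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_guarded_div_ss(divisor_nonzero: __mmask8, a: __m128, b: __m128) -> __m128 {
    _mm_maskz_div_ss(divisor_nonzero, a, b)
}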
36422
36423/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36424///
36425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_sd&expand=2178)
36426#[inline]
36427#[target_feature(enable = "avx512f")]
36428#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36429#[cfg_attr(test, assert_instr(vdivsd))]
pub fn _mm_mask_div_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let extractsrc: f64 = simd_extract!(src, 0);
        let mut div: f64 = extractsrc;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            div = extracta / extractb;
        }
        simd_insert!(a, 0, div)
    }
}
36442
36443/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36444///
36445/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_sd&expand=2179)
36446#[inline]
36447#[target_feature(enable = "avx512f")]
36448#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36449#[cfg_attr(test, assert_instr(vdivsd))]
pub fn _mm_maskz_div_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let mut div: f64 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            div = extracta / extractb;
        }
        simd_insert!(a, 0, div)
    }
}
36461
36462/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36463///
36464/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_ss&expand=3672)
36465#[inline]
36466#[target_feature(enable = "avx512f")]
36467#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36468#[cfg_attr(test, assert_instr(vmaxss))]
36469pub fn _mm_mask_max_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36470    unsafe {
36471        transmute(vmaxss(
36472            a.as_f32x4(),
36473            b.as_f32x4(),
36474            src.as_f32x4(),
36475            k,
36476            _MM_FROUND_CUR_DIRECTION,
36477        ))
36478    }
36479}
36480
36481/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36482///
36483/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_ss&expand=3673)
36484#[inline]
36485#[target_feature(enable = "avx512f")]
36486#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36487#[cfg_attr(test, assert_instr(vmaxss))]
36488pub fn _mm_maskz_max_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36489    unsafe {
36490        transmute(vmaxss(
36491            a.as_f32x4(),
36492            b.as_f32x4(),
36493            f32x4::ZERO,
36494            k,
36495            _MM_FROUND_CUR_DIRECTION,
36496        ))
36497    }
36498}
36499
36500/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36501///
36502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_sd&expand=3669)
36503#[inline]
36504#[target_feature(enable = "avx512f")]
36505#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36506#[cfg_attr(test, assert_instr(vmaxsd))]
36507pub fn _mm_mask_max_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36508    unsafe {
36509        transmute(vmaxsd(
36510            a.as_f64x2(),
36511            b.as_f64x2(),
36512            src.as_f64x2(),
36513            k,
36514            _MM_FROUND_CUR_DIRECTION,
36515        ))
36516    }
36517}
36518
36519/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36520///
36521/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_sd&expand=3670)
36522#[inline]
36523#[target_feature(enable = "avx512f")]
36524#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36525#[cfg_attr(test, assert_instr(vmaxsd))]
36526pub fn _mm_maskz_max_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36527    unsafe {
36528        transmute(vmaxsd(
36529            a.as_f64x2(),
36530            b.as_f64x2(),
36531            f64x2::ZERO,
36532            k,
36533            _MM_FROUND_CUR_DIRECTION,
36534        ))
36535    }
36536}
36537
36538/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36539///
36540/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_ss&expand=3786)
36541#[inline]
36542#[target_feature(enable = "avx512f")]
36543#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36544#[cfg_attr(test, assert_instr(vminss))]
36545pub fn _mm_mask_min_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36546    unsafe {
36547        transmute(vminss(
36548            a.as_f32x4(),
36549            b.as_f32x4(),
36550            src.as_f32x4(),
36551            k,
36552            _MM_FROUND_CUR_DIRECTION,
36553        ))
36554    }
36555}
36556
36557/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36558///
36559/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_ss&expand=3787)
36560#[inline]
36561#[target_feature(enable = "avx512f")]
36562#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36563#[cfg_attr(test, assert_instr(vminss))]
36564pub fn _mm_maskz_min_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36565    unsafe {
36566        transmute(vminss(
36567            a.as_f32x4(),
36568            b.as_f32x4(),
36569            f32x4::ZERO,
36570            k,
36571            _MM_FROUND_CUR_DIRECTION,
36572        ))
36573    }
36574}
36575
36576/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36577///
36578/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_sd&expand=3783)
36579#[inline]
36580#[target_feature(enable = "avx512f")]
36581#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36582#[cfg_attr(test, assert_instr(vminsd))]
36583pub fn _mm_mask_min_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36584    unsafe {
36585        transmute(vminsd(
36586            a.as_f64x2(),
36587            b.as_f64x2(),
36588            src.as_f64x2(),
36589            k,
36590            _MM_FROUND_CUR_DIRECTION,
36591        ))
36592    }
36593}
36594
36595/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36596///
36597/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_sd&expand=3784)
36598#[inline]
36599#[target_feature(enable = "avx512f")]
36600#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36601#[cfg_attr(test, assert_instr(vminsd))]
36602pub fn _mm_maskz_min_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36603    unsafe {
36604        transmute(vminsd(
36605            a.as_f64x2(),
36606            b.as_f64x2(),
36607            f64x2::ZERO,
36608            k,
36609            _MM_FROUND_CUR_DIRECTION,
36610        ))
36611    }
36612}
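
// Illustrative sketch (hypothetical helper): conditionally clamp the low lane of `x` to at
// most `limit`. With mask bit 0 set the low lane becomes min(x0, limit0); with it clear the
// writemask falls back to `x`'s own low lane, so the value passes through unchanged.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_conditional_clamp_ss(k: __mmask8, x: __m128, limit: __m128) -> __m128 {
    _mm_mask_min_ss(x, k, x, limit)
}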
36613
36614/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36615///
36616/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_ss&expand=5387)
36617#[inline]
36618#[target_feature(enable = "avx512f")]
36619#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36620#[cfg_attr(test, assert_instr(vsqrtss))]
36621pub fn _mm_mask_sqrt_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36622    unsafe { vsqrtss(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
36623}
36624
36625/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36626///
36627/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_ss&expand=5388)
36628#[inline]
36629#[target_feature(enable = "avx512f")]
36630#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36631#[cfg_attr(test, assert_instr(vsqrtss))]
36632pub fn _mm_maskz_sqrt_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36633    unsafe { vsqrtss(a, b, _mm_setzero_ps(), k, _MM_FROUND_CUR_DIRECTION) }
36634}
36635
36636/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36637///
36638/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_sd&expand=5384)
36639#[inline]
36640#[target_feature(enable = "avx512f")]
36641#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36642#[cfg_attr(test, assert_instr(vsqrtsd))]
36643pub fn _mm_mask_sqrt_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36644    unsafe { vsqrtsd(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
36645}
36646
36647/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36648///
36649/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_sd&expand=5385)
36650#[inline]
36651#[target_feature(enable = "avx512f")]
36652#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36653#[cfg_attr(test, assert_instr(vsqrtsd))]
36654pub fn _mm_maskz_sqrt_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36655    unsafe { vsqrtsd(a, b, _mm_setzero_pd(), k, _MM_FROUND_CUR_DIRECTION) }
36656}
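
// Illustrative sketch (hypothetical helper): take the square root of `b`'s low double only
// for a lane flagged as non-negative by a mask computed elsewhere, zeroing the low lane
// otherwise; the upper lane always comes from `a`.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_guarded_sqrt_sd(non_negative: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    _mm_maskz_sqrt_sd(non_negative, a, b)
}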
36657
36658/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36659///
36660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rsqrt14_ss&expand=4825)
36661#[inline]
36662#[target_feature(enable = "avx512f")]
36663#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36664#[cfg_attr(test, assert_instr(vrsqrt14ss))]
36665pub fn _mm_rsqrt14_ss(a: __m128, b: __m128) -> __m128 {
36666    unsafe { transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, 0b1)) }
36667}
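
// Illustrative sketch (not from the documentation above): since `_mm_rsqrt14_ss` is only
// accurate to about 2^-14, one common follow-up is a single Newton-Raphson step,
// y' = y * (1.5 - 0.5 * x * y * y), to tighten the low-lane estimate toward full single
// precision. The helper name and the refinement step are assumptions, not part of this API.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_refined_rsqrt_ss(x: __m128) -> __m128 {
    let y = _mm_rsqrt14_ss(x, x);
    let half_x = _mm_mul_ss(_mm_set_ss(0.5), x);
    // Low lane: y * (1.5 - 0.5 * x * y * y); upper lanes are passed through from `y`.
    _mm_mul_ss(y, _mm_sub_ss(_mm_set_ss(1.5), _mm_mul_ss(half_x, _mm_mul_ss(y, y))))
}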
36668
36669/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36670///
36671/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rsqrt14_ss&expand=4823)
36672#[inline]
36673#[target_feature(enable = "avx512f")]
36674#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36675#[cfg_attr(test, assert_instr(vrsqrt14ss))]
36676pub fn _mm_mask_rsqrt14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36677    unsafe { transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) }
36678}
36679
36680/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36681///
36682/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rsqrt14_ss&expand=4824)
36683#[inline]
36684#[target_feature(enable = "avx512f")]
36685#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36686#[cfg_attr(test, assert_instr(vrsqrt14ss))]
36687pub fn _mm_maskz_rsqrt14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36688    unsafe { transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) }
36689}
36690
36691/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36692///
36693/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rsqrt14_sd&expand=4822)
36694#[inline]
36695#[target_feature(enable = "avx512f")]
36696#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36697#[cfg_attr(test, assert_instr(vrsqrt14sd))]
36698pub fn _mm_rsqrt14_sd(a: __m128d, b: __m128d) -> __m128d {
36699    unsafe { transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, 0b1)) }
36700}
36701
36702/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36703///
36704/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rsqrt14_sd&expand=4820)
36705#[inline]
36706#[target_feature(enable = "avx512f")]
36707#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36708#[cfg_attr(test, assert_instr(vrsqrt14sd))]
36709pub fn _mm_mask_rsqrt14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36710    unsafe { transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) }
36711}
36712
36713/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36714///
36715/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rsqrt14_sd&expand=4821)
36716#[inline]
36717#[target_feature(enable = "avx512f")]
36718#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36719#[cfg_attr(test, assert_instr(vrsqrt14sd))]
36720pub fn _mm_maskz_rsqrt14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36721    unsafe { transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) }
36722}
36723
36724/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36725///
36726/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rcp14_ss&expand=4508)
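///
/// A minimal usage sketch (illustrative, not part of Intel's documentation); the result is
/// an approximation, accurate to within a relative error of 2^-14:
///
/// ```ignore
/// let a = _mm_set_ss(1.0);
/// let b = _mm_set_ss(8.0);
/// // Lower lane is approximately 1.0 / 8.0 = 0.125; upper lanes are copied from `a`.
/// let r = _mm_rcp14_ss(a, b);
/// ```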
36727#[inline]
36728#[target_feature(enable = "avx512f")]
36729#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36730#[cfg_attr(test, assert_instr(vrcp14ss))]
36731pub fn _mm_rcp14_ss(a: __m128, b: __m128) -> __m128 {
36732    unsafe { transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, 0b1)) }
36733}
36734
36735/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36736///
36737/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rcp14_ss&expand=4506)
36738#[inline]
36739#[target_feature(enable = "avx512f")]
36740#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36741#[cfg_attr(test, assert_instr(vrcp14ss))]
36742pub fn _mm_mask_rcp14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36743    unsafe { transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) }
36744}
36745
36746/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36747///
36748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rcp14_ss&expand=4507)
36749#[inline]
36750#[target_feature(enable = "avx512f")]
36751#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36752#[cfg_attr(test, assert_instr(vrcp14ss))]
36753pub fn _mm_maskz_rcp14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36754    unsafe { transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) }
36755}
36756
36757/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36758///
36759/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rcp14_sd&expand=4505)
36760#[inline]
36761#[target_feature(enable = "avx512f")]
36762#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36763#[cfg_attr(test, assert_instr(vrcp14sd))]
36764pub fn _mm_rcp14_sd(a: __m128d, b: __m128d) -> __m128d {
36765    unsafe { transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, 0b1)) }
36766}
36767
36768/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36769///
36770/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rcp14_sd&expand=4503)
36771#[inline]
36772#[target_feature(enable = "avx512f")]
36773#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36774#[cfg_attr(test, assert_instr(vrcp14sd))]
36775pub fn _mm_mask_rcp14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36776    unsafe { transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) }
36777}
36778
36779/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36780///
36781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rcp14_sd&expand=4504)
36782#[inline]
36783#[target_feature(enable = "avx512f")]
36784#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36785#[cfg_attr(test, assert_instr(vrcp14sd))]
36786pub fn _mm_maskz_rcp14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36787    unsafe { transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) }
36788}
36789
36790/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36791///
36792/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_ss&expand=2862)
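///
/// A minimal usage sketch (illustrative, not part of Intel's documentation), assuming the
/// `avx512f` feature and `_mm_set_ss`:
///
/// ```ignore
/// let a = _mm_set_ss(1.0);
/// let b = _mm_set_ss(8.0);
/// // Lower lane is floor(log2(8.0)) = 3.0; upper lanes are copied from `a`.
/// let r = _mm_getexp_ss(a, b);
/// ```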
36793#[inline]
36794#[target_feature(enable = "avx512f")]
36795#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36796#[cfg_attr(test, assert_instr(vgetexpss))]
36797pub fn _mm_getexp_ss(a: __m128, b: __m128) -> __m128 {
36798    unsafe {
36799        transmute(vgetexpss(
36800            a.as_f32x4(),
36801            b.as_f32x4(),
36802            f32x4::ZERO,
36803            0b1,
36804            _MM_FROUND_NO_EXC,
36805        ))
36806    }
36807}
36808
36809/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36810///
36811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_ss&expand=2863)
36812#[inline]
36813#[target_feature(enable = "avx512f")]
36814#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36815#[cfg_attr(test, assert_instr(vgetexpss))]
36816pub fn _mm_mask_getexp_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36817    unsafe {
36818        transmute(vgetexpss(
36819            a.as_f32x4(),
36820            b.as_f32x4(),
36821            src.as_f32x4(),
36822            k,
36823            _MM_FROUND_NO_EXC,
36824        ))
36825    }
36826}
36827
36828/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36829///
36830/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_ss&expand=2864)
36831#[inline]
36832#[target_feature(enable = "avx512f")]
36833#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36834#[cfg_attr(test, assert_instr(vgetexpss))]
36835pub fn _mm_maskz_getexp_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36836    unsafe {
36837        transmute(vgetexpss(
36838            a.as_f32x4(),
36839            b.as_f32x4(),
36840            f32x4::ZERO,
36841            k,
36842            _MM_FROUND_NO_EXC,
36843        ))
36844    }
36845}
36846
36847/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36848///
36849/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_sd&expand=2859)
36850#[inline]
36851#[target_feature(enable = "avx512f")]
36852#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36853#[cfg_attr(test, assert_instr(vgetexpsd))]
36854pub fn _mm_getexp_sd(a: __m128d, b: __m128d) -> __m128d {
36855    unsafe {
36856        transmute(vgetexpsd(
36857            a.as_f64x2(),
36858            b.as_f64x2(),
36859            f64x2::ZERO,
36860            0b1,
36861            _MM_FROUND_NO_EXC,
36862        ))
36863    }
36864}
36865
36866/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36867///
36868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_sd&expand=2860)
36869#[inline]
36870#[target_feature(enable = "avx512f")]
36871#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36872#[cfg_attr(test, assert_instr(vgetexpsd))]
36873pub fn _mm_mask_getexp_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36874    unsafe {
36875        transmute(vgetexpsd(
36876            a.as_f64x2(),
36877            b.as_f64x2(),
36878            src.as_f64x2(),
36879            k,
36880            _MM_FROUND_NO_EXC,
36881        ))
36882    }
36883}
36884
36885/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36886///
36887/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_sd&expand=2861)
36888#[inline]
36889#[target_feature(enable = "avx512f")]
36890#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36891#[cfg_attr(test, assert_instr(vgetexpsd))]
36892pub fn _mm_maskz_getexp_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36893    unsafe {
36894        transmute(vgetexpsd(
36895            a.as_f64x2(),
36896            b.as_f64x2(),
36897            f64x2::ZERO,
36898            k,
36899            _MM_FROUND_NO_EXC,
36900        ))
36901    }
36902}
36903
36904/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
36905/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
36906///    _MM_MANT_NORM_1_2     // interval [1, 2)\
36907///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
36908///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
36909///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
36910/// The sign is determined by sc which can take the following values:\
36911///    _MM_MANT_SIGN_src     // sign = sign(src)\
36912///    _MM_MANT_SIGN_zero    // sign = 0\
36913///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
36914/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
36915///
36916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_ss&expand=2898)
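///
/// A minimal usage sketch (illustrative, not part of Intel's documentation); it uses the
/// `_MM_MANT_NORM_1_2` and `_MM_MANT_SIGN_src` constants listed above:
///
/// ```ignore
/// let a = _mm_set_ss(1.0);
/// let b = _mm_set_ss(12.0);
/// // 12.0 = 1.5 * 2^3, so with the [1, 2) interval the lower lane becomes 1.5;
/// // the upper lanes are copied from `a`.
/// let r = _mm_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_src>(a, b);
/// ```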
36917#[inline]
36918#[target_feature(enable = "avx512f")]
36919#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36920#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
36921#[rustc_legacy_const_generics(2, 3)]
36922pub fn _mm_getmant_ss<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
36923    a: __m128,
36924    b: __m128,
36925) -> __m128 {
36926    unsafe {
36927        static_assert_uimm_bits!(NORM, 4);
36928        static_assert_uimm_bits!(SIGN, 2);
36929        let a = a.as_f32x4();
36930        let b = b.as_f32x4();
36931        let r = vgetmantss(
36932            a,
36933            b,
36934            SIGN << 2 | NORM,
36935            f32x4::ZERO,
36936            0b1,
36937            _MM_FROUND_CUR_DIRECTION,
36938        );
36939        transmute(r)
36940    }
36941}
36942
36943/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
36944/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
36945///    _MM_MANT_NORM_1_2     // interval [1, 2)\
36946///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
36947///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
36948///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
36949/// The sign is determined by sc which can take the following values:\
36950///    _MM_MANT_SIGN_src     // sign = sign(src)\
36951///    _MM_MANT_SIGN_zero    // sign = 0\
36952///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
36953/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
36954///
36955/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_ss&expand=2899)
36956#[inline]
36957#[target_feature(enable = "avx512f")]
36958#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36959#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
36960#[rustc_legacy_const_generics(4, 5)]
36961pub fn _mm_mask_getmant_ss<
36962    const NORM: _MM_MANTISSA_NORM_ENUM,
36963    const SIGN: _MM_MANTISSA_SIGN_ENUM,
36964>(
36965    src: __m128,
36966    k: __mmask8,
36967    a: __m128,
36968    b: __m128,
36969) -> __m128 {
36970    unsafe {
36971        static_assert_uimm_bits!(NORM, 4);
36972        static_assert_uimm_bits!(SIGN, 2);
36973        let a = a.as_f32x4();
36974        let b = b.as_f32x4();
36975        let src = src.as_f32x4();
36976        let r = vgetmantss(a, b, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
36977        transmute(r)
36978    }
36979}
36980
36981/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
36982/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
36983///    _MM_MANT_NORM_1_2     // interval [1, 2)\
36984///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
36985///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
36986///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
36987/// The sign is determined by sc which can take the following values:\
36988///    _MM_MANT_SIGN_src     // sign = sign(src)\
36989///    _MM_MANT_SIGN_zero    // sign = 0\
36990///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
36991/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
36992///
36993/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_ss&expand=2900)
36994#[inline]
36995#[target_feature(enable = "avx512f")]
36996#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36997#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
36998#[rustc_legacy_const_generics(3, 4)]
36999pub fn _mm_maskz_getmant_ss<
37000    const NORM: _MM_MANTISSA_NORM_ENUM,
37001    const SIGN: _MM_MANTISSA_SIGN_ENUM,
37002>(
37003    k: __mmask8,
37004    a: __m128,
37005    b: __m128,
37006) -> __m128 {
37007    unsafe {
37008        static_assert_uimm_bits!(NORM, 4);
37009        static_assert_uimm_bits!(SIGN, 2);
37010        let a = a.as_f32x4();
37011        let b = b.as_f32x4();
37012        let r = vgetmantss(
37013            a,
37014            b,
37015            SIGN << 2 | NORM,
37016            f32x4::ZERO,
37017            k,
37018            _MM_FROUND_CUR_DIRECTION,
37019        );
37020        transmute(r)
37021    }
37022}
37023
37024/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37025/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37026///    _MM_MANT_NORM_1_2     // interval [1, 2)\
37027///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
37028///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
37029///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37030/// The sign is determined by sc which can take the following values:\
37031///    _MM_MANT_SIGN_src     // sign = sign(src)\
37032///    _MM_MANT_SIGN_zero    // sign = 0\
37033///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
37034/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37035///
37036/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_sd&expand=2895)
37037#[inline]
37038#[target_feature(enable = "avx512f")]
37039#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37040#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
37041#[rustc_legacy_const_generics(2, 3)]
37042pub fn _mm_getmant_sd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
37043    a: __m128d,
37044    b: __m128d,
37045) -> __m128d {
37046    unsafe {
37047        static_assert_uimm_bits!(NORM, 4);
37048        static_assert_uimm_bits!(SIGN, 2);
37049        let a = a.as_f64x2();
37050        let b = b.as_f64x2();
37051        let r = vgetmantsd(
37052            a,
37053            b,
37054            SIGN << 2 | NORM,
37055            f64x2::ZERO,
37056            0b1,
37057            _MM_FROUND_CUR_DIRECTION,
37058        );
37059        transmute(r)
37060    }
37061}
37062
37063/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37064/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37065///    _MM_MANT_NORM_1_2     // interval [1, 2)\
37066///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
37067///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
37068///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37069/// The sign is determined by sc which can take the following values:\
37070///    _MM_MANT_SIGN_src     // sign = sign(src)\
37071///    _MM_MANT_SIGN_zero    // sign = 0\
37072///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
37073/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37074///
37075/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_sd&expand=2896)
37076#[inline]
37077#[target_feature(enable = "avx512f")]
37078#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37079#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
37080#[rustc_legacy_const_generics(4, 5)]
37081pub fn _mm_mask_getmant_sd<
37082    const NORM: _MM_MANTISSA_NORM_ENUM,
37083    const SIGN: _MM_MANTISSA_SIGN_ENUM,
37084>(
37085    src: __m128d,
37086    k: __mmask8,
37087    a: __m128d,
37088    b: __m128d,
37089) -> __m128d {
37090    unsafe {
37091        static_assert_uimm_bits!(NORM, 4);
37092        static_assert_uimm_bits!(SIGN, 2);
37093        let a = a.as_f64x2();
37094        let b = b.as_f64x2();
37095        let src = src.as_f64x2();
37096        let r = vgetmantsd(a, b, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
37097        transmute(r)
37098    }
37099}
37100
37101/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37102/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37103///    _MM_MANT_NORM_1_2     // interval [1, 2)\
37104///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
37105///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
37106///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37107/// The sign is determined by sc which can take the following values:\
37108///    _MM_MANT_SIGN_src     // sign = sign(src)\
37109///    _MM_MANT_SIGN_zero    // sign = 0\
37110///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
37111/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37112///
37113/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_sd&expand=2897)
37114#[inline]
37115#[target_feature(enable = "avx512f")]
37116#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37117#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
37118#[rustc_legacy_const_generics(3, 4)]
37119pub fn _mm_maskz_getmant_sd<
37120    const NORM: _MM_MANTISSA_NORM_ENUM,
37121    const SIGN: _MM_MANTISSA_SIGN_ENUM,
37122>(
37123    k: __mmask8,
37124    a: __m128d,
37125    b: __m128d,
37126) -> __m128d {
37127    unsafe {
37128        static_assert_uimm_bits!(NORM, 4);
37129        static_assert_uimm_bits!(SIGN, 2);
37130        let a = a.as_f64x2();
37131        let b = b.as_f64x2();
37132        let r = vgetmantsd(
37133            a,
37134            b,
37135            SIGN << 2 | NORM,
37136            f64x2::ZERO,
37137            k,
37138            _MM_FROUND_CUR_DIRECTION,
37139        );
37140        transmute(r)
37141    }
37142}
37143
37144/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
37145/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37146/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37147/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37148/// * [`_MM_FROUND_TO_POS_INF`] : round up
37149/// * [`_MM_FROUND_TO_ZERO`] : truncate
37150/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37151///
37152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_ss&expand=4802)
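///
/// A minimal usage sketch (illustrative, not part of Intel's documentation). `IMM8 = 0`
/// requests zero fraction bits with `_MM_FROUND_TO_NEAREST_INT`, i.e. rounding the lower
/// lane to the nearest integer:
///
/// ```ignore
/// let a = _mm_set_ss(1.0);
/// let b = _mm_set_ss(2.7);
/// // Lower lane is 2.7 rounded to the nearest integer, 3.0; upper lanes are copied from `a`.
/// let r = _mm_roundscale_ss::<0>(a, b);
/// ```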
37153#[inline]
37154#[target_feature(enable = "avx512f")]
37155#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37156#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 255))]
37157#[rustc_legacy_const_generics(2)]
37158pub fn _mm_roundscale_ss<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
37159    unsafe {
37160        static_assert_uimm_bits!(IMM8, 8);
37161        let a = a.as_f32x4();
37162        let b = b.as_f32x4();
37163        let r = vrndscaless(
37164            a,
37165            b,
37166            f32x4::ZERO,
37167            0b11111111,
37168            IMM8,
37169            _MM_FROUND_CUR_DIRECTION,
37170        );
37171        transmute(r)
37172    }
37173}
37174
37175/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37176/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37177/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37178/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37179/// * [`_MM_FROUND_TO_POS_INF`] : round up
37180/// * [`_MM_FROUND_TO_ZERO`] : truncate
37181/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37182///
37183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_ss&expand=4800)
37184#[inline]
37185#[target_feature(enable = "avx512f")]
37186#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37187#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0))]
37188#[rustc_legacy_const_generics(4)]
37189pub fn _mm_mask_roundscale_ss<const IMM8: i32>(
37190    src: __m128,
37191    k: __mmask8,
37192    a: __m128,
37193    b: __m128,
37194) -> __m128 {
37195    unsafe {
37196        static_assert_uimm_bits!(IMM8, 8);
37197        let a = a.as_f32x4();
37198        let b = b.as_f32x4();
37199        let src = src.as_f32x4();
37200        let r = vrndscaless(a, b, src, k, IMM8, _MM_FROUND_CUR_DIRECTION);
37201        transmute(r)
37202    }
37203}
37204
37205/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37206/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37207/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37208/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37209/// * [`_MM_FROUND_TO_POS_INF`] : round up
37210/// * [`_MM_FROUND_TO_ZERO`] : truncate
37211/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37212///
37213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_ss&expand=4801)
37214#[inline]
37215#[target_feature(enable = "avx512f")]
37216#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37217#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0))]
37218#[rustc_legacy_const_generics(3)]
37219pub fn _mm_maskz_roundscale_ss<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
37220    unsafe {
37221        static_assert_uimm_bits!(IMM8, 8);
37222        let a = a.as_f32x4();
37223        let b = b.as_f32x4();
37224        let r = vrndscaless(a, b, f32x4::ZERO, k, IMM8, _MM_FROUND_CUR_DIRECTION);
37225        transmute(r)
37226    }
37227}
37228
37229/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
37230/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37231/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37232/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37233/// * [`_MM_FROUND_TO_POS_INF`] : round up
37234/// * [`_MM_FROUND_TO_ZERO`] : truncate
37235/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37236///
37237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_sd&expand=4799)
37238#[inline]
37239#[target_feature(enable = "avx512f")]
37240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37241#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 255))]
37242#[rustc_legacy_const_generics(2)]
37243pub fn _mm_roundscale_sd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
37244    unsafe {
37245        static_assert_uimm_bits!(IMM8, 8);
37246        let a = a.as_f64x2();
37247        let b = b.as_f64x2();
37248        let r = vrndscalesd(
37249            a,
37250            b,
37251            f64x2::ZERO,
37252            0b11111111,
37253            IMM8,
37254            _MM_FROUND_CUR_DIRECTION,
37255        );
37256        transmute(r)
37257    }
37258}
37259
37260/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
37261/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37262/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37263/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37264/// * [`_MM_FROUND_TO_POS_INF`] : round up
37265/// * [`_MM_FROUND_TO_ZERO`] : truncate
37266/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37267///
37268/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_sd&expand=4797)
37269#[inline]
37270#[target_feature(enable = "avx512f")]
37271#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37272#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0))]
37273#[rustc_legacy_const_generics(4)]
37274pub fn _mm_mask_roundscale_sd<const IMM8: i32>(
37275    src: __m128d,
37276    k: __mmask8,
37277    a: __m128d,
37278    b: __m128d,
37279) -> __m128d {
37280    unsafe {
37281        static_assert_uimm_bits!(IMM8, 8);
37282        let a = a.as_f64x2();
37283        let b = b.as_f64x2();
37284        let src = src.as_f64x2();
37285        let r = vrndscalesd(a, b, src, k, IMM8, _MM_FROUND_CUR_DIRECTION);
37286        transmute(r)
37287    }
37288}
37289
37290/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
37291/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37292/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37293/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37294/// * [`_MM_FROUND_TO_POS_INF`] : round up
37295/// * [`_MM_FROUND_TO_ZERO`] : truncate
37296/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37297///
37298/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_sd&expand=4798)
37299#[inline]
37300#[target_feature(enable = "avx512f")]
37301#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37302#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0))]
37303#[rustc_legacy_const_generics(3)]
37304pub fn _mm_maskz_roundscale_sd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
37305    unsafe {
37306        static_assert_uimm_bits!(IMM8, 8);
37307        let a = a.as_f64x2();
37308        let b = b.as_f64x2();
37309        let r = vrndscalesd(a, b, f64x2::ZERO, k, IMM8, _MM_FROUND_CUR_DIRECTION);
37310        transmute(r)
37311    }
37312}
37313
37314/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
37315///
37316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_ss&expand=4901)
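///
/// A minimal usage sketch (illustrative, not part of Intel's documentation). For the lower
/// lane this computes `a * 2^floor(b)`:
///
/// ```ignore
/// let a = _mm_set_ss(1.5);
/// let b = _mm_set_ss(3.0);
/// // Lower lane is 1.5 * 2^3 = 12.0; upper lanes are copied from `a`.
/// let r = _mm_scalef_ss(a, b);
/// ```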
37317#[inline]
37318#[target_feature(enable = "avx512f")]
37319#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37320#[cfg_attr(test, assert_instr(vscalefss))]
37321pub fn _mm_scalef_ss(a: __m128, b: __m128) -> __m128 {
37322    unsafe {
37323        let a = a.as_f32x4();
37324        let b = b.as_f32x4();
37325        transmute(vscalefss(
37326            a,
37327            b,
37328            f32x4::ZERO,
37329            0b11111111,
37330            _MM_FROUND_CUR_DIRECTION,
37331        ))
37332    }
37333}
37334
37335/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37336///
37337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_ss&expand=4899)
37338#[inline]
37339#[target_feature(enable = "avx512f")]
37340#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37341#[cfg_attr(test, assert_instr(vscalefss))]
37342pub fn _mm_mask_scalef_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
37343    unsafe {
37344        let a = a.as_f32x4();
37345        let b = b.as_f32x4();
37346        let src = src.as_f32x4();
37347        transmute(vscalefss(a, b, src, k, _MM_FROUND_CUR_DIRECTION))
37348    }
37349}
37350
37351/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37352///
37353/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_ss&expand=4900)
37354#[inline]
37355#[target_feature(enable = "avx512f")]
37356#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37357#[cfg_attr(test, assert_instr(vscalefss))]
37358pub fn _mm_maskz_scalef_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
37359    unsafe {
37360        transmute(vscalefss(
37361            a.as_f32x4(),
37362            b.as_f32x4(),
37363            f32x4::ZERO,
37364            k,
37365            _MM_FROUND_CUR_DIRECTION,
37366        ))
37367    }
37368}
37369
37370/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
37371///
37372/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_sd&expand=4898)
37373#[inline]
37374#[target_feature(enable = "avx512f")]
37375#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37376#[cfg_attr(test, assert_instr(vscalefsd))]
37377pub fn _mm_scalef_sd(a: __m128d, b: __m128d) -> __m128d {
37378    unsafe {
37379        transmute(vscalefsd(
37380            a.as_f64x2(),
37381            b.as_f64x2(),
37382            f64x2::ZERO,
37383            0b11111111,
37384            _MM_FROUND_CUR_DIRECTION,
37385        ))
37386    }
37387}
37388
37389/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37390///
37391/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_sd&expand=4896)
37392#[inline]
37393#[target_feature(enable = "avx512f")]
37394#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37395#[cfg_attr(test, assert_instr(vscalefsd))]
37396pub fn _mm_mask_scalef_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
37397    unsafe {
37398        transmute(vscalefsd(
37399            a.as_f64x2(),
37400            b.as_f64x2(),
37401            src.as_f64x2(),
37402            k,
37403            _MM_FROUND_CUR_DIRECTION,
37404        ))
37405    }
37406}
37407
37408/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37409///
37410/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_sd&expand=4897)
37411#[inline]
37412#[target_feature(enable = "avx512f")]
37413#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37414#[cfg_attr(test, assert_instr(vscalefsd))]
37415pub fn _mm_maskz_scalef_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
37416    unsafe {
37417        transmute(vscalefsd(
37418            a.as_f64x2(),
37419            b.as_f64x2(),
37420            f64x2::ZERO,
37421            k,
37422            _MM_FROUND_CUR_DIRECTION,
37423        ))
37424    }
37425}
37426
37427/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37428///
37429/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_ss&expand=2582)
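///
/// A minimal usage sketch (illustrative, not part of Intel's documentation), assuming the
/// `avx512f` feature and `_mm_set_ss`:
///
/// ```ignore
/// let a = _mm_set_ss(2.0);
/// let b = _mm_set_ss(3.0);
/// let c = _mm_set_ss(4.0);
/// // Mask bit 0 set: lower lane is 2.0 * 3.0 + 4.0 = 10.0.
/// let r = _mm_mask_fmadd_ss(a, 0b1, b, c);
/// // Mask bit 0 clear: lower lane is copied from `a`, i.e. 2.0.
/// let s = _mm_mask_fmadd_ss(a, 0b0, b, c);
/// ```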
37430#[inline]
37431#[target_feature(enable = "avx512f")]
37432#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37433#[cfg_attr(test, assert_instr(vfmadd))]
37434pub fn _mm_mask_fmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
37435    unsafe {
37436        let mut fmadd: f32 = simd_extract!(a, 0);
37437        if (k & 0b00000001) != 0 {
37438            let extractb: f32 = simd_extract!(b, 0);
37439            let extractc: f32 = simd_extract!(c, 0);
37440            fmadd = fmaf32(fmadd, extractb, extractc);
37441        }
37442        simd_insert!(a, 0, fmadd)
37443    }
37444}
37445
37446/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37447///
37448/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_ss&expand=2584)
37449#[inline]
37450#[target_feature(enable = "avx512f")]
37451#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37452#[cfg_attr(test, assert_instr(vfmadd))]
37453pub fn _mm_maskz_fmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
37454    unsafe {
37455        let mut fmadd: f32 = 0.;
37456        if (k & 0b00000001) != 0 {
37457            let extracta: f32 = simd_extract!(a, 0);
37458            let extractb: f32 = simd_extract!(b, 0);
37459            let extractc: f32 = simd_extract!(c, 0);
37460            fmadd = fmaf32(extracta, extractb, extractc);
37461        }
37462        simd_insert!(a, 0, fmadd)
37463    }
37464}
37465
37466/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
37467///
37468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_ss&expand=2583)
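///
/// A minimal usage sketch (illustrative, not part of Intel's documentation); in contrast to
/// `_mm_mask_fmadd_ss`, both the fallback value and the upper lanes come from `c`:
///
/// ```ignore
/// let a = _mm_set_ss(2.0);
/// let b = _mm_set_ss(3.0);
/// let c = _mm_set_ss(4.0);
/// // Mask bit 0 clear: lower lane is copied from `c`, i.e. 4.0 (with the bit set it would be 10.0).
/// let r = _mm_mask3_fmadd_ss(a, b, c, 0b0);
/// ```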
37469#[inline]
37470#[target_feature(enable = "avx512f")]
37471#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37472#[cfg_attr(test, assert_instr(vfmadd))]
37473pub fn _mm_mask3_fmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
37474    unsafe {
37475        let mut fmadd: f32 = simd_extract!(c, 0);
37476        if (k & 0b00000001) != 0 {
37477            let extracta: f32 = simd_extract!(a, 0);
37478            let extractb: f32 = simd_extract!(b, 0);
37479            fmadd = fmaf32(extracta, extractb, fmadd);
37480        }
37481        simd_insert!(c, 0, fmadd)
37482    }
37483}
37484
37485/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37486///
37487/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_sd&expand=2578)
37488#[inline]
37489#[target_feature(enable = "avx512f")]
37490#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37491#[cfg_attr(test, assert_instr(vfmadd))]
37492pub fn _mm_mask_fmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
37493    unsafe {
37494        let mut fmadd: f64 = simd_extract!(a, 0);
37495        if (k & 0b00000001) != 0 {
37496            let extractb: f64 = simd_extract!(b, 0);
37497            let extractc: f64 = simd_extract!(c, 0);
37498            fmadd = fmaf64(fmadd, extractb, extractc);
37499        }
37500        simd_insert!(a, 0, fmadd)
37501    }
37502}
37503
37504/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37505///
37506/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_sd&expand=2580)
37507#[inline]
37508#[target_feature(enable = "avx512f")]
37509#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37510#[cfg_attr(test, assert_instr(vfmadd))]
37511pub fn _mm_maskz_fmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
37512    unsafe {
37513        let mut fmadd: f64 = 0.;
37514        if (k & 0b00000001) != 0 {
37515            let extracta: f64 = simd_extract!(a, 0);
37516            let extractb: f64 = simd_extract!(b, 0);
37517            let extractc: f64 = simd_extract!(c, 0);
37518            fmadd = fmaf64(extracta, extractb, extractc);
37519        }
37520        simd_insert!(a, 0, fmadd)
37521    }
37522}
37523
37524/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
37525///
37526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_sd&expand=2579)
37527#[inline]
37528#[target_feature(enable = "avx512f")]
37529#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37530#[cfg_attr(test, assert_instr(vfmadd))]
37531pub fn _mm_mask3_fmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
37532    unsafe {
37533        let mut fmadd: f64 = simd_extract!(c, 0);
37534        if (k & 0b00000001) != 0 {
37535            let extracta: f64 = simd_extract!(a, 0);
37536            let extractb: f64 = simd_extract!(b, 0);
37537            fmadd = fmaf64(extracta, extractb, fmadd);
37538        }
37539        simd_insert!(c, 0, fmadd)
37540    }
37541}
37542
37543/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37544///
37545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_ss&expand=2668)
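///
/// A minimal usage sketch (illustrative, not part of Intel's documentation):
///
/// ```ignore
/// let a = _mm_set_ss(5.0);
/// let b = _mm_set_ss(3.0);
/// let c = _mm_set_ss(4.0);
/// // Mask bit 0 set: lower lane is 5.0 * 3.0 - 4.0 = 11.0; with the bit clear it stays 5.0.
/// let r = _mm_mask_fmsub_ss(a, 0b1, b, c);
/// ```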
37546#[inline]
37547#[target_feature(enable = "avx512f")]
37548#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37549#[cfg_attr(test, assert_instr(vfmsub))]
37550pub fn _mm_mask_fmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
37551    unsafe {
37552        let mut fmsub: f32 = simd_extract!(a, 0);
37553        if (k & 0b00000001) != 0 {
37554            let extractb: f32 = simd_extract!(b, 0);
37555            let extractc: f32 = simd_extract!(c, 0);
37556            let extractc = -extractc;
37557            fmsub = fmaf32(fmsub, extractb, extractc);
37558        }
37559        simd_insert!(a, 0, fmsub)
37560    }
37561}
37562
37563/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37564///
37565/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_ss&expand=2670)
37566#[inline]
37567#[target_feature(enable = "avx512f")]
37568#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37569#[cfg_attr(test, assert_instr(vfmsub))]
37570pub fn _mm_maskz_fmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
37571    unsafe {
37572        let mut fmsub: f32 = 0.;
37573        if (k & 0b00000001) != 0 {
37574            let extracta: f32 = simd_extract!(a, 0);
37575            let extractb: f32 = simd_extract!(b, 0);
37576            let extractc: f32 = simd_extract!(c, 0);
37577            let extractc = -extractc;
37578            fmsub = fmaf32(extracta, extractb, extractc);
37579        }
37580        simd_insert!(a, 0, fmsub)
37581    }
37582}
37583
37584/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
37585///
37586/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_ss&expand=2669)
37587#[inline]
37588#[target_feature(enable = "avx512f")]
37589#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37590#[cfg_attr(test, assert_instr(vfmsub))]
37591pub fn _mm_mask3_fmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
37592    unsafe {
37593        let mut fmsub: f32 = simd_extract!(c, 0);
37594        if (k & 0b00000001) != 0 {
37595            let extracta: f32 = simd_extract!(a, 0);
37596            let extractb: f32 = simd_extract!(b, 0);
37597            let extractc = -fmsub;
37598            fmsub = fmaf32(extracta, extractb, extractc);
37599        }
37600        simd_insert!(c, 0, fmsub)
37601    }
37602}
37603
37604/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37605///
37606/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_sd&expand=2664)
37607#[inline]
37608#[target_feature(enable = "avx512f")]
37609#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37610#[cfg_attr(test, assert_instr(vfmsub))]
37611pub fn _mm_mask_fmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
37612    unsafe {
37613        let mut fmsub: f64 = simd_extract!(a, 0);
37614        if (k & 0b00000001) != 0 {
37615            let extractb: f64 = simd_extract!(b, 0);
37616            let extractc: f64 = simd_extract!(c, 0);
37617            let extractc = -extractc;
37618            fmsub = fmaf64(fmsub, extractb, extractc);
37619        }
37620        simd_insert!(a, 0, fmsub)
37621    }
37622}
37623
37624/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37625///
37626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_sd&expand=2666)
37627#[inline]
37628#[target_feature(enable = "avx512f")]
37629#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37630#[cfg_attr(test, assert_instr(vfmsub))]
37631pub fn _mm_maskz_fmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
37632    unsafe {
37633        let mut fmsub: f64 = 0.;
37634        if (k & 0b00000001) != 0 {
37635            let extracta: f64 = simd_extract!(a, 0);
37636            let extractb: f64 = simd_extract!(b, 0);
37637            let extractc: f64 = simd_extract!(c, 0);
37638            let extractc = -extractc;
37639            fmsub = fmaf64(extracta, extractb, extractc);
37640        }
37641        simd_insert!(a, 0, fmsub)
37642    }
37643}
37644
37645/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
37646///
37647/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_sd&expand=2665)
37648#[inline]
37649#[target_feature(enable = "avx512f")]
37650#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37651#[cfg_attr(test, assert_instr(vfmsub))]
37652pub fn _mm_mask3_fmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
37653    unsafe {
37654        let mut fmsub: f64 = simd_extract!(c, 0);
37655        if (k & 0b00000001) != 0 {
37656            let extracta: f64 = simd_extract!(a, 0);
37657            let extractb: f64 = simd_extract!(b, 0);
37658            let extractc = -fmsub;
37659            fmsub = fmaf64(extracta, extractb, extractc);
37660        }
37661        simd_insert!(c, 0, fmsub)
37662    }
37663}
37664
37665/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37666///
37667/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_ss&expand=2748)
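///
/// A minimal usage sketch (illustrative, not part of Intel's documentation):
///
/// ```ignore
/// let a = _mm_set_ss(2.0);
/// let b = _mm_set_ss(3.0);
/// let c = _mm_set_ss(10.0);
/// // Mask bit 0 set: lower lane is -(2.0 * 3.0) + 10.0 = 4.0; with the bit clear it stays 2.0.
/// let r = _mm_mask_fnmadd_ss(a, 0b1, b, c);
/// ```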
37668#[inline]
37669#[target_feature(enable = "avx512f")]
37670#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37671#[cfg_attr(test, assert_instr(vfnmadd))]
37672pub fn _mm_mask_fnmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
37673    unsafe {
37674        let mut fnmadd: f32 = simd_extract!(a, 0);
37675        if (k & 0b00000001) != 0 {
37676            let extracta = -fnmadd;
37677            let extractb: f32 = simd_extract!(b, 0);
37678            let extractc: f32 = simd_extract!(c, 0);
37679            fnmadd = fmaf32(extracta, extractb, extractc);
37680        }
37681        simd_insert!(a, 0, fnmadd)
37682    }
37683}
37684
37685/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37686///
37687/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_ss&expand=2750)
37688#[inline]
37689#[target_feature(enable = "avx512f")]
37690#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37691#[cfg_attr(test, assert_instr(vfnmadd))]
37692pub fn _mm_maskz_fnmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
37693    unsafe {
37694        let mut fnmadd: f32 = 0.;
37695        if (k & 0b00000001) != 0 {
37696            let extracta: f32 = simd_extract!(a, 0);
37697            let extracta = -extracta;
37698            let extractb: f32 = simd_extract!(b, 0);
37699            let extractc: f32 = simd_extract!(c, 0);
37700            fnmadd = fmaf32(extracta, extractb, extractc);
37701        }
37702        simd_insert!(a, 0, fnmadd)
37703    }
37704}
37705
37706/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
37707///
37708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_ss&expand=2749)
37709#[inline]
37710#[target_feature(enable = "avx512f")]
37711#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37712#[cfg_attr(test, assert_instr(vfnmadd))]
37713pub fn _mm_mask3_fnmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
37714    unsafe {
37715        let mut fnmadd: f32 = simd_extract!(c, 0);
37716        if (k & 0b00000001) != 0 {
37717            let extracta: f32 = simd_extract!(a, 0);
37718            let extracta = -extracta;
37719            let extractb: f32 = simd_extract!(b, 0);
37720            fnmadd = fmaf32(extracta, extractb, fnmadd);
37721        }
37722        simd_insert!(c, 0, fnmadd)
37723    }
37724}
37725
37726/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37727///
37728/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_sd&expand=2744)
37729#[inline]
37730#[target_feature(enable = "avx512f")]
37731#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37732#[cfg_attr(test, assert_instr(vfnmadd))]
37733pub fn _mm_mask_fnmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
37734    unsafe {
37735        let mut fnmadd: f64 = simd_extract!(a, 0);
37736        if (k & 0b00000001) != 0 {
37737            let extracta = -fnmadd;
37738            let extractb: f64 = simd_extract!(b, 0);
37739            let extractc: f64 = simd_extract!(c, 0);
37740            fnmadd = fmaf64(extracta, extractb, extractc);
37741        }
37742        simd_insert!(a, 0, fnmadd)
37743    }
37744}
37745
37746/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37747///
37748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_sd&expand=2746)
37749#[inline]
37750#[target_feature(enable = "avx512f")]
37751#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37752#[cfg_attr(test, assert_instr(vfnmadd))]
37753pub fn _mm_maskz_fnmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
37754    unsafe {
37755        let mut fnmadd: f64 = 0.;
37756        if (k & 0b00000001) != 0 {
37757            let extracta: f64 = simd_extract!(a, 0);
37758            let extracta = -extracta;
37759            let extractb: f64 = simd_extract!(b, 0);
37760            let extractc: f64 = simd_extract!(c, 0);
37761            fnmadd = fmaf64(extracta, extractb, extractc);
37762        }
37763        simd_insert!(a, 0, fnmadd)
37764    }
37765}
37766
37767/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
37768///
37769/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_sd&expand=2745)
37770#[inline]
37771#[target_feature(enable = "avx512f")]
37772#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37773#[cfg_attr(test, assert_instr(vfnmadd))]
37774pub fn _mm_mask3_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
37775    unsafe {
37776        let mut fnmadd: f64 = simd_extract!(c, 0);
37777        if (k & 0b00000001) != 0 {
37778            let extracta: f64 = simd_extract!(a, 0);
37779            let extracta = -extracta;
37780            let extractb: f64 = simd_extract!(b, 0);
37781            fnmadd = fmaf64(extracta, extractb, fnmadd);
37782        }
37783        simd_insert!(c, 0, fnmadd)
37784    }
37785}
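
// Illustrative sketch of the scalar FNMADD computation used by the intrinsics above: the product
// is negated before the addition, i.e. -(a * b) + c. The helper name `_fnmadd_sd_mask_sketch` is
// hypothetical and assumes AVX-512F support.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _fnmadd_sd_mask_sketch() {
    let a = _mm_set_sd(2.0);
    let b = _mm_set_sd(3.0);
    let c = _mm_set_sd(10.0);
    // Mask bit 0 set: lower lane = -(2.0 * 3.0) + 10.0 = 4.0.
    assert_eq!(_mm_cvtsd_f64(_mm_mask_fnmadd_sd(a, 0b1, b, c)), 4.0);
    // Mask bit 0 clear: the zeromask variant clears the lower lane instead.
    assert_eq!(_mm_cvtsd_f64(_mm_maskz_fnmadd_sd(0b0, a, b, c)), 0.0);
}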
37786
37787/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37788///
37789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_ss&expand=2796)
37790#[inline]
37791#[target_feature(enable = "avx512f")]
37792#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37793#[cfg_attr(test, assert_instr(vfnmsub))]
37794pub fn _mm_mask_fnmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
37795    unsafe {
37796        let mut fnmsub: f32 = simd_extract!(a, 0);
37797        if (k & 0b00000001) != 0 {
37798            let extracta = -fnmsub;
37799            let extractb: f32 = simd_extract!(b, 0);
37800            let extractc: f32 = simd_extract!(c, 0);
37801            let extractc = -extractc;
37802            fnmsub = fmaf32(extracta, extractb, extractc);
37803        }
37804        simd_insert!(a, 0, fnmsub)
37805    }
37806}
37807
37808/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37809///
37810/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_ss&expand=2798)
37811#[inline]
37812#[target_feature(enable = "avx512f")]
37813#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37814#[cfg_attr(test, assert_instr(vfnmsub))]
37815pub fn _mm_maskz_fnmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
37816    unsafe {
37817        let mut fnmsub: f32 = 0.;
37818        if (k & 0b00000001) != 0 {
37819            let extracta: f32 = simd_extract!(a, 0);
37820            let extracta = -extracta;
37821            let extractb: f32 = simd_extract!(b, 0);
37822            let extractc: f32 = simd_extract!(c, 0);
37823            let extractc = -extractc;
37824            fnmsub = fmaf32(extracta, extractb, extractc);
37825        }
37826        simd_insert!(a, 0, fnmsub)
37827    }
37828}
37829
37830/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
37831///
37832/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_ss&expand=2797)
37833#[inline]
37834#[target_feature(enable = "avx512f")]
37835#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37836#[cfg_attr(test, assert_instr(vfnmsub))]
37837pub fn _mm_mask3_fnmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
37838    unsafe {
37839        let mut fnmsub: f32 = simd_extract!(c, 0);
37840        if (k & 0b00000001) != 0 {
37841            let extracta: f32 = simd_extract!(a, 0);
37842            let extracta = -extracta;
37843            let extractb: f32 = simd_extract!(b, 0);
37844            let extractc = -fnmsub;
37845            fnmsub = fmaf32(extracta, extractb, extractc);
37846        }
37847        simd_insert!(c, 0, fnmsub)
37848    }
37849}
37850
37851/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37852///
37853/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_sd&expand=2792)
37854#[inline]
37855#[target_feature(enable = "avx512f")]
37856#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37857#[cfg_attr(test, assert_instr(vfnmsub))]
37858pub fn _mm_mask_fnmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
37859    unsafe {
37860        let mut fnmsub: f64 = simd_extract!(a, 0);
37861        if (k & 0b00000001) != 0 {
37862            let extracta = -fnmsub;
37863            let extractb: f64 = simd_extract!(b, 0);
37864            let extractc: f64 = simd_extract!(c, 0);
37865            let extractc = -extractc;
37866            fnmsub = fmaf64(extracta, extractb, extractc);
37867        }
37868        simd_insert!(a, 0, fnmsub)
37869    }
37870}
37871
37872/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37873///
37874/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_sd&expand=2794)
37875#[inline]
37876#[target_feature(enable = "avx512f")]
37877#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37878#[cfg_attr(test, assert_instr(vfnmsub))]
37879pub fn _mm_maskz_fnmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
37880    unsafe {
37881        let mut fnmsub: f64 = 0.;
37882        if (k & 0b00000001) != 0 {
37883            let extracta: f64 = simd_extract!(a, 0);
37884            let extracta = -extracta;
37885            let extractb: f64 = simd_extract!(b, 0);
37886            let extractc: f64 = simd_extract!(c, 0);
37887            let extractc = -extractc;
37888            fnmsub = fmaf64(extracta, extractb, extractc);
37889        }
37890        simd_insert!(a, 0, fnmsub)
37891    }
37892}
37893
37894/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
37895///
37896/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_sd&expand=2793)
37897#[inline]
37898#[target_feature(enable = "avx512f")]
37899#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37900#[cfg_attr(test, assert_instr(vfnmsub))]
37901pub fn _mm_mask3_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
37902    unsafe {
37903        let mut fnmsub: f64 = simd_extract!(c, 0);
37904        if (k & 0b00000001) != 0 {
37905            let extracta: f64 = simd_extract!(a, 0);
37906            let extracta = -extracta;
37907            let extractb: f64 = simd_extract!(b, 0);
37908            let extractc = -fnmsub;
37909            fnmsub = fmaf64(extracta, extractb, extractc);
37910        }
37911        simd_insert!(c, 0, fnmsub)
37912    }
37913}
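
// Illustrative sketch of the scalar FNMSUB computation above: both the product and `c` enter
// negated, i.e. -(a * b) - c; the single-precision variants behave identically lane-wise. The
// helper name `_fnmsub_sketch` is hypothetical and assumes AVX-512F support.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _fnmsub_sketch() {
    let a = _mm_set_sd(2.0);
    let b = _mm_set_sd(3.0);
    let c = _mm_set_sd(1.0);
    // Lower lane = -(2.0 * 3.0) - 1.0 = -7.0.
    assert_eq!(_mm_cvtsd_f64(_mm_maskz_fnmsub_sd(0b1, a, b, c)), -7.0);
    // With mask bit 0 clear, the writemask variant keeps `a`'s lower lane.
    assert_eq!(_mm_cvtsd_f64(_mm_mask_fnmsub_sd(a, 0b0, b, c)), 2.0);
    // Single-precision counterpart: -(2.0 * 3.0) - 1.0 = -7.0 as well.
    let (af, bf, cf) = (_mm_set_ss(2.0), _mm_set_ss(3.0), _mm_set_ss(1.0));
    assert_eq!(_mm_cvtss_f32(_mm_maskz_fnmsub_ss(0b1, af, bf, cf)), -7.0);
}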
37914
37915/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
37916///
37917/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37918/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
37919/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
37920/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
37921/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
37922/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37923///
37924/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_round_ss&expand=151)
37925#[inline]
37926#[target_feature(enable = "avx512f")]
37927#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37928#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
37929#[rustc_legacy_const_generics(2)]
37930pub fn _mm_add_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
37931    unsafe {
37932        static_assert_rounding!(ROUNDING);
37933        let a = a.as_f32x4();
37934        let b = b.as_f32x4();
37935        let r = vaddss(a, b, f32x4::ZERO, 0b1, ROUNDING);
37936        transmute(r)
37937    }
37938}
37939
37940/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37941///
37942/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37943/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
37944/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
37945/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
37946/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
37947/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37948///
37949/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_round_ss&expand=152)
37950#[inline]
37951#[target_feature(enable = "avx512f")]
37952#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37953#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
37954#[rustc_legacy_const_generics(4)]
37955pub fn _mm_mask_add_round_ss<const ROUNDING: i32>(
37956    src: __m128,
37957    k: __mmask8,
37958    a: __m128,
37959    b: __m128,
37960) -> __m128 {
37961    unsafe {
37962        static_assert_rounding!(ROUNDING);
37963        let a = a.as_f32x4();
37964        let b = b.as_f32x4();
37965        let src = src.as_f32x4();
37966        let r = vaddss(a, b, src, k, ROUNDING);
37967        transmute(r)
37968    }
37969}
37970
37971/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37972///
37973/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37974/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
37975/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
37976/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
37977/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
37978/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37979///
37980/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_round_ss&expand=153)
37981#[inline]
37982#[target_feature(enable = "avx512f")]
37983#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37984#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
37985#[rustc_legacy_const_generics(3)]
37986pub fn _mm_maskz_add_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
37987    unsafe {
37988        static_assert_rounding!(ROUNDING);
37989        let a = a.as_f32x4();
37990        let b = b.as_f32x4();
37991        let r = vaddss(a, b, f32x4::ZERO, k, ROUNDING);
37992        transmute(r)
37993    }
37994}
37995
37996/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
37997///
37998/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37999/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38000/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38001/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38002/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38003/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38004///
38005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_round_sd&expand=148)
38006#[inline]
38007#[target_feature(enable = "avx512f")]
38008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38009#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
38010#[rustc_legacy_const_generics(2)]
38011pub fn _mm_add_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38012    unsafe {
38013        static_assert_rounding!(ROUNDING);
38014        let a = a.as_f64x2();
38015        let b = b.as_f64x2();
38016        let r = vaddsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
38017        transmute(r)
38018    }
38019}
38020
38021/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38022///
38023/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38024/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38025/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38026/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38027/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38028/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38029///
38030/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_round_sd&expand=149)
38031#[inline]
38032#[target_feature(enable = "avx512f")]
38033#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38034#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
38035#[rustc_legacy_const_generics(4)]
38036pub fn _mm_mask_add_round_sd<const ROUNDING: i32>(
38037    src: __m128d,
38038    k: __mmask8,
38039    a: __m128d,
38040    b: __m128d,
38041) -> __m128d {
38042    unsafe {
38043        static_assert_rounding!(ROUNDING);
38044        let a = a.as_f64x2();
38045        let b = b.as_f64x2();
38046        let src = src.as_f64x2();
38047        let r = vaddsd(a, b, src, k, ROUNDING);
38048        transmute(r)
38049    }
38050}
38051
38052/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38053///
38054/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38055/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38056/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38057/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38058/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38059/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38060///
38061/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_round_sd&expand=150)
38062#[inline]
38063#[target_feature(enable = "avx512f")]
38064#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38065#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
38066#[rustc_legacy_const_generics(3)]
38067pub fn _mm_maskz_add_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38068    unsafe {
38069        static_assert_rounding!(ROUNDING);
38070        let a = a.as_f64x2();
38071        let b = b.as_f64x2();
38072        let r = vaddsd(a, b, f64x2::ZERO, k, ROUNDING);
38073        transmute(r)
38074    }
38075}
38076
38077/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38078///
38079/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38080/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38081/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38082/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38083/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38084/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38085///
38086/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sub_round_ss&expand=5745)
38087#[inline]
38088#[target_feature(enable = "avx512f")]
38089#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38090#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
38091#[rustc_legacy_const_generics(2)]
38092pub fn _mm_sub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
38093    unsafe {
38094        static_assert_rounding!(ROUNDING);
38095        let a = a.as_f32x4();
38096        let b = b.as_f32x4();
38097        let r = vsubss(a, b, f32x4::ZERO, 0b1, ROUNDING);
38098        transmute(r)
38099    }
38100}
38101
38102/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38103///
38104/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38105/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38106/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38107/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38108/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38109/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38110///
38111/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_round_ss&expand=5743)
38112#[inline]
38113#[target_feature(enable = "avx512f")]
38114#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38115#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
38116#[rustc_legacy_const_generics(4)]
38117pub fn _mm_mask_sub_round_ss<const ROUNDING: i32>(
38118    src: __m128,
38119    k: __mmask8,
38120    a: __m128,
38121    b: __m128,
38122) -> __m128 {
38123    unsafe {
38124        static_assert_rounding!(ROUNDING);
38125        let a = a.as_f32x4();
38126        let b = b.as_f32x4();
38127        let src = src.as_f32x4();
38128        let r = vsubss(a, b, src, k, ROUNDING);
38129        transmute(r)
38130    }
38131}
38132
38133/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38134///
38135/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38136/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38137/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38138/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38139/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38140/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38141///
38142/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_round_ss&expand=5744)
38143#[inline]
38144#[target_feature(enable = "avx512f")]
38145#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38146#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
38147#[rustc_legacy_const_generics(3)]
38148pub fn _mm_maskz_sub_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38149    unsafe {
38150        static_assert_rounding!(ROUNDING);
38151        let a = a.as_f32x4();
38152        let b = b.as_f32x4();
38153        let r = vsubss(a, b, f32x4::ZERO, k, ROUNDING);
38154        transmute(r)
38155    }
38156}
38157
38158/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38159///
38160/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38161/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38162/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38163/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38164/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38165/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38166///
38167/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sub_round_sd&expand=5742)
38168#[inline]
38169#[target_feature(enable = "avx512f")]
38170#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38171#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
38172#[rustc_legacy_const_generics(2)]
38173pub fn _mm_sub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38174    unsafe {
38175        static_assert_rounding!(ROUNDING);
38176        let a = a.as_f64x2();
38177        let b = b.as_f64x2();
38178        let r = vsubsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
38179        transmute(r)
38180    }
38181}
38182
38183/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38184///
38185/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38186/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38187/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38188/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38189/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38190/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38191///
38192/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_round_sd&expand=5740)
38193#[inline]
38194#[target_feature(enable = "avx512f")]
38195#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38196#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
38197#[rustc_legacy_const_generics(4)]
38198pub fn _mm_mask_sub_round_sd<const ROUNDING: i32>(
38199    src: __m128d,
38200    k: __mmask8,
38201    a: __m128d,
38202    b: __m128d,
38203) -> __m128d {
38204    unsafe {
38205        static_assert_rounding!(ROUNDING);
38206        let a = a.as_f64x2();
38207        let b = b.as_f64x2();
38208        let src = src.as_f64x2();
38209        let r = vsubsd(a, b, src, k, ROUNDING);
38210        transmute(r)
38211    }
38212}
38213
38214/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38215///
38216/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38217/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38218/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38219/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38220/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38221/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38222///
38223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_round_sd&expand=5741)
38224#[inline]
38225#[target_feature(enable = "avx512f")]
38226#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38227#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
38228#[rustc_legacy_const_generics(3)]
38229pub fn _mm_maskz_sub_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38230    unsafe {
38231        static_assert_rounding!(ROUNDING);
38232        let a = a.as_f64x2();
38233        let b = b.as_f64x2();
38234        let r = vsubsd(a, b, f64x2::ZERO, k, ROUNDING);
38235        transmute(r)
38236    }
38237}
38238
38239/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38240///
38241/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38242/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38243/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38244/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38245/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38246/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38247///
38248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mul_round_ss&expand=3946)
38249#[inline]
38250#[target_feature(enable = "avx512f")]
38251#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38252#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
38253#[rustc_legacy_const_generics(2)]
38254pub fn _mm_mul_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
38255    unsafe {
38256        static_assert_rounding!(ROUNDING);
38257        let a = a.as_f32x4();
38258        let b = b.as_f32x4();
38259        let r = vmulss(a, b, f32x4::ZERO, 0b1, ROUNDING);
38260        transmute(r)
38261    }
38262}
38263
38264/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38265///
38266/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38267/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38268/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38269/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38270/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38271/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38272///
38273/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_round_ss&expand=3944)
38274#[inline]
38275#[target_feature(enable = "avx512f")]
38276#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38277#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
38278#[rustc_legacy_const_generics(4)]
38279pub fn _mm_mask_mul_round_ss<const ROUNDING: i32>(
38280    src: __m128,
38281    k: __mmask8,
38282    a: __m128,
38283    b: __m128,
38284) -> __m128 {
38285    unsafe {
38286        static_assert_rounding!(ROUNDING);
38287        let a = a.as_f32x4();
38288        let b = b.as_f32x4();
38289        let src = src.as_f32x4();
38290        let r = vmulss(a, b, src, k, ROUNDING);
38291        transmute(r)
38292    }
38293}
38294
38295/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38296///
38297/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38298/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38299/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38300/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38301/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38302/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38303///
38304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_round_ss&expand=3945)
38305#[inline]
38306#[target_feature(enable = "avx512f")]
38307#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38308#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
38309#[rustc_legacy_const_generics(3)]
38310pub fn _mm_maskz_mul_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38311    unsafe {
38312        static_assert_rounding!(ROUNDING);
38313        let a = a.as_f32x4();
38314        let b = b.as_f32x4();
38315        let r = vmulss(a, b, f32x4::ZERO, k, ROUNDING);
38316        transmute(r)
38317    }
38318}
38319
38320/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38321///
38322/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38323/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38324/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38325/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38326/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38327/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38328///
38329/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mul_round_sd&expand=3943)
38330#[inline]
38331#[target_feature(enable = "avx512f")]
38332#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38333#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
38334#[rustc_legacy_const_generics(2)]
38335pub fn _mm_mul_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38336    unsafe {
38337        static_assert_rounding!(ROUNDING);
38338        let a = a.as_f64x2();
38339        let b = b.as_f64x2();
38340        let r = vmulsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
38341        transmute(r)
38342    }
38343}
38344
38345/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38346///
38347/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38348/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38349/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38350/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38351/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38352/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38353///
38354/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_round_sd&expand=3941)
38355#[inline]
38356#[target_feature(enable = "avx512f")]
38357#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38358#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
38359#[rustc_legacy_const_generics(4)]
38360pub fn _mm_mask_mul_round_sd<const ROUNDING: i32>(
38361    src: __m128d,
38362    k: __mmask8,
38363    a: __m128d,
38364    b: __m128d,
38365) -> __m128d {
38366    unsafe {
38367        static_assert_rounding!(ROUNDING);
38368        let a = a.as_f64x2();
38369        let b = b.as_f64x2();
38370        let src = src.as_f64x2();
38371        let r = vmulsd(a, b, src, k, ROUNDING);
38372        transmute(r)
38373    }
38374}
38375
38376/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38377///
38378/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38379/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38380/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38381/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38382/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38383/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38384///
38385/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_round_sd&expand=3942)
38386#[inline]
38387#[target_feature(enable = "avx512f")]
38388#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38389#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
38390#[rustc_legacy_const_generics(3)]
38391pub fn _mm_maskz_mul_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38392    unsafe {
38393        static_assert_rounding!(ROUNDING);
38394        let a = a.as_f64x2();
38395        let b = b.as_f64x2();
38396        let r = vmulsd(a, b, f64x2::ZERO, k, ROUNDING);
38397        transmute(r)
38398    }
38399}
38400
38401/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38402///
38403/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38404/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38405/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38406/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38407/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38408/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38409///
38410/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_div_round_ss&expand=2174)
38411#[inline]
38412#[target_feature(enable = "avx512f")]
38413#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38414#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
38415#[rustc_legacy_const_generics(2)]
38416pub fn _mm_div_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
38417    unsafe {
38418        static_assert_rounding!(ROUNDING);
38419        let a = a.as_f32x4();
38420        let b = b.as_f32x4();
38421        let r = vdivss(a, b, f32x4::ZERO, 0b1, ROUNDING);
38422        transmute(r)
38423    }
38424}
38425
38426/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38427///
38428/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38429/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38430/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38431/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38432/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38433/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38434///
38435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_round_ss&expand=2175)
38436#[inline]
38437#[target_feature(enable = "avx512f")]
38438#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38439#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
38440#[rustc_legacy_const_generics(4)]
38441pub fn _mm_mask_div_round_ss<const ROUNDING: i32>(
38442    src: __m128,
38443    k: __mmask8,
38444    a: __m128,
38445    b: __m128,
38446) -> __m128 {
38447    unsafe {
38448        static_assert_rounding!(ROUNDING);
38449        let a = a.as_f32x4();
38450        let b = b.as_f32x4();
38451        let src = src.as_f32x4();
38452        let r = vdivss(a, b, src, k, ROUNDING);
38453        transmute(r)
38454    }
38455}
38456
38457/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38458///
38459/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38460/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38461/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38462/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38463/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38464/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38465///
38466/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_round_ss&expand=2176)
38467#[inline]
38468#[target_feature(enable = "avx512f")]
38469#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38470#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
38471#[rustc_legacy_const_generics(3)]
38472pub fn _mm_maskz_div_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38473    unsafe {
38474        static_assert_rounding!(ROUNDING);
38475        let a = a.as_f32x4();
38476        let b = b.as_f32x4();
38477        let r = vdivss(a, b, f32x4::ZERO, k, ROUNDING);
38478        transmute(r)
38479    }
38480}
38481
38482/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38483///
38484/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38485/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38486/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38487/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38488/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38489/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38490///
38491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_div_round_sd&expand=2171)
38492#[inline]
38493#[target_feature(enable = "avx512f")]
38494#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38495#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
38496#[rustc_legacy_const_generics(2)]
38497pub fn _mm_div_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38498    unsafe {
38499        static_assert_rounding!(ROUNDING);
38500        let a = a.as_f64x2();
38501        let b = b.as_f64x2();
38502        let r = vdivsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
38503        transmute(r)
38504    }
38505}
38506
38507/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38508///
38509/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38510/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38511/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38512/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38513/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38514/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38515///
38516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_round_sd&expand=2172)
38517#[inline]
38518#[target_feature(enable = "avx512f")]
38519#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38520#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
38521#[rustc_legacy_const_generics(4)]
38522pub fn _mm_mask_div_round_sd<const ROUNDING: i32>(
38523    src: __m128d,
38524    k: __mmask8,
38525    a: __m128d,
38526    b: __m128d,
38527) -> __m128d {
38528    unsafe {
38529        static_assert_rounding!(ROUNDING);
38530        let a = a.as_f64x2();
38531        let b = b.as_f64x2();
38532        let src = src.as_f64x2();
38533        let r = vdivsd(a, b, src, k, ROUNDING);
38534        transmute(r)
38535    }
38536}
38537
38538/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38539///
38540/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38541/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38542/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38543/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38544/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38545/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38546///
38547/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_round_sd&expand=2173)
38548#[inline]
38549#[target_feature(enable = "avx512f")]
38550#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38551#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
38552#[rustc_legacy_const_generics(3)]
38553pub fn _mm_maskz_div_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38554    unsafe {
38555        static_assert_rounding!(ROUNDING);
38556        let a = a.as_f64x2();
38557        let b = b.as_f64x2();
38558        let r = vdivsd(a, b, f64x2::ZERO, k, ROUNDING);
38559        transmute(r)
38560    }
38561}
38562
38563/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38564/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38565///
38566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_max_round_ss&expand=3668)
38567#[inline]
38568#[target_feature(enable = "avx512f")]
38569#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38570#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
38571#[rustc_legacy_const_generics(2)]
38572pub fn _mm_max_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
38573    unsafe {
38574        static_assert_sae!(SAE);
38575        let a = a.as_f32x4();
38576        let b = b.as_f32x4();
38577        let r = vmaxss(a, b, f32x4::ZERO, 0b1, SAE);
38578        transmute(r)
38579    }
38580}
38581
38582/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38583/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38584///
38585/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_round_ss&expand=3672)
38586#[inline]
38587#[target_feature(enable = "avx512f")]
38588#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38589#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
38590#[rustc_legacy_const_generics(4)]
38591pub fn _mm_mask_max_round_ss<const SAE: i32>(
38592    src: __m128,
38593    k: __mmask8,
38594    a: __m128,
38595    b: __m128,
38596) -> __m128 {
38597    unsafe {
38598        static_assert_sae!(SAE);
38599        let a = a.as_f32x4();
38600        let b = b.as_f32x4();
38601        let src = src.as_f32x4();
38602        let r = vmaxss(a, b, src, k, SAE);
38603        transmute(r)
38604    }
38605}
38606
38607/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38608/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38609///
38610/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_round_ss&expand=3667)
38611#[inline]
38612#[target_feature(enable = "avx512f")]
38613#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38614#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
38615#[rustc_legacy_const_generics(3)]
38616pub fn _mm_maskz_max_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38617    unsafe {
38618        static_assert_sae!(SAE);
38619        let a = a.as_f32x4();
38620        let b = b.as_f32x4();
38621        let r = vmaxss(a, b, f32x4::ZERO, k, SAE);
38622        transmute(r)
38623    }
38624}
38625
38626/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38627/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38628///
38629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_max_round_sd&expand=3665)
38630#[inline]
38631#[target_feature(enable = "avx512f")]
38632#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38633#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
38634#[rustc_legacy_const_generics(2)]
38635pub fn _mm_max_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
38636    unsafe {
38637        static_assert_sae!(SAE);
38638        let a = a.as_f64x2();
38639        let b = b.as_f64x2();
38640        let r = vmaxsd(a, b, f64x2::ZERO, 0b1, SAE);
38641        transmute(r)
38642    }
38643}
38644
38645/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38646/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38647///
38648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_round_sd&expand=3663)
38649#[inline]
38650#[target_feature(enable = "avx512f")]
38651#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38652#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
38653#[rustc_legacy_const_generics(4)]
38654pub fn _mm_mask_max_round_sd<const SAE: i32>(
38655    src: __m128d,
38656    k: __mmask8,
38657    a: __m128d,
38658    b: __m128d,
38659) -> __m128d {
38660    unsafe {
38661        static_assert_sae!(SAE);
38662        let a = a.as_f64x2();
38663        let b = b.as_f64x2();
38664        let src = src.as_f64x2();
38665        let r = vmaxsd(a, b, src, k, SAE);
38666        transmute(r)
38667    }
38668}
38669
38670/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38671/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38672///
38673/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_round_sd&expand=3670)
38674#[inline]
38675#[target_feature(enable = "avx512f")]
38676#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38677#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
38678#[rustc_legacy_const_generics(3)]
38679pub fn _mm_maskz_max_round_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38680    unsafe {
38681        static_assert_sae!(SAE);
38682        let a = a.as_f64x2();
38683        let b = b.as_f64x2();
38684        let r = vmaxsd(a, b, f64x2::ZERO, k, SAE);
38685        transmute(r)
38686    }
38687}
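// The `_sd` forms mirror the `_ss` forms for `__m128d` (sketch only, `avx512f`-enabled
// caller assumed): lane 0 holds the comparison result and lane 1 is copied from `a`.
//
//     let a = _mm_set_pd(9.0, 1.0); // a = [1.0, 9.0] (lane 0 first)
//     let b = _mm_set_sd(5.0);
//     let r = _mm_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
//     assert_eq!(_mm_cvtsd_f64(r), 5.0); // lane 1 of r is 9.0 from `a`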
38688
38689/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38690/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38691///
38692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_min_round_ss&expand=3782)
38693#[inline]
38694#[target_feature(enable = "avx512f")]
38695#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38696#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
38697#[rustc_legacy_const_generics(2)]
38698pub fn _mm_min_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
38699    unsafe {
38700        static_assert_sae!(SAE);
38701        let a = a.as_f32x4();
38702        let b = b.as_f32x4();
38703        let r = vminss(a, b, f32x4::ZERO, 0b1, SAE);
38704        transmute(r)
38705    }
38706}
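// Illustrative usage (sketch only, `avx512f`-enabled caller assumed): the min family is the
// mirror image of the max family above, so the lower lane holds min(a[0], b[0]).
//
//     let a = _mm_set_ss(3.0);
//     let b = _mm_set_ss(-2.5);
//     let r = _mm_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
//     assert_eq!(_mm_cvtss_f32(r), -2.5);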
38707
38708/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38709/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38710///
38711/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_round_ss&expand=3780)
38712#[inline]
38713#[target_feature(enable = "avx512f")]
38714#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38715#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
38716#[rustc_legacy_const_generics(4)]
38717pub fn _mm_mask_min_round_ss<const SAE: i32>(
38718    src: __m128,
38719    k: __mmask8,
38720    a: __m128,
38721    b: __m128,
38722) -> __m128 {
38723    unsafe {
38724        static_assert_sae!(SAE);
38725        let a = a.as_f32x4();
38726        let b = b.as_f32x4();
38727        let src = src.as_f32x4();
38728        let r = vminss(a, b, src, k, SAE);
38729        transmute(r)
38730    }
38731}
38732
38733/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38734/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38735///
38736/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_round_ss&expand=3781)
38737#[inline]
38738#[target_feature(enable = "avx512f")]
38739#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38740#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
38741#[rustc_legacy_const_generics(3)]
38742pub fn _mm_maskz_min_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38743    unsafe {
38744        static_assert_sae!(SAE);
38745        let a = a.as_f32x4();
38746        let b = b.as_f32x4();
38747        let r = vminss(a, b, f32x4::ZERO, k, SAE);
38748        transmute(r)
38749    }
38750}
38751
38752/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38753/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38754///
38755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_min_round_sd&expand=3779)
38756#[inline]
38757#[target_feature(enable = "avx512f")]
38758#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38759#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
38760#[rustc_legacy_const_generics(2)]
38761pub fn _mm_min_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
38762    unsafe {
38763        static_assert_sae!(SAE);
38764        let a = a.as_f64x2();
38765        let b = b.as_f64x2();
38766        let r = vminsd(a, b, f64x2::ZERO, 0b1, SAE);
38767        transmute(r)
38768    }
38769}
38770
38771/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38772/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38773///
38774/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_round_sd&expand=3777)
38775#[inline]
38776#[target_feature(enable = "avx512f")]
38777#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38778#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
38779#[rustc_legacy_const_generics(4)]
38780pub fn _mm_mask_min_round_sd<const SAE: i32>(
38781    src: __m128d,
38782    k: __mmask8,
38783    a: __m128d,
38784    b: __m128d,
38785) -> __m128d {
38786    unsafe {
38787        static_assert_sae!(SAE);
38788        let a = a.as_f64x2();
38789        let b = b.as_f64x2();
38790        let src = src.as_f64x2();
38791        let r = vminsd(a, b, src, k, SAE);
38792        transmute(r)
38793    }
38794}
38795
38796/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38797/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38798///
38799/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_round_sd&expand=3778)
38800#[inline]
38801#[target_feature(enable = "avx512f")]
38802#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38803#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
38804#[rustc_legacy_const_generics(3)]
38805pub fn _mm_maskz_min_round_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38806    unsafe {
38807        static_assert_sae!(SAE);
38808        let a = a.as_f64x2();
38809        let b = b.as_f64x2();
38810        let r = vminsd(a, b, f64x2::ZERO, k, SAE);
38811        transmute(r)
38812    }
38813}
38814
38815/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38816///
38817/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38818/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38819/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38820/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38821/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38822/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38823///
38824/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sqrt_round_ss&expand=5383)
38825#[inline]
38826#[target_feature(enable = "avx512f")]
38827#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38828#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
38829#[rustc_legacy_const_generics(2)]
38830pub fn _mm_sqrt_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
38831    unsafe {
38832        static_assert_rounding!(ROUNDING);
38833        vsqrtss(a, b, _mm_setzero_ps(), 0b1, ROUNDING)
38834    }
38835}
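// Illustrative usage (sketch only, `avx512f`-enabled caller assumed): unlike the SAE-only
// intrinsics above, `ROUNDING` here selects an embedded rounding mode, e.g. round-to-nearest
// with exceptions suppressed.
//
//     let a = _mm_set_ss(0.0);
//     let b = _mm_set_ss(9.0);
//     let r = _mm_sqrt_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
//     assert_eq!(_mm_cvtss_f32(r), 3.0); // sqrt of b[0]; upper lanes come from `a`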
38836
38837/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38838///
38839/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38840/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38841/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38842/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38843/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38844/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38845///
38846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_round_ss&expand=5381)
38847#[inline]
38848#[target_feature(enable = "avx512f")]
38849#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38850#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
38851#[rustc_legacy_const_generics(4)]
38852pub fn _mm_mask_sqrt_round_ss<const ROUNDING: i32>(
38853    src: __m128,
38854    k: __mmask8,
38855    a: __m128,
38856    b: __m128,
38857) -> __m128 {
38858    unsafe {
38859        static_assert_rounding!(ROUNDING);
38860        vsqrtss(a, b, src, k, ROUNDING)
38861    }
38862}
38863
38864/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38865///
38866/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38867/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38868/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38869/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38870/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38871/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38872///
38873/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_round_ss&expand=5382)
38874#[inline]
38875#[target_feature(enable = "avx512f")]
38876#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38877#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
38878#[rustc_legacy_const_generics(3)]
38879pub fn _mm_maskz_sqrt_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38880    unsafe {
38881        static_assert_rounding!(ROUNDING);
38882        vsqrtss(a, b, _mm_setzero_ps(), k, ROUNDING)
38883    }
38884}
38885
38886/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38887///
38888/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38889/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38890/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38891/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38892/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38893/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38894///
38895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sqrt_round_sd&expand=5380)
38896#[inline]
38897#[target_feature(enable = "avx512f")]
38898#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38899#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
38900#[rustc_legacy_const_generics(2)]
38901pub fn _mm_sqrt_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38902    unsafe {
38903        static_assert_rounding!(ROUNDING);
38904        vsqrtsd(a, b, _mm_setzero_pd(), 0b1, ROUNDING)
38905    }
38906}
38907
38908/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38909///
38910/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38911/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38912/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38913/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38914/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38915/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38916///
38917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_round_sd&expand=5378)
38918#[inline]
38919#[target_feature(enable = "avx512f")]
38920#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38921#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
38922#[rustc_legacy_const_generics(4)]
38923pub fn _mm_mask_sqrt_round_sd<const ROUNDING: i32>(
38924    src: __m128d,
38925    k: __mmask8,
38926    a: __m128d,
38927    b: __m128d,
38928) -> __m128d {
38929    unsafe {
38930        static_assert_rounding!(ROUNDING);
38931        vsqrtsd(a, b, src, k, ROUNDING)
38932    }
38933}
38934
38935/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38936///
38937/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38938/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38939/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38940/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38941/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38942/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38943///
38944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_round_sd&expand=5379)
38945#[inline]
38946#[target_feature(enable = "avx512f")]
38947#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38948#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
38949#[rustc_legacy_const_generics(3)]
38950pub fn _mm_maskz_sqrt_round_sd<const ROUNDING: i32>(
38951    k: __mmask8,
38952    a: __m128d,
38953    b: __m128d,
38954) -> __m128d {
38955    unsafe {
38956        static_assert_rounding!(ROUNDING);
38957        vsqrtsd(a, b, _mm_setzero_pd(), k, ROUNDING)
38958    }
38959}
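// Sketch of the double-precision zeromask form (again assuming an `avx512f`-enabled caller):
// with mask bit 0 set, the lower lane is computed normally and lane 1 still comes from `a`.
//
//     let a = _mm_set_pd(2.0, 0.0); // a = [0.0, 2.0] (lane 0 first)
//     let b = _mm_set_sd(16.0);
//     let r = _mm_maskz_sqrt_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0b1, a, b);
//     assert_eq!(_mm_cvtsd_f64(r), 4.0); // lane 1 of r is 2.0 from `a`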
38960
38961/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
38962/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38963///
38964/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_round_ss&expand=2856)
38965#[inline]
38966#[target_feature(enable = "avx512f")]
38967#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38968#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
38969#[rustc_legacy_const_generics(2)]
38970pub fn _mm_getexp_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
38971    unsafe {
38972        static_assert_sae!(SAE);
38973        let a = a.as_f32x4();
38974        let b = b.as_f32x4();
38975        let r = vgetexpss(a, b, f32x4::ZERO, 0b1, SAE);
38976        transmute(r)
38977    }
38978}
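// Illustrative usage (sketch only, `avx512f`-enabled caller assumed): getexp returns
// floor(log2(|b[0]|)) as a float, so 20.0 (which lies between 2^4 and 2^5) yields 4.0.
//
//     let a = _mm_set_ss(0.0);
//     let b = _mm_set_ss(20.0);
//     let r = _mm_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
//     assert_eq!(_mm_cvtss_f32(r), 4.0);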
38979
38980/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
38981/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38982///
38983/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_round_ss&expand=2857)
38984#[inline]
38985#[target_feature(enable = "avx512f")]
38986#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38987#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
38988#[rustc_legacy_const_generics(4)]
38989pub fn _mm_mask_getexp_round_ss<const SAE: i32>(
38990    src: __m128,
38991    k: __mmask8,
38992    a: __m128,
38993    b: __m128,
38994) -> __m128 {
38995    unsafe {
38996        static_assert_sae!(SAE);
38997        let a = a.as_f32x4();
38998        let b = b.as_f32x4();
38999        let src = src.as_f32x4();
39000        let r = vgetexpss(a, b, src, k, SAE);
39001        transmute(r)
39002    }
39003}
39004
39005/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39006/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39007///
39008/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_round_ss&expand=2858)
39009#[inline]
39010#[target_feature(enable = "avx512f")]
39011#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39012#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
39013#[rustc_legacy_const_generics(3)]
39014pub fn _mm_maskz_getexp_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
39015    unsafe {
39016        static_assert_sae!(SAE);
39017        let a = a.as_f32x4();
39018        let b = b.as_f32x4();
39019        let r = vgetexpss(a, b, f32x4::ZERO, k, SAE);
39020        transmute(r)
39021    }
39022}
39023
39024/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39025/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39026///
39027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_round_sd&expand=2853)
39028#[inline]
39029#[target_feature(enable = "avx512f")]
39030#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39031#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
39032#[rustc_legacy_const_generics(2)]
39033pub fn _mm_getexp_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
39034    unsafe {
39035        static_assert_sae!(SAE);
39036        let a = a.as_f64x2();
39037        let b = b.as_f64x2();
39038        let r = vgetexpsd(a, b, f64x2::ZERO, 0b1, SAE);
39039        transmute(r)
39040    }
39041}
39042
39043/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39044/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39045///
39046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_round_sd&expand=2854)
39047#[inline]
39048#[target_feature(enable = "avx512f")]
39049#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39050#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
39051#[rustc_legacy_const_generics(4)]
39052pub fn _mm_mask_getexp_round_sd<const SAE: i32>(
39053    src: __m128d,
39054    k: __mmask8,
39055    a: __m128d,
39056    b: __m128d,
39057) -> __m128d {
39058    unsafe {
39059        static_assert_sae!(SAE);
39060        let a = a.as_f64x2();
39061        let b = b.as_f64x2();
39062        let src = src.as_f64x2();
39063        let r = vgetexpsd(a, b, src, k, SAE);
39064        transmute(r)
39065    }
39066}
39067
39068/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39069/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39070///
39071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_round_sd&expand=2855)
39072#[inline]
39073#[target_feature(enable = "avx512f")]
39074#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39075#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
39076#[rustc_legacy_const_generics(3)]
39077pub fn _mm_maskz_getexp_round_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
39078    unsafe {
39079        static_assert_sae!(SAE);
39080        let a = a.as_f64x2();
39081        let b = b.as_f64x2();
39082        let r = vgetexpsd(a, b, f64x2::ZERO, k, SAE);
39083        transmute(r)
39084    }
39085}
39086
39087/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39088/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39089///    _MM_MANT_NORM_1_2     // interval [1, 2)\
39090///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
39091///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
39092///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39093/// The sign is determined by sc which can take the following values:\
39094///    _MM_MANT_SIGN_src     // sign = sign(src)\
39095///    _MM_MANT_SIGN_zero    // sign = 0\
39096///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
39097/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39098///
39099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_round_ss&expand=2892)
39100#[inline]
39101#[target_feature(enable = "avx512f")]
39102#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39103#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
39104#[rustc_legacy_const_generics(2, 3, 4)]
39105pub fn _mm_getmant_round_ss<
39106    const NORM: _MM_MANTISSA_NORM_ENUM,
39107    const SIGN: _MM_MANTISSA_SIGN_ENUM,
39108    const SAE: i32,
39109>(
39110    a: __m128,
39111    b: __m128,
39112) -> __m128 {
39113    unsafe {
39114        static_assert_uimm_bits!(NORM, 4);
39115        static_assert_uimm_bits!(SIGN, 2);
39116        static_assert_mantissas_sae!(SAE);
39117        let a = a.as_f32x4();
39118        let b = b.as_f32x4();
39119        let r = vgetmantss(a, b, SIGN << 2 | NORM, f32x4::ZERO, 0b1, SAE);
39120        transmute(r)
39121    }
39122}
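// Illustrative usage (sketch only, `avx512f`-enabled caller assumed; the interval and sign
// selectors are the ones listed in the doc comment above): normalizing 20.0 = 1.25 * 2^4
// into the [1, 2) interval, keeping the source sign, yields the significand 1.25.
//
//     let a = _mm_set_ss(0.0);
//     let b = _mm_set_ss(20.0);
//     let r = _mm_getmant_round_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_src, _MM_FROUND_CUR_DIRECTION>(a, b);
//     assert_eq!(_mm_cvtss_f32(r), 1.25);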
39123
39124/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39125/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39126///    _MM_MANT_NORM_1_2     // interval [1, 2)\
39127///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
39128///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
39129///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39130/// The sign is determined by sc which can take the following values:\
39131///    _MM_MANT_SIGN_src     // sign = sign(src)\
39132///    _MM_MANT_SIGN_zero    // sign = 0\
39133///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
39134/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39135///
39136/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_round_ss&expand=2893)
39137#[inline]
39138#[target_feature(enable = "avx512f")]
39139#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39140#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
39141#[rustc_legacy_const_generics(4, 5, 6)]
39142pub fn _mm_mask_getmant_round_ss<
39143    const NORM: _MM_MANTISSA_NORM_ENUM,
39144    const SIGN: _MM_MANTISSA_SIGN_ENUM,
39145    const SAE: i32,
39146>(
39147    src: __m128,
39148    k: __mmask8,
39149    a: __m128,
39150    b: __m128,
39151) -> __m128 {
39152    unsafe {
39153        static_assert_uimm_bits!(NORM, 4);
39154        static_assert_uimm_bits!(SIGN, 2);
39155        static_assert_mantissas_sae!(SAE);
39156        let a = a.as_f32x4();
39157        let b = b.as_f32x4();
39158        let src = src.as_f32x4();
39159        let r = vgetmantss(a, b, SIGN << 2 | NORM, src, k, SAE);
39160        transmute(r)
39161    }
39162}
39163
39164/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39165/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39166///    _MM_MANT_NORM_1_2     // interval [1, 2)\
39167///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
39168///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
39169///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39170/// The sign is determined by sc which can take the following values:\
39171///    _MM_MANT_SIGN_src     // sign = sign(src)\
39172///    _MM_MANT_SIGN_zero    // sign = 0\
39173///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
39174/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39175///
39176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_round_ss&expand=2894)
39177#[inline]
39178#[target_feature(enable = "avx512f")]
39179#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39180#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
39181#[rustc_legacy_const_generics(3, 4, 5)]
39182pub fn _mm_maskz_getmant_round_ss<
39183    const NORM: _MM_MANTISSA_NORM_ENUM,
39184    const SIGN: _MM_MANTISSA_SIGN_ENUM,
39185    const SAE: i32,
39186>(
39187    k: __mmask8,
39188    a: __m128,
39189    b: __m128,
39190) -> __m128 {
39191    unsafe {
39192        static_assert_uimm_bits!(NORM, 4);
39193        static_assert_uimm_bits!(SIGN, 2);
39194        static_assert_mantissas_sae!(SAE);
39195        let a = a.as_f32x4();
39196        let b = b.as_f32x4();
39197        let r = vgetmantss(a, b, SIGN << 2 | NORM, f32x4::ZERO, k, SAE);
39198        transmute(r)
39199    }
39200}
39201
39202/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39203/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39204///    _MM_MANT_NORM_1_2     // interval [1, 2)\
39205///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
39206///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
39207///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39208/// The sign is determined by sc which can take the following values:\
39209///    _MM_MANT_SIGN_src     // sign = sign(src)\
39210///    _MM_MANT_SIGN_zero    // sign = 0\
39211///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
39212/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39213///
39214/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_round_sd&expand=2889)
39215#[inline]
39216#[target_feature(enable = "avx512f")]
39217#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39218#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
39219#[rustc_legacy_const_generics(2, 3, 4)]
39220pub fn _mm_getmant_round_sd<
39221    const NORM: _MM_MANTISSA_NORM_ENUM,
39222    const SIGN: _MM_MANTISSA_SIGN_ENUM,
39223    const SAE: i32,
39224>(
39225    a: __m128d,
39226    b: __m128d,
39227) -> __m128d {
39228    unsafe {
39229        static_assert_uimm_bits!(NORM, 4);
39230        static_assert_uimm_bits!(SIGN, 2);
39231        static_assert_mantissas_sae!(SAE);
39232        let a = a.as_f64x2();
39233        let b = b.as_f64x2();
39234        let r = vgetmantsd(a, b, SIGN << 2 | NORM, f64x2::ZERO, 0b1, SAE);
39235        transmute(r)
39236    }
39237}
39238
39239/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39240/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39241///    _MM_MANT_NORM_1_2     // interval [1, 2)\
39242///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
39243///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
39244///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39245/// The sign is determined by sc which can take the following values:\
39246///    _MM_MANT_SIGN_src     // sign = sign(src)\
39247///    _MM_MANT_SIGN_zero    // sign = 0\
39248///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
39249/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39250///
39251/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_round_sd&expand=2890)
39252#[inline]
39253#[target_feature(enable = "avx512f")]
39254#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39255#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
39256#[rustc_legacy_const_generics(4, 5, 6)]
39257pub fn _mm_mask_getmant_round_sd<
39258    const NORM: _MM_MANTISSA_NORM_ENUM,
39259    const SIGN: _MM_MANTISSA_SIGN_ENUM,
39260    const SAE: i32,
39261>(
39262    src: __m128d,
39263    k: __mmask8,
39264    a: __m128d,
39265    b: __m128d,
39266) -> __m128d {
39267    unsafe {
39268        static_assert_uimm_bits!(NORM, 4);
39269        static_assert_uimm_bits!(SIGN, 2);
39270        static_assert_mantissas_sae!(SAE);
39271        let a = a.as_f64x2();
39272        let b = b.as_f64x2();
39273        let src = src.as_f64x2();
39274        let r = vgetmantsd(a, b, SIGN << 2 | NORM, src, k, SAE);
39275        transmute(r)
39276    }
39277}
39278
39279/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39280/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39281///    _MM_MANT_NORM_1_2     // interval [1, 2)\
39282///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
39283///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
39284///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39285/// The sign is determined by sc which can take the following values:\
39286///    _MM_MANT_SIGN_src     // sign = sign(src)\
39287///    _MM_MANT_SIGN_zero    // sign = 0\
39288///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
39289/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39290///
39291/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_round_sd&expand=2891)
39292#[inline]
39293#[target_feature(enable = "avx512f")]
39294#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39295#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
39296#[rustc_legacy_const_generics(3, 4, 5)]
39297pub fn _mm_maskz_getmant_round_sd<
39298    const NORM: _MM_MANTISSA_NORM_ENUM,
39299    const SIGN: _MM_MANTISSA_SIGN_ENUM,
39300    const SAE: i32,
39301>(
39302    k: __mmask8,
39303    a: __m128d,
39304    b: __m128d,
39305) -> __m128d {
39306    unsafe {
39307        static_assert_uimm_bits!(NORM, 4);
39308        static_assert_uimm_bits!(SIGN, 2);
39309        static_assert_mantissas_sae!(SAE);
39310        let a = a.as_f64x2();
39311        let b = b.as_f64x2();
39312        let r = vgetmantsd(a, b, SIGN << 2 | NORM, f64x2::ZERO, k, SAE);
39313        transmute(r)
39314    }
39315}
39316
39317/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39318/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39319/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39320/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39321/// * [`_MM_FROUND_TO_POS_INF`] : round up
39322/// * [`_MM_FROUND_TO_ZERO`] : truncate
39323/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39324///
39325/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
39326/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_round_ss&expand=4796)
39327#[inline]
39328#[target_feature(enable = "avx512f")]
39329#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39330#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
39331#[rustc_legacy_const_generics(2, 3)]
39332pub fn _mm_roundscale_round_ss<const IMM8: i32, const SAE: i32>(a: __m128, b: __m128) -> __m128 {
39333    unsafe {
39334        static_assert_uimm_bits!(IMM8, 8);
39335        static_assert_mantissas_sae!(SAE);
39336        let a = a.as_f32x4();
39337        let b = b.as_f32x4();
39338        let r = vrndscaless(a, b, f32x4::ZERO, 0b11111111, IMM8, SAE);
39339        transmute(r)
39340    }
39341}
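// Illustrative usage (sketch only, `avx512f`-enabled caller assumed): IMM8 = 0 requests zero
// fraction bits with round-to-nearest, i.e. it rounds b[0] to the nearest integer.
//
//     let a = _mm_set_ss(0.0);
//     let b = _mm_set_ss(2.6);
//     let r = _mm_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
//     assert_eq!(_mm_cvtss_f32(r), 3.0);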
39342
39343/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39344/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39345/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39346/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39347/// * [`_MM_FROUND_TO_POS_INF`] : round up
39348/// * [`_MM_FROUND_TO_ZERO`] : truncate
39349/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39350///
39351/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
39352/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_round_ss&expand=4794)
39353#[inline]
39354#[target_feature(enable = "avx512f")]
39355#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39356#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
39357#[rustc_legacy_const_generics(4, 5)]
39358pub fn _mm_mask_roundscale_round_ss<const IMM8: i32, const SAE: i32>(
39359    src: __m128,
39360    k: __mmask8,
39361    a: __m128,
39362    b: __m128,
39363) -> __m128 {
39364    unsafe {
39365        static_assert_uimm_bits!(IMM8, 8);
39366        static_assert_mantissas_sae!(SAE);
39367        let a = a.as_f32x4();
39368        let b = b.as_f32x4();
39369        let src = src.as_f32x4();
39370        let r = vrndscaless(a, b, src, k, IMM8, SAE);
39371        transmute(r)
39372    }
39373}
39374
39375/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39376/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39377/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39378/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39379/// * [`_MM_FROUND_TO_POS_INF`] : round up
39380/// * [`_MM_FROUND_TO_ZERO`] : truncate
39381/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39382///
39383/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
39384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_round_ss&expand=4795)
39385#[inline]
39386#[target_feature(enable = "avx512f")]
39387#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39388#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
39389#[rustc_legacy_const_generics(3, 4)]
39390pub fn _mm_maskz_roundscale_round_ss<const IMM8: i32, const SAE: i32>(
39391    k: __mmask8,
39392    a: __m128,
39393    b: __m128,
39394) -> __m128 {
39395    unsafe {
39396        static_assert_uimm_bits!(IMM8, 8);
39397        static_assert_mantissas_sae!(SAE);
39398        let a = a.as_f32x4();
39399        let b = b.as_f32x4();
39400        let r = vrndscaless(a, b, f32x4::ZERO, k, IMM8, SAE);
39401        transmute(r)
39402    }
39403}
39404
39405/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
39406/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39407/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39408/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39409/// * [`_MM_FROUND_TO_POS_INF`] : round up
39410/// * [`_MM_FROUND_TO_ZERO`] : truncate
39411/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39412///
39413/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
39414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_round_sd&expand=4793)
39415#[inline]
39416#[target_feature(enable = "avx512f")]
39417#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39418#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
39419#[rustc_legacy_const_generics(2, 3)]
39420pub fn _mm_roundscale_round_sd<const IMM8: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
39421    unsafe {
39422        static_assert_uimm_bits!(IMM8, 8);
39423        static_assert_mantissas_sae!(SAE);
39424        let a = a.as_f64x2();
39425        let b = b.as_f64x2();
39426        let r = vrndscalesd(a, b, f64x2::ZERO, 0b11111111, IMM8, SAE);
39427        transmute(r)
39428    }
39429}
39430
39431/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39432/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39433/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39434/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39435/// * [`_MM_FROUND_TO_POS_INF`] : round up
39436/// * [`_MM_FROUND_TO_ZERO`] : truncate
39437/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39438///
39439/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
39440/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_round_sd&expand=4791)
39441#[inline]
39442#[target_feature(enable = "avx512f")]
39443#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39444#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
39445#[rustc_legacy_const_generics(4, 5)]
39446pub fn _mm_mask_roundscale_round_sd<const IMM8: i32, const SAE: i32>(
39447    src: __m128d,
39448    k: __mmask8,
39449    a: __m128d,
39450    b: __m128d,
39451) -> __m128d {
39452    unsafe {
39453        static_assert_uimm_bits!(IMM8, 8);
39454        static_assert_mantissas_sae!(SAE);
39455        let a = a.as_f64x2();
39456        let b = b.as_f64x2();
39457        let src = src.as_f64x2();
39458        let r = vrndscalesd(a, b, src, k, IMM8, SAE);
39459        transmute(r)
39460    }
39461}
39462
39463/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39464/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39465/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39466/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39467/// * [`_MM_FROUND_TO_POS_INF`] : round up
39468/// * [`_MM_FROUND_TO_ZERO`] : truncate
39469/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39470///
39471/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
39472/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_round_sd&expand=4792)
39473#[inline]
39474#[target_feature(enable = "avx512f")]
39475#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39476#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
39477#[rustc_legacy_const_generics(3, 4)]
39478pub fn _mm_maskz_roundscale_round_sd<const IMM8: i32, const SAE: i32>(
39479    k: __mmask8,
39480    a: __m128d,
39481    b: __m128d,
39482) -> __m128d {
39483    unsafe {
39484        static_assert_uimm_bits!(IMM8, 8);
39485        static_assert_mantissas_sae!(SAE);
39486        let a = a.as_f64x2();
39487        let b = b.as_f64x2();
39488        let r = vrndscalesd(a, b, f64x2::ZERO, k, IMM8, SAE);
39489        transmute(r)
39490    }
39491}
39492
39493/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39494///
39495/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39496/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39497/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39498/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39499/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39500/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39501///
39502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_round_ss&expand=4895)
39503#[inline]
39504#[target_feature(enable = "avx512f")]
39505#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39506#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
39507#[rustc_legacy_const_generics(2)]
39508pub fn _mm_scalef_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
39509    unsafe {
39510        static_assert_rounding!(ROUNDING);
39511        let a = a.as_f32x4();
39512        let b = b.as_f32x4();
39513        let r = vscalefss(a, b, f32x4::ZERO, 0b11111111, ROUNDING);
39514        transmute(r)
39515    }
39516}
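// Illustrative usage (sketch only, `avx512f`-enabled caller assumed): scalef computes
// a[0] * 2^floor(b[0]) in the lower lane, here 3.0 * 2^4 = 48.0.
//
//     let a = _mm_set_ss(3.0);
//     let b = _mm_set_ss(4.0);
//     let r = _mm_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
//     assert_eq!(_mm_cvtss_f32(r), 48.0);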
39517
39518/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39519///
39520/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39521/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39522/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39523/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39524/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39525/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39526///
39527/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_round_ss&expand=4893)
39528#[inline]
39529#[target_feature(enable = "avx512f")]
39530#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39531#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
39532#[rustc_legacy_const_generics(4)]
39533pub fn _mm_mask_scalef_round_ss<const ROUNDING: i32>(
39534    src: __m128,
39535    k: __mmask8,
39536    a: __m128,
39537    b: __m128,
39538) -> __m128 {
39539    unsafe {
39540        static_assert_rounding!(ROUNDING);
39541        let a = a.as_f32x4();
39542        let b = b.as_f32x4();
39543        let src = src.as_f32x4();
39544        let r = vscalefss(a, b, src, k, ROUNDING);
39545        transmute(r)
39546    }
39547}
39548
39549/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39550///
39551/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39552/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39553/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39554/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39555/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39556/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39557///
39558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_round_ss&expand=4894)
39559#[inline]
39560#[target_feature(enable = "avx512f")]
39561#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39562#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
39563#[rustc_legacy_const_generics(3)]
39564pub fn _mm_maskz_scalef_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
39565    unsafe {
39566        static_assert_rounding!(ROUNDING);
39567        let a = a.as_f32x4();
39568        let b = b.as_f32x4();
39569        let r = vscalefss(a, b, f32x4::ZERO, k, ROUNDING);
39570        transmute(r)
39571    }
39572}
39573
39574/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
39575///
39576/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39577/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39578/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39579/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39580/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39581/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39582///
39583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_round_sd&expand=4892)
39584#[inline]
39585#[target_feature(enable = "avx512f")]
39586#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39587#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
39588#[rustc_legacy_const_generics(2)]
39589pub fn _mm_scalef_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
39590    unsafe {
39591        static_assert_rounding!(ROUNDING);
39592        let a = a.as_f64x2();
39593        let b = b.as_f64x2();
39594        let r = vscalefsd(a, b, f64x2::ZERO, 0b11111111, ROUNDING);
39595        transmute(r)
39596    }
39597}
39598
39599/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39600///
39601/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39602/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39603/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39604/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39605/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39606/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39607///
39608/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_round_sd&expand=4890)
39609#[inline]
39610#[target_feature(enable = "avx512f")]
39611#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39612#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
39613#[rustc_legacy_const_generics(4)]
39614pub fn _mm_mask_scalef_round_sd<const ROUNDING: i32>(
39615    src: __m128d,
39616    k: __mmask8,
39617    a: __m128d,
39618    b: __m128d,
39619) -> __m128d {
39620    unsafe {
        static_assert_rounding!(ROUNDING);
39621        let a = a.as_f64x2();
39622        let b = b.as_f64x2();
39623        let src = src.as_f64x2();
39624        let r = vscalefsd(a, b, src, k, ROUNDING);
39625        transmute(r)
39626    }
39627}
39628
39629/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39630///
39631/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39632/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39633/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39634/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39635/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39636/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39637///
39638/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_round_sd&expand=4891)
39639#[inline]
39640#[target_feature(enable = "avx512f")]
39641#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39642#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
39643#[rustc_legacy_const_generics(3)]
39644pub fn _mm_maskz_scalef_round_sd<const ROUNDING: i32>(
39645    k: __mmask8,
39646    a: __m128d,
39647    b: __m128d,
39648) -> __m128d {
39649    unsafe {
39650        static_assert_rounding!(ROUNDING);
39651        let a = a.as_f64x2();
39652        let b = b.as_f64x2();
39653        let r = vscalefsd(a, b, f64x2::ZERO, k, ROUNDING);
39654        transmute(r)
39655    }
39656}
39657
39658/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39659///
39660/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39661/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39662/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39663/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39664/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39665/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39666///
39667/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmadd_round_ss&expand=2573)
39668#[inline]
39669#[target_feature(enable = "avx512f")]
39670#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39671#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39672#[rustc_legacy_const_generics(3)]
39673pub fn _mm_fmadd_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
39674    unsafe {
39675        static_assert_rounding!(ROUNDING);
39676        let extracta: f32 = simd_extract!(a, 0);
39677        let extractb: f32 = simd_extract!(b, 0);
39678        let extractc: f32 = simd_extract!(c, 0);
39679        let r = vfmaddssround(extracta, extractb, extractc, ROUNDING);
39680        simd_insert!(a, 0, r)
39681    }
39682}
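
// Illustrative sketch (hypothetical helper, not part of the public surface):
// computes a[0] * b[0] + c[0] in one fused step with rounding fixed to
// nearest-even and exceptions suppressed; lanes 1..=3 are copied from `a`.
#[target_feature(enable = "avx512f")]
fn _example_fmadd_round_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
    _mm_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c)
}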
39683
39684/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39685///
39686/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39687/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39688/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39689/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39690/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39691/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39692///
39693/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_round_ss&expand=2574)
39694#[inline]
39695#[target_feature(enable = "avx512f")]
39696#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39697#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39698#[rustc_legacy_const_generics(4)]
39699pub fn _mm_mask_fmadd_round_ss<const ROUNDING: i32>(
39700    a: __m128,
39701    k: __mmask8,
39702    b: __m128,
39703    c: __m128,
39704) -> __m128 {
39705    unsafe {
39706        static_assert_rounding!(ROUNDING);
39707        let mut fmadd: f32 = simd_extract!(a, 0);
39708        if (k & 0b00000001) != 0 {
39709            let extractb: f32 = simd_extract!(b, 0);
39710            let extractc: f32 = simd_extract!(c, 0);
39711            fmadd = vfmaddssround(fmadd, extractb, extractc, ROUNDING);
39712        }
39713        simd_insert!(a, 0, fmadd)
39714    }
39715}
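
// Sketch of the writemask form (hypothetical helper): with mask bit 0 set the
// lower lane becomes a[0] * b[0] + c[0]; with a zero mask the lower lane is
// simply a[0]. Upper lanes always come from `a`.
#[target_feature(enable = "avx512f")]
fn _example_mask_fmadd_round_ss(a: __m128, b: __m128, c: __m128) -> (__m128, __m128) {
    let fused = _mm_mask_fmadd_round_ss::<{ _MM_FROUND_CUR_DIRECTION }>(a, 0b0000_0001, b, c);
    let passthrough = _mm_mask_fmadd_round_ss::<{ _MM_FROUND_CUR_DIRECTION }>(a, 0b0000_0000, b, c);
    (fused, passthrough)
}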
39716
39717/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39718///
39719/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39720/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39721/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39722/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39723/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39724/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39725///
39726/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_round_ss&expand=2576)
39727#[inline]
39728#[target_feature(enable = "avx512f")]
39729#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39730#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39731#[rustc_legacy_const_generics(4)]
39732pub fn _mm_maskz_fmadd_round_ss<const ROUNDING: i32>(
39733    k: __mmask8,
39734    a: __m128,
39735    b: __m128,
39736    c: __m128,
39737) -> __m128 {
39738    unsafe {
39739        static_assert_rounding!(ROUNDING);
39740        let mut fmadd: f32 = 0.;
39741        if (k & 0b00000001) != 0 {
39742            let extracta: f32 = simd_extract!(a, 0);
39743            let extractb: f32 = simd_extract!(b, 0);
39744            let extractc: f32 = simd_extract!(c, 0);
39745            fmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
39746        }
39747        simd_insert!(a, 0, fmadd)
39748    }
39749}
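
// Sketch of the zeromask form (hypothetical helper): a cleared mask bit 0
// zeroes the lower lane instead of keeping a[0]; upper lanes still come from `a`.
#[target_feature(enable = "avx512f")]
fn _example_maskz_fmadd_round_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
    // Mask bit 0 is clear, so lane 0 of the result is 0.0.
    _mm_maskz_fmadd_round_ss::<{ _MM_FROUND_CUR_DIRECTION }>(0b0000_0000, a, b, c)
}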
39750
39751/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
39752///
39753/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39754/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39755/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39756/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39757/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39758/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39759///
39760/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_round_ss&expand=2575)
39761#[inline]
39762#[target_feature(enable = "avx512f")]
39763#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39764#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39765#[rustc_legacy_const_generics(4)]
39766pub fn _mm_mask3_fmadd_round_ss<const ROUNDING: i32>(
39767    a: __m128,
39768    b: __m128,
39769    c: __m128,
39770    k: __mmask8,
39771) -> __m128 {
39772    unsafe {
39773        static_assert_rounding!(ROUNDING);
39774        let mut fmadd: f32 = simd_extract!(c, 0);
39775        if (k & 0b00000001) != 0 {
39776            let extracta: f32 = simd_extract!(a, 0);
39777            let extractb: f32 = simd_extract!(b, 0);
39778            fmadd = vfmaddssround(extracta, extractb, fmadd, ROUNDING);
39779        }
39780        simd_insert!(c, 0, fmadd)
39781    }
39782}
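
// Sketch of the mask3 form (hypothetical helper): here `c` is both the addend
// and the pass-through source, so a cleared mask bit 0 leaves c[0] untouched
// and the upper lanes are copied from `c` rather than `a`.
#[target_feature(enable = "avx512f")]
fn _example_mask3_fmadd_round_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    _mm_mask3_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c, k)
}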
39783
39784/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
39785///
39786/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39787/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39788/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39789/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39790/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39791/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39792///
39793/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmadd_round_sd&expand=2569)
39794#[inline]
39795#[target_feature(enable = "avx512f")]
39796#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39797#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39798#[rustc_legacy_const_generics(3)]
39799pub fn _mm_fmadd_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
39800    unsafe {
39801        static_assert_rounding!(ROUNDING);
39802        let extracta: f64 = simd_extract!(a, 0);
39803        let extractb: f64 = simd_extract!(b, 0);
39804        let extractc: f64 = simd_extract!(c, 0);
39805        let fmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
39806        simd_insert!(a, 0, fmadd)
39807    }
39808}
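
// Double-precision counterpart sketch (hypothetical helper): fuses
// a[0] * b[0] + c[0] under an explicit rounding mode, here truncation with
// exceptions suppressed; the upper element is taken from `a`.
#[target_feature(enable = "avx512f")]
fn _example_fmadd_round_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    _mm_fmadd_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c)
}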
39809
39810/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39811///
39812/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39813/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39814/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39815/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39816/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39817/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39818///
39819/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_round_sd&expand=2570)
39820#[inline]
39821#[target_feature(enable = "avx512f")]
39822#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39823#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39824#[rustc_legacy_const_generics(4)]
39825pub fn _mm_mask_fmadd_round_sd<const ROUNDING: i32>(
39826    a: __m128d,
39827    k: __mmask8,
39828    b: __m128d,
39829    c: __m128d,
39830) -> __m128d {
39831    unsafe {
39832        static_assert_rounding!(ROUNDING);
39833        let mut fmadd: f64 = simd_extract!(a, 0);
39834        if (k & 0b00000001) != 0 {
39835            let extractb: f64 = simd_extract!(b, 0);
39836            let extractc: f64 = simd_extract!(c, 0);
39837            fmadd = vfmaddsdround(fmadd, extractb, extractc, ROUNDING);
39838        }
39839        simd_insert!(a, 0, fmadd)
39840    }
39841}
39842
39843/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39844///
39845/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39846/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39847/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39848/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39849/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39850/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39851///
39852/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_round_sd&expand=2572)
39853#[inline]
39854#[target_feature(enable = "avx512f")]
39855#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39856#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39857#[rustc_legacy_const_generics(4)]
39858pub fn _mm_maskz_fmadd_round_sd<const ROUNDING: i32>(
39859    k: __mmask8,
39860    a: __m128d,
39861    b: __m128d,
39862    c: __m128d,
39863) -> __m128d {
39864    unsafe {
39865        static_assert_rounding!(ROUNDING);
39866        let mut fmadd: f64 = 0.;
39867        if (k & 0b00000001) != 0 {
39868            let extracta: f64 = simd_extract!(a, 0);
39869            let extractb: f64 = simd_extract!(b, 0);
39870            let extractc: f64 = simd_extract!(c, 0);
39871            fmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
39872        }
39873        simd_insert!(a, 0, fmadd)
39874    }
39875}
39876
39877/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
39878///
39879/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39880/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39881/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39882/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39883/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39884/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39885///
39886/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_round_sd&expand=2571)
39887#[inline]
39888#[target_feature(enable = "avx512f")]
39889#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39890#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39891#[rustc_legacy_const_generics(4)]
39892pub fn _mm_mask3_fmadd_round_sd<const ROUNDING: i32>(
39893    a: __m128d,
39894    b: __m128d,
39895    c: __m128d,
39896    k: __mmask8,
39897) -> __m128d {
39898    unsafe {
39899        static_assert_rounding!(ROUNDING);
39900        let mut fmadd: f64 = simd_extract!(c, 0);
39901        if (k & 0b00000001) != 0 {
39902            let extracta: f64 = simd_extract!(a, 0);
39903            let extractb: f64 = simd_extract!(b, 0);
39904            fmadd = vfmaddsdround(extracta, extractb, fmadd, ROUNDING);
39905        }
39906        simd_insert!(c, 0, fmadd)
39907    }
39908}
39909
39910/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39911///
39912/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39913/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39914/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39915/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39916/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39917/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39918///
39919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmsub_round_ss&expand=2659)
39920#[inline]
39921#[target_feature(enable = "avx512f")]
39922#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39923#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
39924#[rustc_legacy_const_generics(3)]
39925pub fn _mm_fmsub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
39926    unsafe {
39927        static_assert_rounding!(ROUNDING);
39928        let extracta: f32 = simd_extract!(a, 0);
39929        let extractb: f32 = simd_extract!(b, 0);
39930        let extractc: f32 = simd_extract!(c, 0);
39931        let extractc = -extractc;
39932        let fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
39933        simd_insert!(a, 0, fmsub)
39934    }
39935}
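
// Sketch (hypothetical helper): fmsub is the same fused operation with the
// addend negated, i.e. a[0] * b[0] - c[0] in the lower lane.
#[target_feature(enable = "avx512f")]
fn _example_fmsub_round_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
    _mm_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c)
}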
39936
39937/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39938///
39939/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39940/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39941/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39942/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39943/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39944/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39945///
39946/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_round_ss&expand=2660)
39947#[inline]
39948#[target_feature(enable = "avx512f")]
39949#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39950#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
39951#[rustc_legacy_const_generics(4)]
39952pub fn _mm_mask_fmsub_round_ss<const ROUNDING: i32>(
39953    a: __m128,
39954    k: __mmask8,
39955    b: __m128,
39956    c: __m128,
39957) -> __m128 {
39958    unsafe {
39959        static_assert_rounding!(ROUNDING);
39960        let mut fmsub: f32 = simd_extract!(a, 0);
39961        if (k & 0b00000001) != 0 {
39962            let extractb: f32 = simd_extract!(b, 0);
39963            let extractc: f32 = simd_extract!(c, 0);
39964            let extractc = -extractc;
39965            fmsub = vfmaddssround(fmsub, extractb, extractc, ROUNDING);
39966        }
39967        simd_insert!(a, 0, fmsub)
39968    }
39969}
39970
39971/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39972///
39973/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39974/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39975/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39976/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39977/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39978/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39979///
39980/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_round_ss&expand=2662)
39981#[inline]
39982#[target_feature(enable = "avx512f")]
39983#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39984#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
39985#[rustc_legacy_const_generics(4)]
39986pub fn _mm_maskz_fmsub_round_ss<const ROUNDING: i32>(
39987    k: __mmask8,
39988    a: __m128,
39989    b: __m128,
39990    c: __m128,
39991) -> __m128 {
39992    unsafe {
39993        static_assert_rounding!(ROUNDING);
39994        let mut fmsub: f32 = 0.;
39995        if (k & 0b00000001) != 0 {
39996            let extracta: f32 = simd_extract!(a, 0);
39997            let extractb: f32 = simd_extract!(b, 0);
39998            let extractc: f32 = simd_extract!(c, 0);
39999            let extractc = -extractc;
40000            fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40001        }
40002        simd_insert!(a, 0, fmsub)
40003    }
40004}
40005
40006/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
40007///
40008/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40009/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40010/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40011/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40012/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40013/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40014///
40015/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_round_ss&expand=2661)
40016#[inline]
40017#[target_feature(enable = "avx512f")]
40018#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40019#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40020#[rustc_legacy_const_generics(4)]
40021pub fn _mm_mask3_fmsub_round_ss<const ROUNDING: i32>(
40022    a: __m128,
40023    b: __m128,
40024    c: __m128,
40025    k: __mmask8,
40026) -> __m128 {
40027    unsafe {
40028        static_assert_rounding!(ROUNDING);
40029        let mut fmsub: f32 = simd_extract!(c, 0);
40030        if (k & 0b00000001) != 0 {
40031            let extracta: f32 = simd_extract!(a, 0);
40032            let extractb: f32 = simd_extract!(b, 0);
40033            let extractc = -fmsub;
40034            fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40035        }
40036        simd_insert!(c, 0, fmsub)
40037    }
40038}
40039
40040/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40041///
40042/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40043/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40044/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40045/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40046/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40047/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40048///
40049/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmsub_round_sd&expand=2655)
40050#[inline]
40051#[target_feature(enable = "avx512f")]
40052#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40053#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40054#[rustc_legacy_const_generics(3)]
40055pub fn _mm_fmsub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
40056    unsafe {
40057        static_assert_rounding!(ROUNDING);
40058        let extracta: f64 = simd_extract!(a, 0);
40059        let extractb: f64 = simd_extract!(b, 0);
40060        let extractc: f64 = simd_extract!(c, 0);
40061        let extractc = -extractc;
40062        let fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40063        simd_insert!(a, 0, fmsub)
40064    }
40065}
40066
40067/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40068///
40069/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40070/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40071/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40072/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40073/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40074/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40075///
40076/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_round_sd&expand=2656)
40077#[inline]
40078#[target_feature(enable = "avx512f")]
40079#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40080#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40081#[rustc_legacy_const_generics(4)]
40082pub fn _mm_mask_fmsub_round_sd<const ROUNDING: i32>(
40083    a: __m128d,
40084    k: __mmask8,
40085    b: __m128d,
40086    c: __m128d,
40087) -> __m128d {
40088    unsafe {
40089        static_assert_rounding!(ROUNDING);
40090        let mut fmsub: f64 = simd_extract!(a, 0);
40091        if (k & 0b00000001) != 0 {
40092            let extractb: f64 = simd_extract!(b, 0);
40093            let extractc: f64 = simd_extract!(c, 0);
40094            let extractc = -extractc;
40095            fmsub = vfmaddsdround(fmsub, extractb, extractc, ROUNDING);
40096        }
40097        simd_insert!(a, 0, fmsub)
40098    }
40099}
40100
40101/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40102///
40103/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40104/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40105/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40106/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40107/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40108/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40109///
40110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_round_sd&expand=2658)
40111#[inline]
40112#[target_feature(enable = "avx512f")]
40113#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40114#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40115#[rustc_legacy_const_generics(4)]
40116pub fn _mm_maskz_fmsub_round_sd<const ROUNDING: i32>(
40117    k: __mmask8,
40118    a: __m128d,
40119    b: __m128d,
40120    c: __m128d,
40121) -> __m128d {
40122    unsafe {
40123        static_assert_rounding!(ROUNDING);
40124        let mut fmsub: f64 = 0.;
40125        if (k & 0b00000001) != 0 {
40126            let extracta: f64 = simd_extract!(a, 0);
40127            let extractb: f64 = simd_extract!(b, 0);
40128            let extractc: f64 = simd_extract!(c, 0);
40129            let extractc = -extractc;
40130            fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40131        }
40132        simd_insert!(a, 0, fmsub)
40133    }
40134}
40135
40136/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
40137///
40138/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40139/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40140/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40141/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40142/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40143/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40144///
40145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_round_sd&expand=2657)
40146#[inline]
40147#[target_feature(enable = "avx512f")]
40148#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40149#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40150#[rustc_legacy_const_generics(4)]
40151pub fn _mm_mask3_fmsub_round_sd<const ROUNDING: i32>(
40152    a: __m128d,
40153    b: __m128d,
40154    c: __m128d,
40155    k: __mmask8,
40156) -> __m128d {
40157    unsafe {
40158        static_assert_rounding!(ROUNDING);
40159        let mut fmsub: f64 = simd_extract!(c, 0);
40160        if (k & 0b00000001) != 0 {
40161            let extracta: f64 = simd_extract!(a, 0);
40162            let extractb: f64 = simd_extract!(b, 0);
40163            let extractc = -fmsub;
40164            fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40165        }
40166        simd_insert!(c, 0, fmsub)
40167    }
40168}
40169
40170/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
40171///
40172/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40173/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40174/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40175/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40176/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40177/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40178///
40179/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmadd_round_ss&expand=2739)
40180#[inline]
40181#[target_feature(enable = "avx512f")]
40182#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40183#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40184#[rustc_legacy_const_generics(3)]
40185pub fn _mm_fnmadd_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
40186    unsafe {
40187        static_assert_rounding!(ROUNDING);
40188        let extracta: f32 = simd_extract!(a, 0);
40189        let extracta = -extracta;
40190        let extractb: f32 = simd_extract!(b, 0);
40191        let extractc: f32 = simd_extract!(c, 0);
40192        let fnmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40193        simd_insert!(a, 0, fnmadd)
40194    }
40195}
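
// Sketch (hypothetical helper): fnmadd negates the product, giving
// -(a[0] * b[0]) + c[0] in the lower lane.
#[target_feature(enable = "avx512f")]
fn _example_fnmadd_round_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
    _mm_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c)
}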
40196
40197/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40198///
40199/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40200/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40201/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40202/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40203/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40204/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40205///
40206/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_round_ss&expand=2740)
40207#[inline]
40208#[target_feature(enable = "avx512f")]
40209#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40210#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40211#[rustc_legacy_const_generics(4)]
40212pub fn _mm_mask_fnmadd_round_ss<const ROUNDING: i32>(
40213    a: __m128,
40214    k: __mmask8,
40215    b: __m128,
40216    c: __m128,
40217) -> __m128 {
40218    unsafe {
40219        static_assert_rounding!(ROUNDING);
40220        let mut fnmadd: f32 = simd_extract!(a, 0);
40221        if (k & 0b00000001) != 0 {
40222            let extracta = -fnmadd;
40223            let extractb: f32 = simd_extract!(b, 0);
40224            let extractc: f32 = simd_extract!(c, 0);
40225            fnmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40226        }
40227        simd_insert!(a, 0, fnmadd)
40228    }
40229}
40230
40231/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40232///
40233/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40234/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40235/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40236/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40237/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40238/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40239///
40240/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_round_ss&expand=2742)
40241#[inline]
40242#[target_feature(enable = "avx512f")]
40243#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40244#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40245#[rustc_legacy_const_generics(4)]
40246pub fn _mm_maskz_fnmadd_round_ss<const ROUNDING: i32>(
40247    k: __mmask8,
40248    a: __m128,
40249    b: __m128,
40250    c: __m128,
40251) -> __m128 {
40252    unsafe {
40253        static_assert_rounding!(ROUNDING);
40254        let mut fnmadd: f32 = 0.;
40255        if (k & 0b00000001) != 0 {
40256            let extracta: f32 = simd_extract!(a, 0);
40257            let extracta = -extracta;
40258            let extractb: f32 = simd_extract!(b, 0);
40259            let extractc: f32 = simd_extract!(c, 0);
40260            fnmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40261        }
40262        simd_insert!(a, 0, fnmadd)
40263    }
40264}
40265
40266/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
40267///
40268/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40269/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40270/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40271/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40272/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40273/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40274///
40275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_round_ss&expand=2741)
40276#[inline]
40277#[target_feature(enable = "avx512f")]
40278#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40279#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40280#[rustc_legacy_const_generics(4)]
40281pub fn _mm_mask3_fnmadd_round_ss<const ROUNDING: i32>(
40282    a: __m128,
40283    b: __m128,
40284    c: __m128,
40285    k: __mmask8,
40286) -> __m128 {
40287    unsafe {
40288        static_assert_rounding!(ROUNDING);
40289        let mut fnmadd: f32 = simd_extract!(c, 0);
40290        if (k & 0b00000001) != 0 {
40291            let extracta: f32 = simd_extract!(a, 0);
40292            let extracta = -extracta;
40293            let extractb: f32 = simd_extract!(b, 0);
40294            fnmadd = vfmaddssround(extracta, extractb, fnmadd, ROUNDING);
40295        }
40296        simd_insert!(c, 0, fnmadd)
40297    }
40298}
40299
40300/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40301///
40302/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40303/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40304/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40305/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40306/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40307/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40308///
40309/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmadd_round_sd&expand=2735)
40310#[inline]
40311#[target_feature(enable = "avx512f")]
40312#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40313#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40314#[rustc_legacy_const_generics(3)]
40315pub fn _mm_fnmadd_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
40316    unsafe {
40317        static_assert_rounding!(ROUNDING);
40318        let extracta: f64 = simd_extract!(a, 0);
40319        let extracta = -extracta;
40320        let extractb: f64 = simd_extract!(b, 0);
40321        let extractc: f64 = simd_extract!(c, 0);
40322        let fnmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40323        simd_insert!(a, 0, fnmadd)
40324    }
40325}
40326
40327/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40328///
40329/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40330/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40331/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40332/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40333/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40334/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40335///
40336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_round_sd&expand=2736)
40337#[inline]
40338#[target_feature(enable = "avx512f")]
40339#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40340#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40341#[rustc_legacy_const_generics(4)]
40342pub fn _mm_mask_fnmadd_round_sd<const ROUNDING: i32>(
40343    a: __m128d,
40344    k: __mmask8,
40345    b: __m128d,
40346    c: __m128d,
40347) -> __m128d {
40348    unsafe {
40349        static_assert_rounding!(ROUNDING);
40350        let mut fnmadd: f64 = simd_extract!(a, 0);
40351        if (k & 0b00000001) != 0 {
40352            let extracta = -fnmadd;
40353            let extractb: f64 = simd_extract!(b, 0);
40354            let extractc: f64 = simd_extract!(c, 0);
40355            fnmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40356        }
40357        simd_insert!(a, 0, fnmadd)
40358    }
40359}
40360
40361/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40362///
40363/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40364/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40365/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40366/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40367/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40368/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40369///
40370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_round_sd&expand=2738)
40371#[inline]
40372#[target_feature(enable = "avx512f")]
40373#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40374#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40375#[rustc_legacy_const_generics(4)]
40376pub fn _mm_maskz_fnmadd_round_sd<const ROUNDING: i32>(
40377    k: __mmask8,
40378    a: __m128d,
40379    b: __m128d,
40380    c: __m128d,
40381) -> __m128d {
40382    unsafe {
40383        static_assert_rounding!(ROUNDING);
40384        let mut fnmadd: f64 = 0.;
40385        if (k & 0b00000001) != 0 {
40386            let extracta: f64 = simd_extract!(a, 0);
40387            let extracta = -extracta;
40388            let extractb: f64 = simd_extract!(b, 0);
40389            let extractc: f64 = simd_extract!(c, 0);
40390            fnmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40391        }
40392        simd_insert!(a, 0, fnmadd)
40393    }
40394}
40395
40396/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
40397///
40398/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40399/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40400/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40401/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40402/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40403/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40404///
40405/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_round_sd&expand=2737)
40406#[inline]
40407#[target_feature(enable = "avx512f")]
40408#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40409#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40410#[rustc_legacy_const_generics(4)]
40411pub fn _mm_mask3_fnmadd_round_sd<const ROUNDING: i32>(
40412    a: __m128d,
40413    b: __m128d,
40414    c: __m128d,
40415    k: __mmask8,
40416) -> __m128d {
40417    unsafe {
40418        static_assert_rounding!(ROUNDING);
40419        let mut fnmadd: f64 = simd_extract!(c, 0);
40420        if (k & 0b00000001) != 0 {
40421            let extracta: f64 = simd_extract!(a, 0);
40422            let extracta = -extracta;
40423            let extractb: f64 = simd_extract!(b, 0);
40424            fnmadd = vfmaddsdround(extracta, extractb, fnmadd, ROUNDING);
40425        }
40426        simd_insert!(c, 0, fnmadd)
40427    }
40428}
40429
40430/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
40431///
40432/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40433/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40434/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40435/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40436/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40437/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40438///
40439/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmsub_round_ss&expand=2787)
40440#[inline]
40441#[target_feature(enable = "avx512f")]
40442#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40443#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40444#[rustc_legacy_const_generics(3)]
40445pub fn _mm_fnmsub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
40446    unsafe {
40447        static_assert_rounding!(ROUNDING);
40448        let extracta: f32 = simd_extract!(a, 0);
40449        let extracta = -extracta;
40450        let extractb: f32 = simd_extract!(b, 0);
40451        let extractc: f32 = simd_extract!(c, 0);
40452        let extractc = -extractc;
40453        let fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40454        simd_insert!(a, 0, fnmsub)
40455    }
40456}
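
// Sketch (hypothetical helper): fnmsub negates both the product and the
// addend, giving -(a[0] * b[0]) - c[0] in the lower lane.
#[target_feature(enable = "avx512f")]
fn _example_fnmsub_round_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
    _mm_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c)
}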
40457
40458/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40459///
40460/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40461/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40462/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40463/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40464/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40465/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40466///
40467/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_round_ss&expand=2788)
40468#[inline]
40469#[target_feature(enable = "avx512f")]
40470#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40471#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40472#[rustc_legacy_const_generics(4)]
40473pub fn _mm_mask_fnmsub_round_ss<const ROUNDING: i32>(
40474    a: __m128,
40475    k: __mmask8,
40476    b: __m128,
40477    c: __m128,
40478) -> __m128 {
40479    unsafe {
40480        static_assert_rounding!(ROUNDING);
40481        let mut fnmsub: f32 = simd_extract!(a, 0);
40482        if (k & 0b00000001) != 0 {
40483            let extracta = -fnmsub;
40484            let extractb: f32 = simd_extract!(b, 0);
40485            let extractc: f32 = simd_extract!(c, 0);
40486            let extractc = -extractc;
40487            fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40488        }
40489        simd_insert!(a, 0, fnmsub)
40490    }
40491}
40492
40493/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40494///
40495/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40496/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40497/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40498/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40499/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40500/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40501///
40502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_round_ss&expand=2790)
40503#[inline]
40504#[target_feature(enable = "avx512f")]
40505#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40506#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40507#[rustc_legacy_const_generics(4)]
40508pub fn _mm_maskz_fnmsub_round_ss<const ROUNDING: i32>(
40509    k: __mmask8,
40510    a: __m128,
40511    b: __m128,
40512    c: __m128,
40513) -> __m128 {
40514    unsafe {
40515        static_assert_rounding!(ROUNDING);
40516        let mut fnmsub: f32 = 0.;
40517        if (k & 0b00000001) != 0 {
40518            let extracta: f32 = simd_extract!(a, 0);
40519            let extracta = -extracta;
40520            let extractb: f32 = simd_extract!(b, 0);
40521            let extractc: f32 = simd_extract!(c, 0);
40522            let extractc = -extractc;
40523            fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40524        }
40525        simd_insert!(a, 0, fnmsub)
40526    }
40527}
40528
40529/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
40530///
40531/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40532/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40533/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40534/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40535/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40536/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40537///
40538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_round_ss&expand=2789)
40539#[inline]
40540#[target_feature(enable = "avx512f")]
40541#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40542#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40543#[rustc_legacy_const_generics(4)]
40544pub fn _mm_mask3_fnmsub_round_ss<const ROUNDING: i32>(
40545    a: __m128,
40546    b: __m128,
40547    c: __m128,
40548    k: __mmask8,
40549) -> __m128 {
40550    unsafe {
40551        static_assert_rounding!(ROUNDING);
40552        let mut fnmsub: f32 = simd_extract!(c, 0);
40553        if (k & 0b00000001) != 0 {
40554            let extracta: f32 = simd_extract!(a, 0);
40555            let extracta = -extracta;
40556            let extractb: f32 = simd_extract!(b, 0);
40557            let extractc = -fnmsub;
40558            fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40559        }
40560        simd_insert!(c, 0, fnmsub)
40561    }
40562}
40563
40564/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40565///
40566/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40567/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40568/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40569/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40570/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40571/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40572///
40573/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmsub_round_sd&expand=2783)
40574#[inline]
40575#[target_feature(enable = "avx512f")]
40576#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40577#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40578#[rustc_legacy_const_generics(3)]
40579pub fn _mm_fnmsub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
40580    unsafe {
40581        static_assert_rounding!(ROUNDING);
40582        let extracta: f64 = simd_extract!(a, 0);
40583        let extracta = -extracta;
40584        let extractb: f64 = simd_extract!(b, 0);
40585        let extractc: f64 = simd_extract!(c, 0);
40586        let extractc = -extractc;
40587        let fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40588        simd_insert!(a, 0, fnmsub)
40589    }
40590}
40591
40592/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40593///
40594/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40595/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40596/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40597/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40598/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40599/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40600///
40601/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_round_sd&expand=2784)
40602#[inline]
40603#[target_feature(enable = "avx512f")]
40604#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40605#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40606#[rustc_legacy_const_generics(4)]
40607pub fn _mm_mask_fnmsub_round_sd<const ROUNDING: i32>(
40608    a: __m128d,
40609    k: __mmask8,
40610    b: __m128d,
40611    c: __m128d,
40612) -> __m128d {
40613    unsafe {
40614        static_assert_rounding!(ROUNDING);
40615        let mut fnmsub: f64 = simd_extract!(a, 0);
40616        if (k & 0b00000001) != 0 {
40617            let extracta = -fnmsub;
40618            let extractb: f64 = simd_extract!(b, 0);
40619            let extractc: f64 = simd_extract!(c, 0);
40620            let extractc = -extractc;
40621            fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40622        }
40623        simd_insert!(a, 0, fnmsub)
40624    }
40625}
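
// Illustrative sketch (not part of the original source; the example function name is made up):
// when mask bit 0 is clear, the writemask variant leaves the lower lane of `a` untouched.
// Assumes AVX-512F is available.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _example_mm_mask_fnmsub_round_sd() {
    const R: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
    let a = _mm_set_sd(2.0);
    let b = _mm_set_sd(3.0);
    let c = _mm_set_sd(4.0);
    // Mask bit 0 set: the lower lane is -(2.0 * 3.0) - 4.0 == -10.0.
    assert_eq!(_mm_cvtsd_f64(_mm_mask_fnmsub_round_sd::<{ R }>(a, 0b1, b, c)), -10.0);
    // Mask bit 0 clear: the lower lane of `a` passes through unchanged.
    assert_eq!(_mm_cvtsd_f64(_mm_mask_fnmsub_round_sd::<{ R }>(a, 0b0, b, c)), 2.0);
}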
40626
40627/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40628///
40629/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40630/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40631/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40632/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40633/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40634/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40635///
40636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_round_sd&expand=2786)
40637#[inline]
40638#[target_feature(enable = "avx512f")]
40639#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40640#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40641#[rustc_legacy_const_generics(4)]
40642pub fn _mm_maskz_fnmsub_round_sd<const ROUNDING: i32>(
40643    k: __mmask8,
40644    a: __m128d,
40645    b: __m128d,
40646    c: __m128d,
40647) -> __m128d {
40648    unsafe {
40649        static_assert_rounding!(ROUNDING);
40650        let mut fnmsub: f64 = 0.;
40651        if (k & 0b00000001) != 0 {
40652            let extracta: f64 = simd_extract!(a, 0);
40653            let extracta = -extracta;
40654            let extractb: f64 = simd_extract!(b, 0);
40655            let extractc: f64 = simd_extract!(c, 0);
40656            let extractc = -extractc;
40657            fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40658        }
40659        simd_insert!(a, 0, fnmsub)
40660    }
40661}
40662
40663/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
40664///
40665/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40666/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40667/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40668/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40669/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40670/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40671///
40672/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_round_sd&expand=2785)
40673#[inline]
40674#[target_feature(enable = "avx512f")]
40675#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40676#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40677#[rustc_legacy_const_generics(4)]
40678pub fn _mm_mask3_fnmsub_round_sd<const ROUNDING: i32>(
40679    a: __m128d,
40680    b: __m128d,
40681    c: __m128d,
40682    k: __mmask8,
40683) -> __m128d {
40684    unsafe {
40685        static_assert_rounding!(ROUNDING);
40686        let mut fnmsub: f64 = simd_extract!(c, 0);
40687        if (k & 0b00000001) != 0 {
40688            let extracta: f64 = simd_extract!(a, 0);
40689            let extracta = -extracta;
40690            let extractb: f64 = simd_extract!(b, 0);
40691            let extractc = -fnmsub;
40692            fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40693        }
40694        simd_insert!(c, 0, fnmsub)
40695    }
40696}
40697
40698/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
40699///
40700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_ss&expand=2517)
40701#[inline]
40702#[target_feature(enable = "avx512f")]
40703#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40704#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
40705#[rustc_legacy_const_generics(3)]
40706pub fn _mm_fixupimm_ss<const IMM8: i32>(a: __m128, b: __m128, c: __m128i) -> __m128 {
40707    unsafe {
40708        static_assert_uimm_bits!(IMM8, 8);
40709        let a = a.as_f32x4();
40710        let b = b.as_f32x4();
40711        let c = c.as_i32x4();
40712        let r = vfixupimmss(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION);
40713        let fixupimm: f32 = simd_extract!(r, 0);
40714        let r = simd_insert!(a, 0, fixupimm);
40715        transmute(r)
40716    }
40717}
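
// Illustrative sketch (not part of the original source; the example function name is made up).
// `c` supplies a 4-bit fix-up token per input class of `b`; bits 3:0 cover the QNaN class, and
// token 0b1000 means "produce +0.0". `IMM8` only selects which classes report exceptions, so 0
// reports none. Assumes AVX-512F is available.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _example_mm_fixupimm_ss() {
    let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
    let b = _mm_set_ss(f32::NAN);
    let c = _mm_set1_epi32(0b1000);
    let r = _mm_fixupimm_ss::<0>(a, b, c);
    // The QNaN in the lower lane of `b` is fixed up to +0.0; the upper lanes come from `a`.
    assert_eq!(_mm_cvtss_f32(r), 0.0);
}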
40718
40719/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
40720///
40721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_ss&expand=2518)
40722#[inline]
40723#[target_feature(enable = "avx512f")]
40724#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40725#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
40726#[rustc_legacy_const_generics(4)]
40727pub fn _mm_mask_fixupimm_ss<const IMM8: i32>(
40728    a: __m128,
40729    k: __mmask8,
40730    b: __m128,
40731    c: __m128i,
40732) -> __m128 {
40733    unsafe {
40734        static_assert_uimm_bits!(IMM8, 8);
40735        let a = a.as_f32x4();
40736        let b = b.as_f32x4();
40737        let c = c.as_i32x4();
40738        let fixupimm = vfixupimmss(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
40739        let fixupimm: f32 = simd_extract!(fixupimm, 0);
40740        let r = simd_insert!(a, 0, fixupimm);
40741        transmute(r)
40742    }
40743}
40744
40745/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
40746///
40747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_ss&expand=2519)
40748#[inline]
40749#[target_feature(enable = "avx512f")]
40750#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40751#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
40752#[rustc_legacy_const_generics(4)]
40753pub fn _mm_maskz_fixupimm_ss<const IMM8: i32>(
40754    k: __mmask8,
40755    a: __m128,
40756    b: __m128,
40757    c: __m128i,
40758) -> __m128 {
40759    unsafe {
40760        static_assert_uimm_bits!(IMM8, 8);
40761        let a = a.as_f32x4();
40762        let b = b.as_f32x4();
40763        let c = c.as_i32x4();
40764        let fixupimm = vfixupimmssz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
40765        let fixupimm: f32 = simd_extract!(fixupimm, 0);
40766        let r = simd_insert!(a, 0, fixupimm);
40767        transmute(r)
40768    }
40769}
40770
40771/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
40772///
40773/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_sd&expand=2514)
40774#[inline]
40775#[target_feature(enable = "avx512f")]
40776#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40777#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
40778#[rustc_legacy_const_generics(3)]
40779pub fn _mm_fixupimm_sd<const IMM8: i32>(a: __m128d, b: __m128d, c: __m128i) -> __m128d {
40780    unsafe {
40781        static_assert_uimm_bits!(IMM8, 8);
40782        let a = a.as_f64x2();
40783        let b = b.as_f64x2();
40784        let c = c.as_i64x2();
40785        let fixupimm = vfixupimmsd(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION);
40786        let fixupimm: f64 = simd_extract!(fixupimm, 0);
40787        let r = simd_insert!(a, 0, fixupimm);
40788        transmute(r)
40789    }
40790}
40791
40792/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
40793///
40794/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_sd&expand=2515)
40795#[inline]
40796#[target_feature(enable = "avx512f")]
40797#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40798#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
40799#[rustc_legacy_const_generics(4)]
40800pub fn _mm_mask_fixupimm_sd<const IMM8: i32>(
40801    a: __m128d,
40802    k: __mmask8,
40803    b: __m128d,
40804    c: __m128i,
40805) -> __m128d {
40806    unsafe {
40807        static_assert_uimm_bits!(IMM8, 8);
40808        let a = a.as_f64x2();
40809        let b = b.as_f64x2();
40810        let c = c.as_i64x2();
40811        let fixupimm = vfixupimmsd(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
40812        let fixupimm: f64 = simd_extract!(fixupimm, 0);
40813        let r = simd_insert!(a, 0, fixupimm);
40814        transmute(r)
40815    }
40816}
40817
40818/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
40819///
40820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_sd&expand=2516)
40821#[inline]
40822#[target_feature(enable = "avx512f")]
40823#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40824#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
40825#[rustc_legacy_const_generics(4)]
40826pub fn _mm_maskz_fixupimm_sd<const IMM8: i32>(
40827    k: __mmask8,
40828    a: __m128d,
40829    b: __m128d,
40830    c: __m128i,
40831) -> __m128d {
40832    unsafe {
40833        static_assert_uimm_bits!(IMM8, 8);
40834        let a = a.as_f64x2();
40835        let b = b.as_f64x2();
40836        let c = c.as_i64x2();
40837        let fixupimm = vfixupimmsdz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
40838        let fixupimm: f64 = simd_extract!(fixupimm, 0);
40839        let r = simd_insert!(a, 0, fixupimm);
40840        transmute(r)
40841    }
40842}
40843
40844/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
40845/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40846///
40847/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_round_ss&expand=2511)
40848#[inline]
40849#[target_feature(enable = "avx512f")]
40850#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40851#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
40852#[rustc_legacy_const_generics(3, 4)]
40853pub fn _mm_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
40854    a: __m128,
40855    b: __m128,
40856    c: __m128i,
40857) -> __m128 {
40858    unsafe {
40859        static_assert_uimm_bits!(IMM8, 8);
40860        static_assert_mantissas_sae!(SAE);
40861        let a = a.as_f32x4();
40862        let b = b.as_f32x4();
40863        let c = c.as_i32x4();
40864        let r = vfixupimmss(a, b, c, IMM8, 0b11111111, SAE);
40865        let fixupimm: f32 = simd_extract!(r, 0);
40866        let r = simd_insert!(a, 0, fixupimm);
40867        transmute(r)
40868    }
40869}
40870
40871/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
40872/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40873///
40874/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_round_ss&expand=2512)
40875#[inline]
40876#[target_feature(enable = "avx512f")]
40877#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40878#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
40879#[rustc_legacy_const_generics(4, 5)]
40880pub fn _mm_mask_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
40881    a: __m128,
40882    k: __mmask8,
40883    b: __m128,
40884    c: __m128i,
40885) -> __m128 {
40886    unsafe {
40887        static_assert_uimm_bits!(IMM8, 8);
40888        static_assert_mantissas_sae!(SAE);
40889        let a = a.as_f32x4();
40890        let b = b.as_f32x4();
40891        let c = c.as_i32x4();
40892        let r = vfixupimmss(a, b, c, IMM8, k, SAE);
40893        let fixupimm: f32 = simd_extract!(r, 0);
40894        let r = simd_insert!(a, 0, fixupimm);
40895        transmute(r)
40896    }
40897}
40898
40899/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
40900/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40901///
40902/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_round_ss&expand=2513)
40903#[inline]
40904#[target_feature(enable = "avx512f")]
40905#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40906#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
40907#[rustc_legacy_const_generics(4, 5)]
40908pub fn _mm_maskz_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
40909    k: __mmask8,
40910    a: __m128,
40911    b: __m128,
40912    c: __m128i,
40913) -> __m128 {
40914    unsafe {
40915        static_assert_uimm_bits!(IMM8, 8);
40916        static_assert_mantissas_sae!(SAE);
40917        let a = a.as_f32x4();
40918        let b = b.as_f32x4();
40919        let c = c.as_i32x4();
40920        let r = vfixupimmssz(a, b, c, IMM8, k, SAE);
40921        let fixupimm: f32 = simd_extract!(r, 0);
40922        let r = simd_insert!(a, 0, fixupimm);
40923        transmute(r)
40924    }
40925}
40926
40927/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
40928/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40929///
40930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_round_sd&expand=2508)
40931#[inline]
40932#[target_feature(enable = "avx512f")]
40933#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40934#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
40935#[rustc_legacy_const_generics(3, 4)]
40936pub fn _mm_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
40937    a: __m128d,
40938    b: __m128d,
40939    c: __m128i,
40940) -> __m128d {
40941    unsafe {
40942        static_assert_uimm_bits!(IMM8, 8);
40943        static_assert_mantissas_sae!(SAE);
40944        let a = a.as_f64x2();
40945        let b = b.as_f64x2();
40946        let c = c.as_i64x2();
40947        let r = vfixupimmsd(a, b, c, IMM8, 0b11111111, SAE);
40948        let fixupimm: f64 = simd_extract!(r, 0);
40949        let r = simd_insert!(a, 0, fixupimm);
40950        transmute(r)
40951    }
40952}
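
// Illustrative sketch (not part of the original source; the example function name is made up).
// The ZERO class of `b` reads its token from bits 11:8 of `c`; token 0b1010 means "produce
// +1.0", and `_MM_FROUND_NO_EXC` additionally suppresses exception reporting. Assumes AVX-512F.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _example_mm_fixupimm_round_sd() {
    let a = _mm_set_pd(7.0, 7.0);
    let b = _mm_set_sd(0.0);
    let c = _mm_set1_epi64x(0b1010 << 8);
    let r = _mm_fixupimm_round_sd::<0, { _MM_FROUND_NO_EXC }>(a, b, c);
    assert_eq!(_mm_cvtsd_f64(r), 1.0);
}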
40953
40954/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
40955/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40956///
40957/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_round_sd&expand=2509)
40958#[inline]
40959#[target_feature(enable = "avx512f")]
40960#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40961#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
40962#[rustc_legacy_const_generics(4, 5)]
40963pub fn _mm_mask_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
40964    a: __m128d,
40965    k: __mmask8,
40966    b: __m128d,
40967    c: __m128i,
40968) -> __m128d {
40969    unsafe {
40970        static_assert_uimm_bits!(IMM8, 8);
40971        static_assert_mantissas_sae!(SAE);
40972        let a = a.as_f64x2();
40973        let b = b.as_f64x2();
40974        let c = c.as_i64x2();
40975        let r = vfixupimmsd(a, b, c, IMM8, k, SAE);
40976        let fixupimm: f64 = simd_extract!(r, 0);
40977        let r = simd_insert!(a, 0, fixupimm);
40978        transmute(r)
40979    }
40980}
40981
40982/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
40983/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40984///
40985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_round_sd&expand=2510)
40986#[inline]
40987#[target_feature(enable = "avx512f")]
40988#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40989#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
40990#[rustc_legacy_const_generics(4, 5)]
40991pub fn _mm_maskz_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
40992    k: __mmask8,
40993    a: __m128d,
40994    b: __m128d,
40995    c: __m128i,
40996) -> __m128d {
40997    unsafe {
40998        static_assert_uimm_bits!(IMM8, 8);
40999        static_assert_mantissas_sae!(SAE);
41000        let a = a.as_f64x2();
41001        let b = b.as_f64x2();
41002        let c = c.as_i64x2();
41003        let r = vfixupimmsdz(a, b, c, IMM8, k, SAE);
41004        let fixupimm: f64 = simd_extract!(r, 0);
41005        let r = simd_insert!(a, 0, fixupimm);
41006        transmute(r)
41007    }
41008}
41009
41010/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
41011///
41012/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvtss_sd&expand=1896)
41013#[inline]
41014#[target_feature(enable = "avx512f")]
41015#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41016#[cfg_attr(test, assert_instr(vcvtss2sd))]
41017pub fn _mm_mask_cvtss_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128) -> __m128d {
41018    unsafe {
41019        transmute(vcvtss2sd(
41020            a.as_f64x2(),
41021            b.as_f32x4(),
41022            src.as_f64x2(),
41023            k,
41024            _MM_FROUND_CUR_DIRECTION,
41025        ))
41026    }
41027}
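
// Illustrative sketch (not part of the original source; the example function name is made up):
// widen the lower f32 of `b` to f64 under a writemask; with mask bit 0 clear, the lower lane
// falls back to `src`. Assumes AVX-512F is available.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _example_mm_mask_cvtss_sd() {
    let src = _mm_set_sd(-1.0);
    let a = _mm_set_pd(8.0, 0.0);
    let b = _mm_set_ss(2.5);
    // Mask bit 0 set: 2.5f32 is widened to 2.5f64 in the lower lane.
    assert_eq!(_mm_cvtsd_f64(_mm_mask_cvtss_sd(src, 0b1, a, b)), 2.5);
    // Mask bit 0 clear: the lower lane comes from `src` instead.
    assert_eq!(_mm_cvtsd_f64(_mm_mask_cvtss_sd(src, 0b0, a, b)), -1.0);
}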
41028
41029/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
41030///
41031/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvtss_sd&expand=1897)
41032#[inline]
41033#[target_feature(enable = "avx512f")]
41034#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41035#[cfg_attr(test, assert_instr(vcvtss2sd))]
41036pub fn _mm_maskz_cvtss_sd(k: __mmask8, a: __m128d, b: __m128) -> __m128d {
41037    unsafe {
41038        transmute(vcvtss2sd(
41039            a.as_f64x2(),
41040            b.as_f32x4(),
41041            f64x2::ZERO,
41042            k,
41043            _MM_FROUND_CUR_DIRECTION,
41044        ))
41045    }
41046}
41047
41048/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
41049///
41050/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvtsd_ss&expand=1797)
41051#[inline]
41052#[target_feature(enable = "avx512f")]
41053#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41054#[cfg_attr(test, assert_instr(vcvtsd2ss))]
41055pub fn _mm_mask_cvtsd_ss(src: __m128, k: __mmask8, a: __m128, b: __m128d) -> __m128 {
41056    unsafe {
41057        transmute(vcvtsd2ss(
41058            a.as_f32x4(),
41059            b.as_f64x2(),
41060            src.as_f32x4(),
41061            k,
41062            _MM_FROUND_CUR_DIRECTION,
41063        ))
41064    }
41065}
41066
41067/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
41068///
41069/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvtsd_ss&expand=1798)
41070#[inline]
41071#[target_feature(enable = "avx512f")]
41072#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41073#[cfg_attr(test, assert_instr(vcvtsd2ss))]
41074pub fn _mm_maskz_cvtsd_ss(k: __mmask8, a: __m128, b: __m128d) -> __m128 {
41075    unsafe {
41076        transmute(vcvtsd2ss(
41077            a.as_f32x4(),
41078            b.as_f64x2(),
41079            f32x4::ZERO,
41080            k,
41081            _MM_FROUND_CUR_DIRECTION,
41082        ))
41083    }
41084}
41085
41086/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
41087/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41088///
41089/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_sd&expand=1371)
41090#[inline]
41091#[target_feature(enable = "avx512f")]
41092#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41093#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
41094#[rustc_legacy_const_generics(2)]
41095pub fn _mm_cvt_roundss_sd<const SAE: i32>(a: __m128d, b: __m128) -> __m128d {
41096    unsafe {
41097        static_assert_sae!(SAE);
41098        let a = a.as_f64x2();
41099        let b = b.as_f32x4();
41100        let r = vcvtss2sd(a, b, f64x2::ZERO, 0b11111111, SAE);
41101        transmute(r)
41102    }
41103}
41104
41105/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
41106/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41107///
41108/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvt_roundss_sd&expand=1372)
41109#[inline]
41110#[target_feature(enable = "avx512f")]
41111#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41112#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
41113#[rustc_legacy_const_generics(4)]
41114pub fn _mm_mask_cvt_roundss_sd<const SAE: i32>(
41115    src: __m128d,
41116    k: __mmask8,
41117    a: __m128d,
41118    b: __m128,
41119) -> __m128d {
41120    unsafe {
41121        static_assert_sae!(SAE);
41122        let a = a.as_f64x2();
41123        let b = b.as_f32x4();
41124        let src = src.as_f64x2();
41125        let r = vcvtss2sd(a, b, src, k, SAE);
41126        transmute(r)
41127    }
41128}
41129
41130/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
41131/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41132///
41133/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvt_roundss_sd&expand=1373)
41134#[inline]
41135#[target_feature(enable = "avx512f")]
41136#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41137#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
41138#[rustc_legacy_const_generics(3)]
41139pub fn _mm_maskz_cvt_roundss_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128) -> __m128d {
41140    unsafe {
41141        static_assert_sae!(SAE);
41142        let a = a.as_f64x2();
41143        let b = b.as_f32x4();
41144        let r = vcvtss2sd(a, b, f64x2::ZERO, k, SAE);
41145        transmute(r)
41146    }
41147}
41148
41149/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41150/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41151/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41152/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41153/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41154/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41155/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41156///
41157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_ss&expand=1361)
41158#[inline]
41159#[target_feature(enable = "avx512f")]
41160#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41161#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
41162#[rustc_legacy_const_generics(2)]
41163pub fn _mm_cvt_roundsd_ss<const ROUNDING: i32>(a: __m128, b: __m128d) -> __m128 {
41164    unsafe {
41165        static_assert_rounding!(ROUNDING);
41166        let a = a.as_f32x4();
41167        let b = b.as_f64x2();
41168        let r = vcvtsd2ss(a, b, f32x4::ZERO, 0b11111111, ROUNDING);
41169        transmute(r)
41170    }
41171}
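
// Illustrative sketch (not part of the original source; the example function name is made up):
// the ROUNDING parameter decides which neighbouring f32 is chosen when the f64 value is not
// exactly representable. Assumes AVX-512F is available.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _example_mm_cvt_roundsd_ss() {
    let a = _mm_setzero_ps();
    // 1.0 + 1e-10 is representable as f64 but lies strictly between two adjacent f32 values.
    let b = _mm_set_sd(1.0 + 1e-10);
    let down = _mm_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
    let up = _mm_cvt_roundsd_ss::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a, b);
    assert_eq!(_mm_cvtss_f32(down), 1.0);
    assert!(_mm_cvtss_f32(up) > 1.0);
}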
41172
41173/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41174/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41175/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41176/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41177/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41178/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41179/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41180///
41181/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvt_roundsd_ss&expand=1362)
41182#[inline]
41183#[target_feature(enable = "avx512f")]
41184#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41185#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
41186#[rustc_legacy_const_generics(4)]
41187pub fn _mm_mask_cvt_roundsd_ss<const ROUNDING: i32>(
41188    src: __m128,
41189    k: __mmask8,
41190    a: __m128,
41191    b: __m128d,
41192) -> __m128 {
41193    unsafe {
41194        static_assert_rounding!(ROUNDING);
41195        let a = a.as_f32x4();
41196        let b = b.as_f64x2();
41197        let src = src.as_f32x4();
41198        let r = vcvtsd2ss(a, b, src, k, ROUNDING);
41199        transmute(r)
41200    }
41201}
41202
41203/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41204/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41205/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41206/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41207/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41208/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41209/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41210///
41211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvt_roundsd_ss&expand=1363)
41212#[inline]
41213#[target_feature(enable = "avx512f")]
41214#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41215#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
41216#[rustc_legacy_const_generics(3)]
41217pub fn _mm_maskz_cvt_roundsd_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128d) -> __m128 {
41218    unsafe {
41219        static_assert_rounding!(ROUNDING);
41220        let a = a.as_f32x4();
41221        let b = b.as_f64x2();
41222        let r = vcvtsd2ss(a, b, f32x4::ZERO, k, ROUNDING);
41223        transmute(r)
41224    }
41225}
41226
41227/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
41228/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41229/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41230/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41231/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41232/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41233/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41234///
41235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_si32&expand=1374)
41236#[inline]
41237#[target_feature(enable = "avx512f")]
41238#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41239#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))]
41240#[rustc_legacy_const_generics(1)]
41241pub fn _mm_cvt_roundss_si32<const ROUNDING: i32>(a: __m128) -> i32 {
41242    unsafe {
41243        static_assert_rounding!(ROUNDING);
41244        let a = a.as_f32x4();
41245        vcvtss2si(a, ROUNDING)
41246    }
41247}
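
// Illustrative sketch (not part of the original source; the example function name is made up):
// the same input converts to different integers depending on the ROUNDING parameter; ties
// round to even under `_MM_FROUND_TO_NEAREST_INT`. Assumes AVX-512F is available.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _example_mm_cvt_roundss_si32() {
    let a = _mm_set_ss(2.5);
    let nearest = _mm_cvt_roundss_si32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
    let up = _mm_cvt_roundss_si32::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
    assert_eq!(nearest, 2); // 2.5 ties to the even integer 2
    assert_eq!(up, 3);
}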
41248
41249/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
41250/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41251/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41252/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41253/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41254/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41255/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41256///
41257/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_i32&expand=1369)
41258#[inline]
41259#[target_feature(enable = "avx512f")]
41260#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41261#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))]
41262#[rustc_legacy_const_generics(1)]
41263pub fn _mm_cvt_roundss_i32<const ROUNDING: i32>(a: __m128) -> i32 {
41264    unsafe {
41265        static_assert_rounding!(ROUNDING);
41266        let a = a.as_f32x4();
41267        vcvtss2si(a, ROUNDING)
41268    }
41269}
41270
41271/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\
41272/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41273/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41274/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41275/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41276/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41277/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41278///
41279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_u32&expand=1376)
41280#[inline]
41281#[target_feature(enable = "avx512f")]
41282#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41283#[cfg_attr(test, assert_instr(vcvtss2usi, ROUNDING = 8))]
41284#[rustc_legacy_const_generics(1)]
41285pub fn _mm_cvt_roundss_u32<const ROUNDING: i32>(a: __m128) -> u32 {
41286    unsafe {
41287        static_assert_rounding!(ROUNDING);
41288        let a = a.as_f32x4();
41289        vcvtss2usi(a, ROUNDING)
41290    }
41291}
41292
41293/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.
41294///
41295/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtss_i32&expand=1893)
41296#[inline]
41297#[target_feature(enable = "avx512f")]
41298#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41299#[cfg_attr(test, assert_instr(vcvtss2si))]
41300pub fn _mm_cvtss_i32(a: __m128) -> i32 {
41301    unsafe { vcvtss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
41302}
41303
41304/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.
41305///
41306/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtss_u32&expand=1901)
41307#[inline]
41308#[target_feature(enable = "avx512f")]
41309#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41310#[cfg_attr(test, assert_instr(vcvtss2usi))]
41311pub fn _mm_cvtss_u32(a: __m128) -> u32 {
41312    unsafe { vcvtss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
41313}
41314
41315/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
41316/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41317/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41318/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41319/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41320/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41321/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41322///
41323/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_si32&expand=1359)
41324#[inline]
41325#[target_feature(enable = "avx512f")]
41326#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41327#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))]
41328#[rustc_legacy_const_generics(1)]
41329pub fn _mm_cvt_roundsd_si32<const ROUNDING: i32>(a: __m128d) -> i32 {
41330    unsafe {
41331        static_assert_rounding!(ROUNDING);
41332        let a = a.as_f64x2();
41333        vcvtsd2si(a, ROUNDING)
41334    }
41335}
41336
41337/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
41338/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41339/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41340/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41341/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41342/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41343/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41344///
41345/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_i32&expand=1357)
41346#[inline]
41347#[target_feature(enable = "avx512f")]
41348#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41349#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))]
41350#[rustc_legacy_const_generics(1)]
41351pub fn _mm_cvt_roundsd_i32<const ROUNDING: i32>(a: __m128d) -> i32 {
41352    unsafe {
41353        static_assert_rounding!(ROUNDING);
41354        let a = a.as_f64x2();
41355        vcvtsd2si(a, ROUNDING)
41356    }
41357}
41358
41359/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\
41360/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41361/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41362/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41363/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41364/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41365/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41366///
41367/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_roundsd_u32&expand=1364)
41368#[inline]
41369#[target_feature(enable = "avx512f")]
41370#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41371#[cfg_attr(test, assert_instr(vcvtsd2usi, ROUNDING = 8))]
41372#[rustc_legacy_const_generics(1)]
41373pub fn _mm_cvt_roundsd_u32<const ROUNDING: i32>(a: __m128d) -> u32 {
41374    unsafe {
41375        static_assert_rounding!(ROUNDING);
41376        let a = a.as_f64x2();
41377        vcvtsd2usi(a, ROUNDING)
41378    }
41379}
41380
41381/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.
41382///
41383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtsd_i32&expand=1791)
41384#[inline]
41385#[target_feature(enable = "avx512f")]
41386#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41387#[cfg_attr(test, assert_instr(vcvtsd2si))]
41388pub fn _mm_cvtsd_i32(a: __m128d) -> i32 {
41389    unsafe { vcvtsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
41390}
41391
41392/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.
41393///
41394/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtsd_u32&expand=1799)
41395#[inline]
41396#[target_feature(enable = "avx512f")]
41397#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41398#[cfg_attr(test, assert_instr(vcvtsd2usi))]
41399pub fn _mm_cvtsd_u32(a: __m128d) -> u32 {
41400    unsafe { vcvtsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
41401}
41402
41403/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41404///
41405/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41406/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41407/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41408/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41409/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41410/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41411///
41412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundi32_ss&expand=1312)
41413#[inline]
41414#[target_feature(enable = "avx512f")]
41415#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41416#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))]
41417#[rustc_legacy_const_generics(2)]
41418pub fn _mm_cvt_roundi32_ss<const ROUNDING: i32>(a: __m128, b: i32) -> __m128 {
41419    unsafe {
41420        static_assert_rounding!(ROUNDING);
41421        let a = a.as_f32x4();
41422        let r = vcvtsi2ss(a, b, ROUNDING);
41423        transmute(r)
41424    }
41425}
41426
41427/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41428///
41429/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41430/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41431/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41432/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41433/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41434/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41435///
41436/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsi32_ss&expand=1366)
41437#[inline]
41438#[target_feature(enable = "avx512f")]
41439#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41440#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))]
41441#[rustc_legacy_const_generics(2)]
41442pub fn _mm_cvt_roundsi32_ss<const ROUNDING: i32>(a: __m128, b: i32) -> __m128 {
41443    unsafe {
41444        static_assert_rounding!(ROUNDING);
41445        let a = a.as_f32x4();
41446        let r = vcvtsi2ss(a, b, ROUNDING);
41447        transmute(r)
41448    }
41449}
41450
41451/// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41452/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41453/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41454/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41455/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41456/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41457/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41458///
41459/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundu32_ss&expand=1378)
41460#[inline]
41461#[target_feature(enable = "avx512f")]
41462#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41463#[cfg_attr(test, assert_instr(vcvtusi2ss, ROUNDING = 8))]
41464#[rustc_legacy_const_generics(2)]
41465pub fn _mm_cvt_roundu32_ss<const ROUNDING: i32>(a: __m128, b: u32) -> __m128 {
41466    unsafe {
41467        static_assert_rounding!(ROUNDING);
41468        let a = a.as_f32x4();
41469        let r = vcvtusi2ss(a, b, ROUNDING);
41470        transmute(r)
41471    }
41472}
41473
41474/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
41475///
41476/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvti32_ss&expand=1643)
41477#[inline]
41478#[target_feature(enable = "avx512f")]
41479#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41480#[cfg_attr(test, assert_instr(vcvtsi2ss))]
41481pub fn _mm_cvti32_ss(a: __m128, b: i32) -> __m128 {
41482    unsafe {
41483        let b = b as f32;
41484        simd_insert!(a, 0, b)
41485    }
41486}
41487
41488/// Convert the signed 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
41489///
41490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvti32_sd&expand=1642)
41491#[inline]
41492#[target_feature(enable = "avx512f")]
41493#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41494#[cfg_attr(test, assert_instr(vcvtsi2sd))]
41495pub fn _mm_cvti32_sd(a: __m128d, b: i32) -> __m128d {
41496    unsafe {
41497        let b = b as f64;
41498        simd_insert!(a, 0, b)
41499    }
41500}
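
// Illustrative sketch (not part of the original source; the example function name is made up):
// the signed integer is converted into the lower lane, and the remaining lanes are copied from
// `a`. Assumes AVX-512F is available.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _example_mm_cvti32() {
    let r = _mm_cvti32_ss(_mm_set_ps(4.0, 3.0, 2.0, 1.0), -7);
    assert_eq!(_mm_cvtss_f32(r), -7.0);
    let r = _mm_cvti32_sd(_mm_set_pd(2.0, 1.0), 42);
    assert_eq!(_mm_cvtsd_f64(r), 42.0);
}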
41501
41502/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
41503/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41504///
41505/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_si32&expand=1936)
41506#[inline]
41507#[target_feature(enable = "avx512f")]
41508#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41509#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))]
41510#[rustc_legacy_const_generics(1)]
41511pub fn _mm_cvtt_roundss_si32<const SAE: i32>(a: __m128) -> i32 {
41512    unsafe {
41513        static_assert_sae!(SAE);
41514        let a = a.as_f32x4();
41515        vcvttss2si(a, SAE)
41516    }
41517}
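
// Illustrative sketch (not part of the original source; the example function name is made up):
// the `cvtt` variants always truncate toward zero regardless of `MXCSR.RC`; the SAE parameter
// only controls exception reporting. Assumes AVX-512F is available.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _example_mm_cvtt_roundss_si32() {
    let a = _mm_set_ss(-1.9);
    let r = _mm_cvtt_roundss_si32::<{ _MM_FROUND_NO_EXC }>(a);
    assert_eq!(r, -1); // truncated toward zero, not rounded to nearest
}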
41518
41519/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
41520/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41521///
41522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_i32&expand=1934)
41523#[inline]
41524#[target_feature(enable = "avx512f")]
41525#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41526#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))]
41527#[rustc_legacy_const_generics(1)]
41528pub fn _mm_cvtt_roundss_i32<const SAE: i32>(a: __m128) -> i32 {
41529    unsafe {
41530        static_assert_sae!(SAE);
41531        let a = a.as_f32x4();
41532        vcvttss2si(a, SAE)
41533    }
41534}
41535
41536/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\
41537/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41538///
41539/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_u32&expand=1938)
41540#[inline]
41541#[target_feature(enable = "avx512f")]
41542#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41543#[cfg_attr(test, assert_instr(vcvttss2usi, SAE = 8))]
41544#[rustc_legacy_const_generics(1)]
41545pub fn _mm_cvtt_roundss_u32<const SAE: i32>(a: __m128) -> u32 {
41546    unsafe {
41547        static_assert_sae!(SAE);
41548        let a = a.as_f32x4();
41549        vcvttss2usi(a, SAE)
41550    }
41551}
41552
41553/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
41554///
41555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_i32&expand=2022)
41556#[inline]
41557#[target_feature(enable = "avx512f")]
41558#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41559#[cfg_attr(test, assert_instr(vcvttss2si))]
41560pub fn _mm_cvttss_i32(a: __m128) -> i32 {
41561    unsafe { vcvttss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
41562}
41563
41564/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.
41565///
41566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_u32&expand=2026)
41567#[inline]
41568#[target_feature(enable = "avx512f")]
41569#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41570#[cfg_attr(test, assert_instr(vcvttss2usi))]
41571pub fn _mm_cvttss_u32(a: __m128) -> u32 {
41572    unsafe { vcvttss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
41573}
41574
41575/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
41576/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41577///
41578/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundsd_si32&expand=1930)
41579#[inline]
41580#[target_feature(enable = "avx512f")]
41581#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41582#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))]
41583#[rustc_legacy_const_generics(1)]
41584pub fn _mm_cvtt_roundsd_si32<const SAE: i32>(a: __m128d) -> i32 {
41585    unsafe {
41586        static_assert_sae!(SAE);
41587        let a = a.as_f64x2();
41588        vcvttsd2si(a, SAE)
41589    }
41590}
41591
41592/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
41593/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41594///
41595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundsd_i32&expand=1928)
41596#[inline]
41597#[target_feature(enable = "avx512f")]
41598#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41599#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))]
41600#[rustc_legacy_const_generics(1)]
41601pub fn _mm_cvtt_roundsd_i32<const SAE: i32>(a: __m128d) -> i32 {
41602    unsafe {
41603        static_assert_sae!(SAE);
41604        let a = a.as_f64x2();
41605        vcvttsd2si(a, SAE)
41606    }
41607}
41608
41609/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\
41610/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41611///
41612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundsd_u32&expand=1932)
41613#[inline]
41614#[target_feature(enable = "avx512f")]
41615#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41616#[cfg_attr(test, assert_instr(vcvttsd2usi, SAE = 8))]
41617#[rustc_legacy_const_generics(1)]
41618pub fn _mm_cvtt_roundsd_u32<const SAE: i32>(a: __m128d) -> u32 {
41619    unsafe {
41620        static_assert_sae!(SAE);
41621        let a = a.as_f64x2();
41622        vcvttsd2usi(a, SAE)
41623    }
41624}
41625
41626/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
41627///
41628/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_i32&expand=2015)
41629#[inline]
41630#[target_feature(enable = "avx512f")]
41631#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41632#[cfg_attr(test, assert_instr(vcvttsd2si))]
41633pub fn _mm_cvttsd_i32(a: __m128d) -> i32 {
41634    unsafe { vcvttsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
41635}
41636
41637/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.
41638///
41639/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_u32&expand=2020)
41640#[inline]
41641#[target_feature(enable = "avx512f")]
41642#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41643#[cfg_attr(test, assert_instr(vcvttsd2usi))]
41644pub fn _mm_cvttsd_u32(a: __m128d) -> u32 {
41645    unsafe { vcvttsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
41646}
41647
41648/// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
41649///
41650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtu32_ss&expand=2032)
41651#[inline]
41652#[target_feature(enable = "avx512f")]
41653#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41654#[cfg_attr(test, assert_instr(vcvtusi2ss))]
41655pub fn _mm_cvtu32_ss(a: __m128, b: u32) -> __m128 {
41656    unsafe {
41657        let b = b as f32;
41658        simd_insert!(a, 0, b)
41659    }
41660}
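
// Illustrative usage sketch, not part of the original source: unlike the
// signed `_mm_cvtsi32_ss`, this intrinsic accepts the full `u32` range
// (3_000_000_000 happens to be exactly representable as an `f32`):
//
//     let r = _mm_cvtu32_ss(_mm_set1_ps(1.0), 3_000_000_000);
//     assert_eq!(_mm_cvtss_f32(r), 3_000_000_000.0); // upper lanes keep 1.0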
41661
41662/// Convert the unsigned 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
41663///
41664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtu32_sd&expand=2031)
41665#[inline]
41666#[target_feature(enable = "avx512f")]
41667#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41668#[cfg_attr(test, assert_instr(vcvtusi2sd))]
41669pub fn _mm_cvtu32_sd(a: __m128d, b: u32) -> __m128d {
41670    unsafe {
41671        let b = b as f64;
41672        simd_insert!(a, 0, b)
41673    }
41674}
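
// Illustrative usage sketch, not part of the original source:
//
//     let r = _mm_cvtu32_sd(_mm_set1_pd(1.0), 4_000_000_000);
//     assert_eq!(_mm_cvtsd_f64(r), 4_000_000_000.0); // upper lane keeps 1.0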
41675
41676/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).\
41677/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41678///
41679/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comi_round_ss&expand=1175)
41680#[inline]
41681#[target_feature(enable = "avx512f")]
41682#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41683#[cfg_attr(test, assert_instr(vcmp, IMM5 = 5, SAE = 4))] // should be vcomiss
41684#[rustc_legacy_const_generics(2, 3)]
41685pub fn _mm_comi_round_ss<const IMM5: i32, const SAE: i32>(a: __m128, b: __m128) -> i32 {
41686    unsafe {
41687        static_assert_uimm_bits!(IMM5, 5);
41688        static_assert_mantissas_sae!(SAE);
41689        let a = a.as_f32x4();
41690        let b = b.as_f32x4();
41691        vcomiss(a, b, IMM5, SAE)
41692    }
41693}
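
// Illustrative usage sketch, not part of the original source: IMM5 takes one
// of the `_CMP_*` predicates and SAE can be `_MM_FROUND_NO_EXC`:
//
//     let a = _mm_set_ss(1.0);
//     let b = _mm_set_ss(2.0);
//     let r = _mm_comi_round_ss::<_CMP_LT_OS, _MM_FROUND_NO_EXC>(a, b);
//     assert_eq!(r, 1); // 1.0 < 2.0; `_mm_comi_round_sd` below is analogous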
41694
41695/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).\
41696/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41697///
41698/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comi_round_sd&expand=1174)
41699#[inline]
41700#[target_feature(enable = "avx512f")]
41701#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41702#[cfg_attr(test, assert_instr(vcmp, IMM5 = 5, SAE = 4))] // should be vcomisd
41703#[rustc_legacy_const_generics(2, 3)]
41704pub fn _mm_comi_round_sd<const IMM5: i32, const SAE: i32>(a: __m128d, b: __m128d) -> i32 {
41705    unsafe {
41706        static_assert_uimm_bits!(IMM5, 5);
41707        static_assert_mantissas_sae!(SAE);
41708        let a = a.as_f64x2();
41709        let b = b.as_f64x2();
41710        vcomisd(a, b, IMM5, SAE)
41711    }
41712}
41713
41714/// Equal
41715#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41716pub const _MM_CMPINT_EQ: _MM_CMPINT_ENUM = 0x00;
41717/// Less-than
41718#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41719pub const _MM_CMPINT_LT: _MM_CMPINT_ENUM = 0x01;
41720/// Less-than-or-equal
41721#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41722pub const _MM_CMPINT_LE: _MM_CMPINT_ENUM = 0x02;
41723/// False
41724#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41725pub const _MM_CMPINT_FALSE: _MM_CMPINT_ENUM = 0x03;
41726/// Not-equal
41727#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41728pub const _MM_CMPINT_NE: _MM_CMPINT_ENUM = 0x04;
41729/// Not less-than
41730#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41731pub const _MM_CMPINT_NLT: _MM_CMPINT_ENUM = 0x05;
41732/// Not less-than-or-equal
41733#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41734pub const _MM_CMPINT_NLE: _MM_CMPINT_ENUM = 0x06;
41735/// True
41736#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41737pub const _MM_CMPINT_TRUE: _MM_CMPINT_ENUM = 0x07;
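
// Illustrative usage sketch, not part of the original source: these predicates
// select the comparison performed by the integer mask-compare intrinsics such
// as `_mm512_cmp_epi32_mask`:
//
//     let a = _mm512_set1_epi32(1);
//     let b = _mm512_set1_epi32(2);
//     let k = _mm512_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
//     assert_eq!(k, 0xFFFF); // 1 < 2 holds in all 16 lanes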
41738
41739/// interval [1, 2)
41740#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41741pub const _MM_MANT_NORM_1_2: _MM_MANTISSA_NORM_ENUM = 0x00;
41742/// interval [0.5, 2)
41743#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41744pub const _MM_MANT_NORM_P5_2: _MM_MANTISSA_NORM_ENUM = 0x01;
41745/// interval [0.5, 1)
41746#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41747pub const _MM_MANT_NORM_P5_1: _MM_MANTISSA_NORM_ENUM = 0x02;
41748/// interval [0.75, 1.5)
41749#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41750pub const _MM_MANT_NORM_P75_1P5: _MM_MANTISSA_NORM_ENUM = 0x03;
41751
41752/// sign = sign(SRC)
41753#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41754pub const _MM_MANT_SIGN_SRC: _MM_MANTISSA_SIGN_ENUM = 0x00;
41755/// sign = 0
41756#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41757pub const _MM_MANT_SIGN_ZERO: _MM_MANTISSA_SIGN_ENUM = 0x01;
41758/// DEST = NaN if sign(SRC) = 1
41759#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41760pub const _MM_MANT_SIGN_NAN: _MM_MANTISSA_SIGN_ENUM = 0x02;
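
// Illustrative usage sketch, not part of the original source: a normalization
// interval and a sign-control constant are combined in the `getmant`
// intrinsics, e.g. `_mm512_getmant_ps`:
//
//     let a = _mm512_set1_ps(10.0);
//     let m = _mm512_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_ZERO>(a);
//     // every lane of `m` is 1.25, since 10.0 = 1.25 * 2^3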
41761
41762#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41763pub const _MM_PERM_AAAA: _MM_PERM_ENUM = 0x00;
41764#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41765pub const _MM_PERM_AAAB: _MM_PERM_ENUM = 0x01;
41766#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41767pub const _MM_PERM_AAAC: _MM_PERM_ENUM = 0x02;
41768#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41769pub const _MM_PERM_AAAD: _MM_PERM_ENUM = 0x03;
41770#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41771pub const _MM_PERM_AABA: _MM_PERM_ENUM = 0x04;
41772#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41773pub const _MM_PERM_AABB: _MM_PERM_ENUM = 0x05;
41774#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41775pub const _MM_PERM_AABC: _MM_PERM_ENUM = 0x06;
41776#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41777pub const _MM_PERM_AABD: _MM_PERM_ENUM = 0x07;
41778#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41779pub const _MM_PERM_AACA: _MM_PERM_ENUM = 0x08;
41780#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41781pub const _MM_PERM_AACB: _MM_PERM_ENUM = 0x09;
41782#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41783pub const _MM_PERM_AACC: _MM_PERM_ENUM = 0x0A;
41784#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41785pub const _MM_PERM_AACD: _MM_PERM_ENUM = 0x0B;
41786#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41787pub const _MM_PERM_AADA: _MM_PERM_ENUM = 0x0C;
41788#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41789pub const _MM_PERM_AADB: _MM_PERM_ENUM = 0x0D;
41790#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41791pub const _MM_PERM_AADC: _MM_PERM_ENUM = 0x0E;
41792#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41793pub const _MM_PERM_AADD: _MM_PERM_ENUM = 0x0F;
41794#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41795pub const _MM_PERM_ABAA: _MM_PERM_ENUM = 0x10;
41796#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41797pub const _MM_PERM_ABAB: _MM_PERM_ENUM = 0x11;
41798#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41799pub const _MM_PERM_ABAC: _MM_PERM_ENUM = 0x12;
41800#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41801pub const _MM_PERM_ABAD: _MM_PERM_ENUM = 0x13;
41802#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41803pub const _MM_PERM_ABBA: _MM_PERM_ENUM = 0x14;
41804#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41805pub const _MM_PERM_ABBB: _MM_PERM_ENUM = 0x15;
41806#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41807pub const _MM_PERM_ABBC: _MM_PERM_ENUM = 0x16;
41808#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41809pub const _MM_PERM_ABBD: _MM_PERM_ENUM = 0x17;
41810#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41811pub const _MM_PERM_ABCA: _MM_PERM_ENUM = 0x18;
41812#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41813pub const _MM_PERM_ABCB: _MM_PERM_ENUM = 0x19;
41814#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41815pub const _MM_PERM_ABCC: _MM_PERM_ENUM = 0x1A;
41816#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41817pub const _MM_PERM_ABCD: _MM_PERM_ENUM = 0x1B;
41818#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41819pub const _MM_PERM_ABDA: _MM_PERM_ENUM = 0x1C;
41820#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41821pub const _MM_PERM_ABDB: _MM_PERM_ENUM = 0x1D;
41822#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41823pub const _MM_PERM_ABDC: _MM_PERM_ENUM = 0x1E;
41824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41825pub const _MM_PERM_ABDD: _MM_PERM_ENUM = 0x1F;
41826#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41827pub const _MM_PERM_ACAA: _MM_PERM_ENUM = 0x20;
41828#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41829pub const _MM_PERM_ACAB: _MM_PERM_ENUM = 0x21;
41830#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41831pub const _MM_PERM_ACAC: _MM_PERM_ENUM = 0x22;
41832#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41833pub const _MM_PERM_ACAD: _MM_PERM_ENUM = 0x23;
41834#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41835pub const _MM_PERM_ACBA: _MM_PERM_ENUM = 0x24;
41836#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41837pub const _MM_PERM_ACBB: _MM_PERM_ENUM = 0x25;
41838#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41839pub const _MM_PERM_ACBC: _MM_PERM_ENUM = 0x26;
41840#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41841pub const _MM_PERM_ACBD: _MM_PERM_ENUM = 0x27;
41842#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41843pub const _MM_PERM_ACCA: _MM_PERM_ENUM = 0x28;
41844#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41845pub const _MM_PERM_ACCB: _MM_PERM_ENUM = 0x29;
41846#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41847pub const _MM_PERM_ACCC: _MM_PERM_ENUM = 0x2A;
41848#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41849pub const _MM_PERM_ACCD: _MM_PERM_ENUM = 0x2B;
41850#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41851pub const _MM_PERM_ACDA: _MM_PERM_ENUM = 0x2C;
41852#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41853pub const _MM_PERM_ACDB: _MM_PERM_ENUM = 0x2D;
41854#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41855pub const _MM_PERM_ACDC: _MM_PERM_ENUM = 0x2E;
41856#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41857pub const _MM_PERM_ACDD: _MM_PERM_ENUM = 0x2F;
41858#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41859pub const _MM_PERM_ADAA: _MM_PERM_ENUM = 0x30;
41860#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41861pub const _MM_PERM_ADAB: _MM_PERM_ENUM = 0x31;
41862#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41863pub const _MM_PERM_ADAC: _MM_PERM_ENUM = 0x32;
41864#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41865pub const _MM_PERM_ADAD: _MM_PERM_ENUM = 0x33;
41866#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41867pub const _MM_PERM_ADBA: _MM_PERM_ENUM = 0x34;
41868#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41869pub const _MM_PERM_ADBB: _MM_PERM_ENUM = 0x35;
41870#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41871pub const _MM_PERM_ADBC: _MM_PERM_ENUM = 0x36;
41872#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41873pub const _MM_PERM_ADBD: _MM_PERM_ENUM = 0x37;
41874#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41875pub const _MM_PERM_ADCA: _MM_PERM_ENUM = 0x38;
41876#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41877pub const _MM_PERM_ADCB: _MM_PERM_ENUM = 0x39;
41878#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41879pub const _MM_PERM_ADCC: _MM_PERM_ENUM = 0x3A;
41880#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41881pub const _MM_PERM_ADCD: _MM_PERM_ENUM = 0x3B;
41882#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41883pub const _MM_PERM_ADDA: _MM_PERM_ENUM = 0x3C;
41884#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41885pub const _MM_PERM_ADDB: _MM_PERM_ENUM = 0x3D;
41886#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41887pub const _MM_PERM_ADDC: _MM_PERM_ENUM = 0x3E;
41888#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41889pub const _MM_PERM_ADDD: _MM_PERM_ENUM = 0x3F;
41890#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41891pub const _MM_PERM_BAAA: _MM_PERM_ENUM = 0x40;
41892#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41893pub const _MM_PERM_BAAB: _MM_PERM_ENUM = 0x41;
41894#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41895pub const _MM_PERM_BAAC: _MM_PERM_ENUM = 0x42;
41896#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41897pub const _MM_PERM_BAAD: _MM_PERM_ENUM = 0x43;
41898#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41899pub const _MM_PERM_BABA: _MM_PERM_ENUM = 0x44;
41900#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41901pub const _MM_PERM_BABB: _MM_PERM_ENUM = 0x45;
41902#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41903pub const _MM_PERM_BABC: _MM_PERM_ENUM = 0x46;
41904#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41905pub const _MM_PERM_BABD: _MM_PERM_ENUM = 0x47;
41906#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41907pub const _MM_PERM_BACA: _MM_PERM_ENUM = 0x48;
41908#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41909pub const _MM_PERM_BACB: _MM_PERM_ENUM = 0x49;
41910#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41911pub const _MM_PERM_BACC: _MM_PERM_ENUM = 0x4A;
41912#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41913pub const _MM_PERM_BACD: _MM_PERM_ENUM = 0x4B;
41914#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41915pub const _MM_PERM_BADA: _MM_PERM_ENUM = 0x4C;
41916#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41917pub const _MM_PERM_BADB: _MM_PERM_ENUM = 0x4D;
41918#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41919pub const _MM_PERM_BADC: _MM_PERM_ENUM = 0x4E;
41920#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41921pub const _MM_PERM_BADD: _MM_PERM_ENUM = 0x4F;
41922#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41923pub const _MM_PERM_BBAA: _MM_PERM_ENUM = 0x50;
41924#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41925pub const _MM_PERM_BBAB: _MM_PERM_ENUM = 0x51;
41926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41927pub const _MM_PERM_BBAC: _MM_PERM_ENUM = 0x52;
41928#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41929pub const _MM_PERM_BBAD: _MM_PERM_ENUM = 0x53;
41930#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41931pub const _MM_PERM_BBBA: _MM_PERM_ENUM = 0x54;
41932#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41933pub const _MM_PERM_BBBB: _MM_PERM_ENUM = 0x55;
41934#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41935pub const _MM_PERM_BBBC: _MM_PERM_ENUM = 0x56;
41936#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41937pub const _MM_PERM_BBBD: _MM_PERM_ENUM = 0x57;
41938#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41939pub const _MM_PERM_BBCA: _MM_PERM_ENUM = 0x58;
41940#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41941pub const _MM_PERM_BBCB: _MM_PERM_ENUM = 0x59;
41942#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41943pub const _MM_PERM_BBCC: _MM_PERM_ENUM = 0x5A;
41944#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41945pub const _MM_PERM_BBCD: _MM_PERM_ENUM = 0x5B;
41946#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41947pub const _MM_PERM_BBDA: _MM_PERM_ENUM = 0x5C;
41948#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41949pub const _MM_PERM_BBDB: _MM_PERM_ENUM = 0x5D;
41950#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41951pub const _MM_PERM_BBDC: _MM_PERM_ENUM = 0x5E;
41952#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41953pub const _MM_PERM_BBDD: _MM_PERM_ENUM = 0x5F;
41954#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41955pub const _MM_PERM_BCAA: _MM_PERM_ENUM = 0x60;
41956#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41957pub const _MM_PERM_BCAB: _MM_PERM_ENUM = 0x61;
41958#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41959pub const _MM_PERM_BCAC: _MM_PERM_ENUM = 0x62;
41960#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41961pub const _MM_PERM_BCAD: _MM_PERM_ENUM = 0x63;
41962#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41963pub const _MM_PERM_BCBA: _MM_PERM_ENUM = 0x64;
41964#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41965pub const _MM_PERM_BCBB: _MM_PERM_ENUM = 0x65;
41966#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41967pub const _MM_PERM_BCBC: _MM_PERM_ENUM = 0x66;
41968#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41969pub const _MM_PERM_BCBD: _MM_PERM_ENUM = 0x67;
41970#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41971pub const _MM_PERM_BCCA: _MM_PERM_ENUM = 0x68;
41972#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41973pub const _MM_PERM_BCCB: _MM_PERM_ENUM = 0x69;
41974#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41975pub const _MM_PERM_BCCC: _MM_PERM_ENUM = 0x6A;
41976#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41977pub const _MM_PERM_BCCD: _MM_PERM_ENUM = 0x6B;
41978#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41979pub const _MM_PERM_BCDA: _MM_PERM_ENUM = 0x6C;
41980#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41981pub const _MM_PERM_BCDB: _MM_PERM_ENUM = 0x6D;
41982#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41983pub const _MM_PERM_BCDC: _MM_PERM_ENUM = 0x6E;
41984#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41985pub const _MM_PERM_BCDD: _MM_PERM_ENUM = 0x6F;
41986#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41987pub const _MM_PERM_BDAA: _MM_PERM_ENUM = 0x70;
41988#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41989pub const _MM_PERM_BDAB: _MM_PERM_ENUM = 0x71;
41990#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41991pub const _MM_PERM_BDAC: _MM_PERM_ENUM = 0x72;
41992#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41993pub const _MM_PERM_BDAD: _MM_PERM_ENUM = 0x73;
41994#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41995pub const _MM_PERM_BDBA: _MM_PERM_ENUM = 0x74;
41996#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41997pub const _MM_PERM_BDBB: _MM_PERM_ENUM = 0x75;
41998#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41999pub const _MM_PERM_BDBC: _MM_PERM_ENUM = 0x76;
42000#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42001pub const _MM_PERM_BDBD: _MM_PERM_ENUM = 0x77;
42002#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42003pub const _MM_PERM_BDCA: _MM_PERM_ENUM = 0x78;
42004#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42005pub const _MM_PERM_BDCB: _MM_PERM_ENUM = 0x79;
42006#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42007pub const _MM_PERM_BDCC: _MM_PERM_ENUM = 0x7A;
42008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42009pub const _MM_PERM_BDCD: _MM_PERM_ENUM = 0x7B;
42010#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42011pub const _MM_PERM_BDDA: _MM_PERM_ENUM = 0x7C;
42012#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42013pub const _MM_PERM_BDDB: _MM_PERM_ENUM = 0x7D;
42014#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42015pub const _MM_PERM_BDDC: _MM_PERM_ENUM = 0x7E;
42016#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42017pub const _MM_PERM_BDDD: _MM_PERM_ENUM = 0x7F;
42018#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42019pub const _MM_PERM_CAAA: _MM_PERM_ENUM = 0x80;
42020#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42021pub const _MM_PERM_CAAB: _MM_PERM_ENUM = 0x81;
42022#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42023pub const _MM_PERM_CAAC: _MM_PERM_ENUM = 0x82;
42024#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42025pub const _MM_PERM_CAAD: _MM_PERM_ENUM = 0x83;
42026#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42027pub const _MM_PERM_CABA: _MM_PERM_ENUM = 0x84;
42028#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42029pub const _MM_PERM_CABB: _MM_PERM_ENUM = 0x85;
42030#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42031pub const _MM_PERM_CABC: _MM_PERM_ENUM = 0x86;
42032#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42033pub const _MM_PERM_CABD: _MM_PERM_ENUM = 0x87;
42034#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42035pub const _MM_PERM_CACA: _MM_PERM_ENUM = 0x88;
42036#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42037pub const _MM_PERM_CACB: _MM_PERM_ENUM = 0x89;
42038#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42039pub const _MM_PERM_CACC: _MM_PERM_ENUM = 0x8A;
42040#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42041pub const _MM_PERM_CACD: _MM_PERM_ENUM = 0x8B;
42042#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42043pub const _MM_PERM_CADA: _MM_PERM_ENUM = 0x8C;
42044#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42045pub const _MM_PERM_CADB: _MM_PERM_ENUM = 0x8D;
42046#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42047pub const _MM_PERM_CADC: _MM_PERM_ENUM = 0x8E;
42048#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42049pub const _MM_PERM_CADD: _MM_PERM_ENUM = 0x8F;
42050#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42051pub const _MM_PERM_CBAA: _MM_PERM_ENUM = 0x90;
42052#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42053pub const _MM_PERM_CBAB: _MM_PERM_ENUM = 0x91;
42054#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42055pub const _MM_PERM_CBAC: _MM_PERM_ENUM = 0x92;
42056#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42057pub const _MM_PERM_CBAD: _MM_PERM_ENUM = 0x93;
42058#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42059pub const _MM_PERM_CBBA: _MM_PERM_ENUM = 0x94;
42060#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42061pub const _MM_PERM_CBBB: _MM_PERM_ENUM = 0x95;
42062#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42063pub const _MM_PERM_CBBC: _MM_PERM_ENUM = 0x96;
42064#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42065pub const _MM_PERM_CBBD: _MM_PERM_ENUM = 0x97;
42066#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42067pub const _MM_PERM_CBCA: _MM_PERM_ENUM = 0x98;
42068#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42069pub const _MM_PERM_CBCB: _MM_PERM_ENUM = 0x99;
42070#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42071pub const _MM_PERM_CBCC: _MM_PERM_ENUM = 0x9A;
42072#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42073pub const _MM_PERM_CBCD: _MM_PERM_ENUM = 0x9B;
42074#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42075pub const _MM_PERM_CBDA: _MM_PERM_ENUM = 0x9C;
42076#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42077pub const _MM_PERM_CBDB: _MM_PERM_ENUM = 0x9D;
42078#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42079pub const _MM_PERM_CBDC: _MM_PERM_ENUM = 0x9E;
42080#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42081pub const _MM_PERM_CBDD: _MM_PERM_ENUM = 0x9F;
42082#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42083pub const _MM_PERM_CCAA: _MM_PERM_ENUM = 0xA0;
42084#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42085pub const _MM_PERM_CCAB: _MM_PERM_ENUM = 0xA1;
42086#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42087pub const _MM_PERM_CCAC: _MM_PERM_ENUM = 0xA2;
42088#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42089pub const _MM_PERM_CCAD: _MM_PERM_ENUM = 0xA3;
42090#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42091pub const _MM_PERM_CCBA: _MM_PERM_ENUM = 0xA4;
42092#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42093pub const _MM_PERM_CCBB: _MM_PERM_ENUM = 0xA5;
42094#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42095pub const _MM_PERM_CCBC: _MM_PERM_ENUM = 0xA6;
42096#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42097pub const _MM_PERM_CCBD: _MM_PERM_ENUM = 0xA7;
42098#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42099pub const _MM_PERM_CCCA: _MM_PERM_ENUM = 0xA8;
42100#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42101pub const _MM_PERM_CCCB: _MM_PERM_ENUM = 0xA9;
42102#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42103pub const _MM_PERM_CCCC: _MM_PERM_ENUM = 0xAA;
42104#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42105pub const _MM_PERM_CCCD: _MM_PERM_ENUM = 0xAB;
42106#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42107pub const _MM_PERM_CCDA: _MM_PERM_ENUM = 0xAC;
42108#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42109pub const _MM_PERM_CCDB: _MM_PERM_ENUM = 0xAD;
42110#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42111pub const _MM_PERM_CCDC: _MM_PERM_ENUM = 0xAE;
42112#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42113pub const _MM_PERM_CCDD: _MM_PERM_ENUM = 0xAF;
42114#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42115pub const _MM_PERM_CDAA: _MM_PERM_ENUM = 0xB0;
42116#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42117pub const _MM_PERM_CDAB: _MM_PERM_ENUM = 0xB1;
42118#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42119pub const _MM_PERM_CDAC: _MM_PERM_ENUM = 0xB2;
42120#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42121pub const _MM_PERM_CDAD: _MM_PERM_ENUM = 0xB3;
42122#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42123pub const _MM_PERM_CDBA: _MM_PERM_ENUM = 0xB4;
42124#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42125pub const _MM_PERM_CDBB: _MM_PERM_ENUM = 0xB5;
42126#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42127pub const _MM_PERM_CDBC: _MM_PERM_ENUM = 0xB6;
42128#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42129pub const _MM_PERM_CDBD: _MM_PERM_ENUM = 0xB7;
42130#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42131pub const _MM_PERM_CDCA: _MM_PERM_ENUM = 0xB8;
42132#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42133pub const _MM_PERM_CDCB: _MM_PERM_ENUM = 0xB9;
42134#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42135pub const _MM_PERM_CDCC: _MM_PERM_ENUM = 0xBA;
42136#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42137pub const _MM_PERM_CDCD: _MM_PERM_ENUM = 0xBB;
42138#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42139pub const _MM_PERM_CDDA: _MM_PERM_ENUM = 0xBC;
42140#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42141pub const _MM_PERM_CDDB: _MM_PERM_ENUM = 0xBD;
42142#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42143pub const _MM_PERM_CDDC: _MM_PERM_ENUM = 0xBE;
42144#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42145pub const _MM_PERM_CDDD: _MM_PERM_ENUM = 0xBF;
42146#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42147pub const _MM_PERM_DAAA: _MM_PERM_ENUM = 0xC0;
42148#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42149pub const _MM_PERM_DAAB: _MM_PERM_ENUM = 0xC1;
42150#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42151pub const _MM_PERM_DAAC: _MM_PERM_ENUM = 0xC2;
42152#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42153pub const _MM_PERM_DAAD: _MM_PERM_ENUM = 0xC3;
42154#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42155pub const _MM_PERM_DABA: _MM_PERM_ENUM = 0xC4;
42156#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42157pub const _MM_PERM_DABB: _MM_PERM_ENUM = 0xC5;
42158#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42159pub const _MM_PERM_DABC: _MM_PERM_ENUM = 0xC6;
42160#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42161pub const _MM_PERM_DABD: _MM_PERM_ENUM = 0xC7;
42162#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42163pub const _MM_PERM_DACA: _MM_PERM_ENUM = 0xC8;
42164#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42165pub const _MM_PERM_DACB: _MM_PERM_ENUM = 0xC9;
42166#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42167pub const _MM_PERM_DACC: _MM_PERM_ENUM = 0xCA;
42168#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42169pub const _MM_PERM_DACD: _MM_PERM_ENUM = 0xCB;
42170#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42171pub const _MM_PERM_DADA: _MM_PERM_ENUM = 0xCC;
42172#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42173pub const _MM_PERM_DADB: _MM_PERM_ENUM = 0xCD;
42174#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42175pub const _MM_PERM_DADC: _MM_PERM_ENUM = 0xCE;
42176#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42177pub const _MM_PERM_DADD: _MM_PERM_ENUM = 0xCF;
42178#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42179pub const _MM_PERM_DBAA: _MM_PERM_ENUM = 0xD0;
42180#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42181pub const _MM_PERM_DBAB: _MM_PERM_ENUM = 0xD1;
42182#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42183pub const _MM_PERM_DBAC: _MM_PERM_ENUM = 0xD2;
42184#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42185pub const _MM_PERM_DBAD: _MM_PERM_ENUM = 0xD3;
42186#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42187pub const _MM_PERM_DBBA: _MM_PERM_ENUM = 0xD4;
42188#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42189pub const _MM_PERM_DBBB: _MM_PERM_ENUM = 0xD5;
42190#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42191pub const _MM_PERM_DBBC: _MM_PERM_ENUM = 0xD6;
42192#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42193pub const _MM_PERM_DBBD: _MM_PERM_ENUM = 0xD7;
42194#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42195pub const _MM_PERM_DBCA: _MM_PERM_ENUM = 0xD8;
42196#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42197pub const _MM_PERM_DBCB: _MM_PERM_ENUM = 0xD9;
42198#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42199pub const _MM_PERM_DBCC: _MM_PERM_ENUM = 0xDA;
42200#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42201pub const _MM_PERM_DBCD: _MM_PERM_ENUM = 0xDB;
42202#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42203pub const _MM_PERM_DBDA: _MM_PERM_ENUM = 0xDC;
42204#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42205pub const _MM_PERM_DBDB: _MM_PERM_ENUM = 0xDD;
42206#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42207pub const _MM_PERM_DBDC: _MM_PERM_ENUM = 0xDE;
42208#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42209pub const _MM_PERM_DBDD: _MM_PERM_ENUM = 0xDF;
42210#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42211pub const _MM_PERM_DCAA: _MM_PERM_ENUM = 0xE0;
42212#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42213pub const _MM_PERM_DCAB: _MM_PERM_ENUM = 0xE1;
42214#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42215pub const _MM_PERM_DCAC: _MM_PERM_ENUM = 0xE2;
42216#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42217pub const _MM_PERM_DCAD: _MM_PERM_ENUM = 0xE3;
42218#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42219pub const _MM_PERM_DCBA: _MM_PERM_ENUM = 0xE4;
42220#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42221pub const _MM_PERM_DCBB: _MM_PERM_ENUM = 0xE5;
42222#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42223pub const _MM_PERM_DCBC: _MM_PERM_ENUM = 0xE6;
42224#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42225pub const _MM_PERM_DCBD: _MM_PERM_ENUM = 0xE7;
42226#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42227pub const _MM_PERM_DCCA: _MM_PERM_ENUM = 0xE8;
42228#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42229pub const _MM_PERM_DCCB: _MM_PERM_ENUM = 0xE9;
42230#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42231pub const _MM_PERM_DCCC: _MM_PERM_ENUM = 0xEA;
42232#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42233pub const _MM_PERM_DCCD: _MM_PERM_ENUM = 0xEB;
42234#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42235pub const _MM_PERM_DCDA: _MM_PERM_ENUM = 0xEC;
42236#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42237pub const _MM_PERM_DCDB: _MM_PERM_ENUM = 0xED;
42238#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42239pub const _MM_PERM_DCDC: _MM_PERM_ENUM = 0xEE;
42240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42241pub const _MM_PERM_DCDD: _MM_PERM_ENUM = 0xEF;
42242#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42243pub const _MM_PERM_DDAA: _MM_PERM_ENUM = 0xF0;
42244#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42245pub const _MM_PERM_DDAB: _MM_PERM_ENUM = 0xF1;
42246#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42247pub const _MM_PERM_DDAC: _MM_PERM_ENUM = 0xF2;
42248#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42249pub const _MM_PERM_DDAD: _MM_PERM_ENUM = 0xF3;
42250#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42251pub const _MM_PERM_DDBA: _MM_PERM_ENUM = 0xF4;
42252#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42253pub const _MM_PERM_DDBB: _MM_PERM_ENUM = 0xF5;
42254#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42255pub const _MM_PERM_DDBC: _MM_PERM_ENUM = 0xF6;
42256#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42257pub const _MM_PERM_DDBD: _MM_PERM_ENUM = 0xF7;
42258#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42259pub const _MM_PERM_DDCA: _MM_PERM_ENUM = 0xF8;
42260#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42261pub const _MM_PERM_DDCB: _MM_PERM_ENUM = 0xF9;
42262#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42263pub const _MM_PERM_DDCC: _MM_PERM_ENUM = 0xFA;
42264#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42265pub const _MM_PERM_DDCD: _MM_PERM_ENUM = 0xFB;
42266#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42267pub const _MM_PERM_DDDA: _MM_PERM_ENUM = 0xFC;
42268#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42269pub const _MM_PERM_DDDB: _MM_PERM_ENUM = 0xFD;
42270#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42271pub const _MM_PERM_DDDC: _MM_PERM_ENUM = 0xFE;
42272#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42273pub const _MM_PERM_DDDD: _MM_PERM_ENUM = 0xFF;
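
// Illustrative usage sketch, not part of the original source: each `_MM_PERM_*`
// constant packs four 2-bit source selectors (A = element 0 through D =
// element 3, named from the highest destination element down to the lowest)
// and is used with `_mm512_shuffle_epi32`:
//
//     // For any `__m512i` value `a`, `_MM_PERM_DCBA` (0xE4) is the identity,
//     // while `_MM_PERM_ABCD` (0x1B) reverses the four 32-bit elements within
//     // each 128-bit lane.
//     let r = _mm512_shuffle_epi32::<_MM_PERM_ABCD>(a);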
42274
42275#[allow(improper_ctypes)]
42276unsafe extern "C" {
42277    #[link_name = "llvm.x86.avx512.sqrt.ps.512"]
42278    fn vsqrtps(a: f32x16, rounding: i32) -> f32x16;
42279    #[link_name = "llvm.x86.avx512.sqrt.pd.512"]
42280    fn vsqrtpd(a: f64x8, rounding: i32) -> f64x8;
42281
42282    #[link_name = "llvm.x86.avx512.vfmadd.ps.512"]
42283    fn vfmadd132psround(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512;
42284    #[link_name = "llvm.x86.avx512.vfmadd.pd.512"]
42285    fn vfmadd132pdround(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d;
42286
42287    #[link_name = "llvm.x86.avx512.vfmaddsub.ps.512"]
42288    fn vfmaddsubpsround(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512; //from clang
42289    #[link_name = "llvm.x86.avx512.vfmaddsub.pd.512"]
42290    fn vfmaddsubpdround(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d; //from clang
42291
42292    #[link_name = "llvm.x86.avx512.add.ps.512"]
42293    fn vaddps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
42294    #[link_name = "llvm.x86.avx512.add.pd.512"]
42295    fn vaddpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
42296    #[link_name = "llvm.x86.avx512.sub.ps.512"]
42297    fn vsubps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
42298    #[link_name = "llvm.x86.avx512.sub.pd.512"]
42299    fn vsubpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
42300    #[link_name = "llvm.x86.avx512.mul.ps.512"]
42301    fn vmulps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
42302    #[link_name = "llvm.x86.avx512.mul.pd.512"]
42303    fn vmulpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
42304    #[link_name = "llvm.x86.avx512.div.ps.512"]
42305    fn vdivps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
42306    #[link_name = "llvm.x86.avx512.div.pd.512"]
42307    fn vdivpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
42308
42309    #[link_name = "llvm.x86.avx512.max.ps.512"]
42310    fn vmaxps(a: f32x16, b: f32x16, sae: i32) -> f32x16;
42311    #[link_name = "llvm.x86.avx512.max.pd.512"]
42312    fn vmaxpd(a: f64x8, b: f64x8, sae: i32) -> f64x8;
42313    #[link_name = "llvm.x86.avx512.min.ps.512"]
42314    fn vminps(a: f32x16, b: f32x16, sae: i32) -> f32x16;
42315    #[link_name = "llvm.x86.avx512.min.pd.512"]
42316    fn vminpd(a: f64x8, b: f64x8, sae: i32) -> f64x8;
42317
42318    #[link_name = "llvm.x86.avx512.mask.getexp.ps.512"]
42319    fn vgetexpps(a: f32x16, src: f32x16, m: u16, sae: i32) -> f32x16;
42320
42321    #[link_name = "llvm.x86.avx512.mask.getexp.ps.256"]
42322    fn vgetexpps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
42323    #[link_name = "llvm.x86.avx512.mask.getexp.ps.128"]
42324    fn vgetexpps128(a: f32x4, src: f32x4, m: u8) -> f32x4;
42325
42326    #[link_name = "llvm.x86.avx512.mask.getexp.pd.512"]
42327    fn vgetexppd(a: f64x8, src: f64x8, m: u8, sae: i32) -> f64x8;
42328    #[link_name = "llvm.x86.avx512.mask.getexp.pd.256"]
42329    fn vgetexppd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
42330    #[link_name = "llvm.x86.avx512.mask.getexp.pd.128"]
42331    fn vgetexppd128(a: f64x2, src: f64x2, m: u8) -> f64x2;
42332
42333    #[link_name = "llvm.x86.avx512.mask.rndscale.ps.512"]
42334    fn vrndscaleps(a: f32x16, imm8: i32, src: f32x16, mask: u16, sae: i32) -> f32x16;
42335    #[link_name = "llvm.x86.avx512.mask.rndscale.ps.256"]
42336    fn vrndscaleps256(a: f32x8, imm8: i32, src: f32x8, mask: u8) -> f32x8;
42337    #[link_name = "llvm.x86.avx512.mask.rndscale.ps.128"]
42338    fn vrndscaleps128(a: f32x4, imm8: i32, src: f32x4, mask: u8) -> f32x4;
42339
42340    #[link_name = "llvm.x86.avx512.mask.rndscale.pd.512"]
42341    fn vrndscalepd(a: f64x8, imm8: i32, src: f64x8, mask: u8, sae: i32) -> f64x8;
42342    #[link_name = "llvm.x86.avx512.mask.rndscale.pd.256"]
42343    fn vrndscalepd256(a: f64x4, imm8: i32, src: f64x4, mask: u8) -> f64x4;
42344    #[link_name = "llvm.x86.avx512.mask.rndscale.pd.128"]
42345    fn vrndscalepd128(a: f64x2, imm8: i32, src: f64x2, mask: u8) -> f64x2;
42346
42347    #[link_name = "llvm.x86.avx512.mask.scalef.ps.512"]
42348    fn vscalefps(a: f32x16, b: f32x16, src: f32x16, mask: u16, rounding: i32) -> f32x16;
42349    #[link_name = "llvm.x86.avx512.mask.scalef.ps.256"]
42350    fn vscalefps256(a: f32x8, b: f32x8, src: f32x8, mask: u8) -> f32x8;
42351    #[link_name = "llvm.x86.avx512.mask.scalef.ps.128"]
42352    fn vscalefps128(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
42353
42354    #[link_name = "llvm.x86.avx512.mask.scalef.pd.512"]
42355    fn vscalefpd(a: f64x8, b: f64x8, src: f64x8, mask: u8, rounding: i32) -> f64x8;
42356    #[link_name = "llvm.x86.avx512.mask.scalef.pd.256"]
42357    fn vscalefpd256(a: f64x4, b: f64x4, src: f64x4, mask: u8) -> f64x4;
42358    #[link_name = "llvm.x86.avx512.mask.scalef.pd.128"]
42359    fn vscalefpd128(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
42360
42361    #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.512"]
42362    fn vfixupimmps(a: f32x16, b: f32x16, c: i32x16, imm8: i32, mask: u16, sae: i32) -> f32x16;
42363    #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.256"]
42364    fn vfixupimmps256(a: f32x8, b: f32x8, c: i32x8, imm8: i32, mask: u8) -> f32x8;
42365    #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.128"]
42366    fn vfixupimmps128(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8) -> f32x4;
42367
42368    #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.512"]
42369    fn vfixupimmpd(a: f64x8, b: f64x8, c: i64x8, imm8: i32, mask: u8, sae: i32) -> f64x8;
42370    #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.256"]
42371    fn vfixupimmpd256(a: f64x4, b: f64x4, c: i64x4, imm8: i32, mask: u8) -> f64x4;
42372    #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.128"]
42373    fn vfixupimmpd128(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8) -> f64x2;
42374
42375    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.512"]
42376    fn vfixupimmpsz(a: f32x16, b: f32x16, c: i32x16, imm8: i32, mask: u16, sae: i32) -> f32x16;
42377    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.256"]
42378    fn vfixupimmpsz256(a: f32x8, b: f32x8, c: i32x8, imm8: i32, mask: u8) -> f32x8;
42379    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.128"]
42380    fn vfixupimmpsz128(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8) -> f32x4;
42381
42382    #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.512"]
42383    fn vfixupimmpdz(a: f64x8, b: f64x8, c: i64x8, imm8: i32, mask: u8, sae: i32) -> f64x8;
42384    #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.256"]
42385    fn vfixupimmpdz256(a: f64x4, b: f64x4, c: i64x4, imm8: i32, mask: u8) -> f64x4;
42386    #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.128"]
42387    fn vfixupimmpdz128(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8) -> f64x2;
42388
42389    #[link_name = "llvm.x86.avx512.pternlog.d.512"]
42390    fn vpternlogd(a: i32x16, b: i32x16, c: i32x16, imm8: i32) -> i32x16;
42391    #[link_name = "llvm.x86.avx512.pternlog.d.256"]
42392    fn vpternlogd256(a: i32x8, b: i32x8, c: i32x8, imm8: i32) -> i32x8;
42393    #[link_name = "llvm.x86.avx512.pternlog.d.128"]
42394    fn vpternlogd128(a: i32x4, b: i32x4, c: i32x4, imm8: i32) -> i32x4;
42395
42396    #[link_name = "llvm.x86.avx512.pternlog.q.512"]
42397    fn vpternlogq(a: i64x8, b: i64x8, c: i64x8, imm8: i32) -> i64x8;
42398    #[link_name = "llvm.x86.avx512.pternlog.q.256"]
42399    fn vpternlogq256(a: i64x4, b: i64x4, c: i64x4, imm8: i32) -> i64x4;
42400    #[link_name = "llvm.x86.avx512.pternlog.q.128"]
42401    fn vpternlogq128(a: i64x2, b: i64x2, c: i64x2, imm8: i32) -> i64x2;
42402
42403    #[link_name = "llvm.x86.avx512.mask.getmant.ps.512"]
42404    fn vgetmantps(a: f32x16, mantissas: i32, src: f32x16, m: u16, sae: i32) -> f32x16;
42405    #[link_name = "llvm.x86.avx512.mask.getmant.ps.256"]
42406    fn vgetmantps256(a: f32x8, mantissas: i32, src: f32x8, m: u8) -> f32x8;
42407    #[link_name = "llvm.x86.avx512.mask.getmant.ps.128"]
42408    fn vgetmantps128(a: f32x4, mantissas: i32, src: f32x4, m: u8) -> f32x4;
42409
42410    #[link_name = "llvm.x86.avx512.mask.getmant.pd.512"]
42411    fn vgetmantpd(a: f64x8, mantissas: i32, src: f64x8, m: u8, sae: i32) -> f64x8;
42412    #[link_name = "llvm.x86.avx512.mask.getmant.pd.256"]
42413    fn vgetmantpd256(a: f64x4, mantissas: i32, src: f64x4, m: u8) -> f64x4;
42414    #[link_name = "llvm.x86.avx512.mask.getmant.pd.128"]
42415    fn vgetmantpd128(a: f64x2, mantissas: i32, src: f64x2, m: u8) -> f64x2;
42416
42417    #[link_name = "llvm.x86.avx512.rcp14.ps.512"]
42418    fn vrcp14ps(a: f32x16, src: f32x16, m: u16) -> f32x16;
42419    #[link_name = "llvm.x86.avx512.rcp14.ps.256"]
42420    fn vrcp14ps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
42421    #[link_name = "llvm.x86.avx512.rcp14.ps.128"]
42422    fn vrcp14ps128(a: f32x4, src: f32x4, m: u8) -> f32x4;
42423
42424    #[link_name = "llvm.x86.avx512.rcp14.pd.512"]
42425    fn vrcp14pd(a: f64x8, src: f64x8, m: u8) -> f64x8;
42426    #[link_name = "llvm.x86.avx512.rcp14.pd.256"]
42427    fn vrcp14pd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
42428    #[link_name = "llvm.x86.avx512.rcp14.pd.128"]
42429    fn vrcp14pd128(a: f64x2, src: f64x2, m: u8) -> f64x2;
42430
42431    #[link_name = "llvm.x86.avx512.rsqrt14.ps.512"]
42432    fn vrsqrt14ps(a: f32x16, src: f32x16, m: u16) -> f32x16;
42433    #[link_name = "llvm.x86.avx512.rsqrt14.ps.256"]
42434    fn vrsqrt14ps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
42435    #[link_name = "llvm.x86.avx512.rsqrt14.ps.128"]
42436    fn vrsqrt14ps128(a: f32x4, src: f32x4, m: u8) -> f32x4;
42437
42438    #[link_name = "llvm.x86.avx512.rsqrt14.pd.512"]
42439    fn vrsqrt14pd(a: f64x8, src: f64x8, m: u8) -> f64x8;
42440    #[link_name = "llvm.x86.avx512.rsqrt14.pd.256"]
42441    fn vrsqrt14pd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
42442    #[link_name = "llvm.x86.avx512.rsqrt14.pd.128"]
42443    fn vrsqrt14pd128(a: f64x2, src: f64x2, m: u8) -> f64x2;
42444
42445    #[link_name = "llvm.x86.avx512.mask.cvtps2dq.512"]
42446    fn vcvtps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;
42447
42448    #[link_name = "llvm.x86.avx512.mask.cvtps2udq.512"]
42449    fn vcvtps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16;
42450    #[link_name = "llvm.x86.avx512.mask.cvtps2udq.256"]
42451    fn vcvtps2udq256(a: f32x8, src: u32x8, mask: u8) -> u32x8;
42452    #[link_name = "llvm.x86.avx512.mask.cvtps2udq.128"]
42453    fn vcvtps2udq128(a: f32x4, src: u32x4, mask: u8) -> u32x4;
42454
42455    #[link_name = "llvm.x86.avx512.mask.cvtps2pd.512"]
42456    fn vcvtps2pd(a: f32x8, src: f64x8, mask: u8, sae: i32) -> f64x8;
42457    #[link_name = "llvm.x86.avx512.mask.cvtpd2ps.512"]
42458    fn vcvtpd2ps(a: f64x8, src: f32x8, mask: u8, rounding: i32) -> f32x8;
42459
42460    #[link_name = "llvm.x86.avx512.mask.cvtpd2dq.512"]
42461    fn vcvtpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;
42462
42463    #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.512"]
42464    fn vcvtpd2udq(a: f64x8, src: u32x8, mask: u8, rounding: i32) -> u32x8;
42465    #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.256"]
42466    fn vcvtpd2udq256(a: f64x4, src: u32x4, mask: u8) -> u32x4;
42467    #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.128"]
42468    fn vcvtpd2udq128(a: f64x2, src: u32x4, mask: u8) -> u32x4;
42469
42470    #[link_name = "llvm.x86.avx512.sitofp.round.v16f32.v16i32"]
42471    fn vcvtdq2ps(a: i32x16, rounding: i32) -> f32x16;
42472    #[link_name = "llvm.x86.avx512.uitofp.round.v16f32.v16i32"]
42473    fn vcvtudq2ps(a: u32x16, rounding: i32) -> f32x16;
42474
42475    #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.512"]
42476    fn vcvtps2ph(a: f32x16, sae: i32, src: i16x16, mask: u16) -> i16x16;
42477    #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.256"]
42478    fn vcvtps2ph256(a: f32x8, sae: i32, src: i16x8, mask: u8) -> i16x8;
42479    #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.128"]
42480    fn vcvtps2ph128(a: f32x4, sae: i32, src: i16x8, mask: u8) -> i16x8;
42481
42482    #[link_name = "llvm.x86.avx512.mask.vcvtph2ps.512"]
42483    fn vcvtph2ps(a: i16x16, src: f32x16, mask: u16, sae: i32) -> f32x16;
42484
42485    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.512"]
42486    fn vcvttps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;
42487    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.256"]
42488    fn vcvttps2dq256(a: f32x8, src: i32x8, mask: u8) -> i32x8;
42489    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.128"]
42490    fn vcvttps2dq128(a: f32x4, src: i32x4, mask: u8) -> i32x4;
42491
42492    #[link_name = "llvm.x86.avx512.mask.cvttps2udq.512"]
42493    fn vcvttps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16;
42494    #[link_name = "llvm.x86.avx512.mask.cvttps2udq.256"]
42495    fn vcvttps2udq256(a: f32x8, src: u32x8, mask: u8) -> u32x8;
42496    #[link_name = "llvm.x86.avx512.mask.cvttps2udq.128"]
42497    fn vcvttps2udq128(a: f32x4, src: u32x4, mask: u8) -> u32x4;
42498
42499    #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.512"]
42500    fn vcvttpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;
42501    #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.256"]
42502    fn vcvttpd2dq256(a: f64x4, src: i32x4, mask: u8) -> i32x4;
42503    #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.128"]
42504    fn vcvttpd2dq128(a: f64x2, src: i32x4, mask: u8) -> i32x4;
42505
42506    #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.512"]
42507    fn vcvttpd2udq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> u32x8;
42508    #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.256"]
42509    fn vcvttpd2udq256(a: f64x4, src: i32x4, mask: u8) -> u32x4;
42510    #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.128"]
42511    fn vcvttpd2udq128(a: f64x2, src: i32x4, mask: u8) -> u32x4;
42512
42513    #[link_name = "llvm.x86.avx512.mask.pmov.dw.128"]
42514    fn vpmovdw128(a: i32x4, src: i16x8, mask: u8) -> i16x8;
42515    #[link_name = "llvm.x86.avx512.mask.pmov.db.256"]
42516    fn vpmovdb256(a: i32x8, src: i8x16, mask: u8) -> i8x16;
42517    #[link_name = "llvm.x86.avx512.mask.pmov.db.128"]
42518    fn vpmovdb128(a: i32x4, src: i8x16, mask: u8) -> i8x16;
42519
42520    #[link_name = "llvm.x86.avx512.mask.pmov.qw.256"]
42521    fn vpmovqw256(a: i64x4, src: i16x8, mask: u8) -> i16x8;
42522    #[link_name = "llvm.x86.avx512.mask.pmov.qw.128"]
42523    fn vpmovqw128(a: i64x2, src: i16x8, mask: u8) -> i16x8;
42524    #[link_name = "llvm.x86.avx512.mask.pmov.qb.256"]
42525    fn vpmovqb256(a: i64x4, src: i8x16, mask: u8) -> i8x16;
42526    #[link_name = "llvm.x86.avx512.mask.pmov.qb.128"]
42527    fn vpmovqb128(a: i64x2, src: i8x16, mask: u8) -> i8x16;
42528    #[link_name = "llvm.x86.avx512.mask.pmov.qd.128"]
42529    fn vpmovqd128(a: i64x2, src: i32x4, mask: u8) -> i32x4;
42530
42531    #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.512"]
42532    fn vpmovdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
42533    #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.256"]
42534    fn vpmovdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
42535    #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.128"]
42536    fn vpmovdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
42537
42538    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.512"]
42539    fn vpmovsdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
42540    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.256"]
42541    fn vpmovsdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
42542    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.128"]
42543    fn vpmovsdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
42544
42545    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.512"]
42546    fn vpmovusdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
42547    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.256"]
42548    fn vpmovusdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
42549    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.128"]
42550    fn vpmovusdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
42551
42552    #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.512"]
42553    fn vpmovdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
42554    #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.256"]
42555    fn vpmovdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
42556    #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.128"]
42557    fn vpmovdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
42558
42559    #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.512"]
42560    fn vpmovsdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
42561    #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.256"]
42562    fn vpmovsdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
42563    #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.128"]
42564    fn vpmovsdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
42565
42566    #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.512"]
42567    fn vpmovusdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
42568    #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.256"]
42569    fn vpmovusdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
42570    #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.128"]
42571    fn vpmovusdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
42572
42573    #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.512"]
42574    fn vpmovqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42575    #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.256"]
42576    fn vpmovqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42577    #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.128"]
42578    fn vpmovqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42579
42580    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.512"]
42581    fn vpmovsqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42582    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.256"]
42583    fn vpmovsqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42584    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.128"]
42585    fn vpmovsqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42586
42587    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.512"]
42588    fn vpmovusqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42589    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.256"]
42590    fn vpmovusqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42591    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.128"]
42592    fn vpmovusqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42593
42594    #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.512"]
42595    fn vpmovqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42596    #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.256"]
42597    fn vpmovqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42598    #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.128"]
42599    fn vpmovqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42600
42601    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.512"]
42602    fn vpmovsqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42603    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.256"]
42604    fn vpmovsqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42605    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.128"]
42606    fn vpmovsqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42607
42608    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.512"]
42609    fn vpmovusqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42610    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.256"]
42611    fn vpmovusqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42612    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.128"]
42613    fn vpmovusqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42614
42615    #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.512"]
42616    fn vpmovqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42617    #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.256"]
42618    fn vpmovqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42619    #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.128"]
42620    fn vpmovqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42621
42622    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.512"]
42623    fn vpmovsqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42624    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.256"]
42625    fn vpmovsqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42626    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.128"]
42627    fn vpmovsqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42628
42629    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.512"]
42630    fn vpmovusqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42631    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.256"]
42632    fn vpmovusqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42633    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.128"]
42634    fn vpmovusqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42635
42636    #[link_name = "llvm.x86.avx512.mask.pmov.qb.512"]
42637    fn vpmovqb(a: i64x8, src: i8x16, mask: u8) -> i8x16;
42638
42639    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.512"]
42640    fn vpmovsdw(a: i32x16, src: i16x16, mask: u16) -> i16x16;
42641    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.256"]
42642    fn vpmovsdw256(a: i32x8, src: i16x8, mask: u8) -> i16x8;
42643    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.128"]
42644    fn vpmovsdw128(a: i32x4, src: i16x8, mask: u8) -> i16x8;
42645
42646    #[link_name = "llvm.x86.avx512.mask.pmovs.db.512"]
42647    fn vpmovsdb(a: i32x16, src: i8x16, mask: u16) -> i8x16;
42648    #[link_name = "llvm.x86.avx512.mask.pmovs.db.256"]
42649    fn vpmovsdb256(a: i32x8, src: i8x16, mask: u8) -> i8x16;
42650    #[link_name = "llvm.x86.avx512.mask.pmovs.db.128"]
42651    fn vpmovsdb128(a: i32x4, src: i8x16, mask: u8) -> i8x16;
42652
42653    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.512"]
42654    fn vpmovsqd(a: i64x8, src: i32x8, mask: u8) -> i32x8;
42655    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.256"]
42656    fn vpmovsqd256(a: i64x4, src: i32x4, mask: u8) -> i32x4;
42657    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.128"]
42658    fn vpmovsqd128(a: i64x2, src: i32x4, mask: u8) -> i32x4;
42659
42660    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.512"]
42661    fn vpmovsqw(a: i64x8, src: i16x8, mask: u8) -> i16x8;
42662    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.256"]
42663    fn vpmovsqw256(a: i64x4, src: i16x8, mask: u8) -> i16x8;
42664    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.128"]
42665    fn vpmovsqw128(a: i64x2, src: i16x8, mask: u8) -> i16x8;
42666
42667    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.512"]
42668    fn vpmovsqb(a: i64x8, src: i8x16, mask: u8) -> i8x16;
42669    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.256"]
42670    fn vpmovsqb256(a: i64x4, src: i8x16, mask: u8) -> i8x16;
42671    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.128"]
42672    fn vpmovsqb128(a: i64x2, src: i8x16, mask: u8) -> i8x16;
42673
42674    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.512"]
42675    fn vpmovusdw(a: u32x16, src: u16x16, mask: u16) -> u16x16;
42676    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.256"]
42677    fn vpmovusdw256(a: u32x8, src: u16x8, mask: u8) -> u16x8;
42678    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.128"]
42679    fn vpmovusdw128(a: u32x4, src: u16x8, mask: u8) -> u16x8;
42680
42681    #[link_name = "llvm.x86.avx512.mask.pmovus.db.512"]
42682    fn vpmovusdb(a: u32x16, src: u8x16, mask: u16) -> u8x16;
42683    #[link_name = "llvm.x86.avx512.mask.pmovus.db.256"]
42684    fn vpmovusdb256(a: u32x8, src: u8x16, mask: u8) -> u8x16;
42685    #[link_name = "llvm.x86.avx512.mask.pmovus.db.128"]
42686    fn vpmovusdb128(a: u32x4, src: u8x16, mask: u8) -> u8x16;
42687
42688    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.512"]
42689    fn vpmovusqd(a: u64x8, src: u32x8, mask: u8) -> u32x8;
42690    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.256"]
42691    fn vpmovusqd256(a: u64x4, src: u32x4, mask: u8) -> u32x4;
42692    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.128"]
42693    fn vpmovusqd128(a: u64x2, src: u32x4, mask: u8) -> u32x4;
42694
42695    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.512"]
42696    fn vpmovusqw(a: u64x8, src: u16x8, mask: u8) -> u16x8;
42697    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.256"]
42698    fn vpmovusqw256(a: u64x4, src: u16x8, mask: u8) -> u16x8;
42699    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.128"]
42700    fn vpmovusqw128(a: u64x2, src: u16x8, mask: u8) -> u16x8;
42701
42702    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.512"]
42703    fn vpmovusqb(a: u64x8, src: u8x16, mask: u8) -> u8x16;
42704    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.256"]
42705    fn vpmovusqb256(a: u64x4, src: u8x16, mask: u8) -> u8x16;
42706    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.128"]
42707    fn vpmovusqb128(a: u64x2, src: u8x16, mask: u8) -> u8x16;
42708
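    // 512-bit masked gathers. Each declaration takes the pass-through values
    // (`src`), a byte base pointer (`slice`), a vector of signed indices
    // (`offsets`), a per-lane mask, and a byte `scale` (1, 2, 4, or 8); lanes
    // whose mask bit is clear keep their `src` value.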
42709    #[link_name = "llvm.x86.avx512.gather.dpd.512"]
42710    fn vgatherdpd(src: f64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> f64x8;
42711    #[link_name = "llvm.x86.avx512.gather.dps.512"]
42712    fn vgatherdps(src: f32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> f32x16;
42713    #[link_name = "llvm.x86.avx512.gather.qpd.512"]
42714    fn vgatherqpd(src: f64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f64x8;
42715    #[link_name = "llvm.x86.avx512.gather.qps.512"]
42716    fn vgatherqps(src: f32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f32x8;
42717    #[link_name = "llvm.x86.avx512.gather.dpq.512"]
42718    fn vpgatherdq(src: i64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> i64x8;
42719    #[link_name = "llvm.x86.avx512.gather.dpi.512"]
42720    fn vpgatherdd(src: i32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> i32x16;
42721    #[link_name = "llvm.x86.avx512.gather.qpq.512"]
42722    fn vpgatherqq(src: i64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i64x8;
42723    #[link_name = "llvm.x86.avx512.gather.qpi.512"]
42724    fn vpgatherqd(src: i32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i32x8;
42725
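    // 512-bit masked scatters: the mirror image of the gathers above, writing
    // each selected lane of `src` to `slice + offset * scale`; lanes with a
    // clear mask bit leave memory untouched.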
42726    #[link_name = "llvm.x86.avx512.scatter.dpd.512"]
42727    fn vscatterdpd(slice: *mut i8, mask: i8, offsets: i32x8, src: f64x8, scale: i32);
42728    #[link_name = "llvm.x86.avx512.scatter.dps.512"]
42729    fn vscatterdps(slice: *mut i8, mask: i16, offsets: i32x16, src: f32x16, scale: i32);
42730    #[link_name = "llvm.x86.avx512.scatter.qpd.512"]
42731    fn vscatterqpd(slice: *mut i8, mask: i8, offsets: i64x8, src: f64x8, scale: i32);
42732    #[link_name = "llvm.x86.avx512.scatter.qps.512"]
42733    fn vscatterqps(slice: *mut i8, mask: i8, offsets: i64x8, src: f32x8, scale: i32);
42734    #[link_name = "llvm.x86.avx512.scatter.dpq.512"]
42735    fn vpscatterdq(slice: *mut i8, mask: i8, offsets: i32x8, src: i64x8, scale: i32);
42736
42737    #[link_name = "llvm.x86.avx512.scatter.dpi.512"]
42738    fn vpscatterdd(slice: *mut i8, mask: i16, offsets: i32x16, src: i32x16, scale: i32);
42739    #[link_name = "llvm.x86.avx512.scatter.qpq.512"]
42740    fn vpscatterqq(slice: *mut i8, mask: i8, offsets: i64x8, src: i64x8, scale: i32);
42741    #[link_name = "llvm.x86.avx512.scatter.qpi.512"]
42742    fn vpscatterqd(slice: *mut i8, mask: i8, offsets: i64x8, src: i32x8, scale: i32);
42743
42744    #[link_name = "llvm.x86.avx512.scattersiv4.si"]
42745    fn vpscatterdd_128(slice: *mut i8, k: u8, offsets: i32x4, src: i32x4, scale: i32);
42746    #[link_name = "llvm.x86.avx512.scattersiv2.di"]
42747    fn vpscatterdq_128(slice: *mut i8, k: u8, offsets: i32x4, src: i64x2, scale: i32);
42748    #[link_name = "llvm.x86.avx512.scattersiv2.df"]
42749    fn vscatterdpd_128(slice: *mut i8, k: u8, offsets: i32x4, src: f64x2, scale: i32);
42750    #[link_name = "llvm.x86.avx512.scattersiv4.sf"]
42751    fn vscatterdps_128(slice: *mut i8, k: u8, offsets: i32x4, src: f32x4, scale: i32);
42752    #[link_name = "llvm.x86.avx512.scatterdiv4.si"]
42753    fn vpscatterqd_128(slice: *mut i8, k: u8, offsets: i64x2, src: i32x4, scale: i32);
42754    #[link_name = "llvm.x86.avx512.scatterdiv2.di"]
42755    fn vpscatterqq_128(slice: *mut i8, k: u8, offsets: i64x2, src: i64x2, scale: i32);
42756    #[link_name = "llvm.x86.avx512.scatterdiv2.df"]
42757    fn vscatterqpd_128(slice: *mut i8, k: u8, offsets: i64x2, src: f64x2, scale: i32);
42758    #[link_name = "llvm.x86.avx512.scatterdiv4.sf"]
42759    fn vscatterqps_128(slice: *mut i8, k: u8, offsets: i64x2, src: f32x4, scale: i32);
42760
42761    #[link_name = "llvm.x86.avx512.scattersiv8.si"]
42762    fn vpscatterdd_256(slice: *mut i8, k: u8, offsets: i32x8, src: i32x8, scale: i32);
42763    #[link_name = "llvm.x86.avx512.scattersiv4.di"]
42764    fn vpscatterdq_256(slice: *mut i8, k: u8, offsets: i32x4, src: i64x4, scale: i32);
42765    #[link_name = "llvm.x86.avx512.scattersiv4.df"]
42766    fn vscatterdpd_256(slice: *mut i8, k: u8, offsets: i32x4, src: f64x4, scale: i32);
42767    #[link_name = "llvm.x86.avx512.scattersiv8.sf"]
42768    fn vscatterdps_256(slice: *mut i8, k: u8, offsets: i32x8, src: f32x8, scale: i32);
42769    #[link_name = "llvm.x86.avx512.scatterdiv8.si"]
42770    fn vpscatterqd_256(slice: *mut i8, k: u8, offsets: i64x4, src: i32x4, scale: i32);
42771    #[link_name = "llvm.x86.avx512.scatterdiv4.di"]
42772    fn vpscatterqq_256(slice: *mut i8, k: u8, offsets: i64x4, src: i64x4, scale: i32);
42773    #[link_name = "llvm.x86.avx512.scatterdiv4.df"]
42774    fn vscatterqpd_256(slice: *mut i8, k: u8, offsets: i64x4, src: f64x4, scale: i32);
42775    #[link_name = "llvm.x86.avx512.scatterdiv8.sf"]
42776    fn vscatterqps_256(slice: *mut i8, k: u8, offsets: i64x4, src: f32x4, scale: i32);
42777
42778    #[link_name = "llvm.x86.avx512.gather3siv4.si"]
42779    fn vpgatherdd_128(src: i32x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i32x4;
42780    #[link_name = "llvm.x86.avx512.gather3siv2.di"]
42781    fn vpgatherdq_128(src: i64x2, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i64x2;
42782    #[link_name = "llvm.x86.avx512.gather3siv2.df"]
42783    fn vgatherdpd_128(src: f64x2, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> f64x2;
42784    #[link_name = "llvm.x86.avx512.gather3siv4.sf"]
42785    fn vgatherdps_128(src: f32x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> f32x4;
42786    #[link_name = "llvm.x86.avx512.gather3div4.si"]
42787    fn vpgatherqd_128(src: i32x4, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> i32x4;
42788    #[link_name = "llvm.x86.avx512.gather3div2.di"]
42789    fn vpgatherqq_128(src: i64x2, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> i64x2;
42790    #[link_name = "llvm.x86.avx512.gather3div2.df"]
42791    fn vgatherqpd_128(src: f64x2, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> f64x2;
42792    #[link_name = "llvm.x86.avx512.gather3div4.sf"]
42793    fn vgatherqps_128(src: f32x4, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> f32x4;
42794
42795    #[link_name = "llvm.x86.avx512.gather3siv8.si"]
42796    fn vpgatherdd_256(src: i32x8, slice: *const i8, offsets: i32x8, k: u8, scale: i32) -> i32x8;
42797    #[link_name = "llvm.x86.avx512.gather3siv4.di"]
42798    fn vpgatherdq_256(src: i64x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i64x4;
42799    #[link_name = "llvm.x86.avx512.gather3siv4.df"]
42800    fn vgatherdpd_256(src: f64x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> f64x4;
42801    #[link_name = "llvm.x86.avx512.gather3siv8.sf"]
42802    fn vgatherdps_256(src: f32x8, slice: *const i8, offsets: i32x8, k: u8, scale: i32) -> f32x8;
42803    #[link_name = "llvm.x86.avx512.gather3div8.si"]
42804    fn vpgatherqd_256(src: i32x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> i32x4;
42805    #[link_name = "llvm.x86.avx512.gather3div4.di"]
42806    fn vpgatherqq_256(src: i64x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> i64x4;
42807    #[link_name = "llvm.x86.avx512.gather3div4.df"]
42808    fn vgatherqpd_256(src: f64x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> f64x4;
42809    #[link_name = "llvm.x86.avx512.gather3div8.sf"]
42810    fn vgatherqps_256(src: f32x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> f32x4;
42811
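    // Masked compares: `op` is a _CMP_* predicate immediate, `m` is a mask
    // ANDed into the result, and (where present) `sae` is an _MM_FROUND_*
    // exception-suppression control; the return value is the comparison bitmask.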
42812    #[link_name = "llvm.x86.avx512.mask.cmp.ss"]
42813    fn vcmpss(a: __m128, b: __m128, op: i32, m: i8, sae: i32) -> i8;
42814    #[link_name = "llvm.x86.avx512.mask.cmp.sd"]
42815    fn vcmpsd(a: __m128d, b: __m128d, op: i32, m: i8, sae: i32) -> i8;
42816
42817    #[link_name = "llvm.x86.avx512.mask.cmp.ps.512"]
42818    fn vcmpps(a: f32x16, b: f32x16, op: i32, m: i16, sae: i32) -> i16;
42819    #[link_name = "llvm.x86.avx512.mask.cmp.ps.256"]
42820    fn vcmpps256(a: f32x8, b: f32x8, op: i32, m: i8) -> i8;
42821    #[link_name = "llvm.x86.avx512.mask.cmp.ps.128"]
42822    fn vcmpps128(a: f32x4, b: f32x4, op: i32, m: i8) -> i8;
42823
42824    #[link_name = "llvm.x86.avx512.mask.cmp.pd.512"]
42825    fn vcmppd(a: f64x8, b: f64x8, op: i32, m: i8, sae: i32) -> i8;
42826    #[link_name = "llvm.x86.avx512.mask.cmp.pd.256"]
42827    fn vcmppd256(a: f64x4, b: f64x4, op: i32, m: i8) -> i8;
42828    #[link_name = "llvm.x86.avx512.mask.cmp.pd.128"]
42829    fn vcmppd128(a: f64x2, b: f64x2, op: i32, m: i8) -> i8;
42830
42831    #[link_name = "llvm.x86.avx512.mask.prol.d.512"]
42832    fn vprold(a: i32x16, imm8: i32) -> i32x16;
42833    #[link_name = "llvm.x86.avx512.mask.prol.d.256"]
42834    fn vprold256(a: i32x8, imm8: i32) -> i32x8;
42835    #[link_name = "llvm.x86.avx512.mask.prol.d.128"]
42836    fn vprold128(a: i32x4, imm8: i32) -> i32x4;
42837
42838    #[link_name = "llvm.x86.avx512.mask.pror.d.512"]
42839    fn vprord(a: i32x16, imm8: i32) -> i32x16;
42840    #[link_name = "llvm.x86.avx512.mask.pror.d.256"]
42841    fn vprord256(a: i32x8, imm8: i32) -> i32x8;
42842    #[link_name = "llvm.x86.avx512.mask.pror.d.128"]
42843    fn vprord128(a: i32x4, imm8: i32) -> i32x4;
42844
42845    #[link_name = "llvm.x86.avx512.mask.prol.q.512"]
42846    fn vprolq(a: i64x8, imm8: i32) -> i64x8;
42847    #[link_name = "llvm.x86.avx512.mask.prol.q.256"]
42848    fn vprolq256(a: i64x4, imm8: i32) -> i64x4;
42849    #[link_name = "llvm.x86.avx512.mask.prol.q.128"]
42850    fn vprolq128(a: i64x2, imm8: i32) -> i64x2;
42851
42852    #[link_name = "llvm.x86.avx512.mask.pror.q.512"]
42853    fn vprorq(a: i64x8, imm8: i32) -> i64x8;
42854    #[link_name = "llvm.x86.avx512.mask.pror.q.256"]
42855    fn vprorq256(a: i64x4, imm8: i32) -> i64x4;
42856    #[link_name = "llvm.x86.avx512.mask.pror.q.128"]
42857    fn vprorq128(a: i64x2, imm8: i32) -> i64x2;
42858
42859    #[link_name = "llvm.x86.avx512.mask.prolv.d.512"]
42860    fn vprolvd(a: i32x16, b: i32x16) -> i32x16;
42861    #[link_name = "llvm.x86.avx512.mask.prolv.d.256"]
42862    fn vprolvd256(a: i32x8, b: i32x8) -> i32x8;
42863    #[link_name = "llvm.x86.avx512.mask.prolv.d.128"]
42864    fn vprolvd128(a: i32x4, b: i32x4) -> i32x4;
42865
42866    #[link_name = "llvm.x86.avx512.mask.prorv.d.512"]
42867    fn vprorvd(a: i32x16, b: i32x16) -> i32x16;
42868    #[link_name = "llvm.x86.avx512.mask.prorv.d.256"]
42869    fn vprorvd256(a: i32x8, b: i32x8) -> i32x8;
42870    #[link_name = "llvm.x86.avx512.mask.prorv.d.128"]
42871    fn vprorvd128(a: i32x4, b: i32x4) -> i32x4;
42872
42873    #[link_name = "llvm.x86.avx512.mask.prolv.q.512"]
42874    fn vprolvq(a: i64x8, b: i64x8) -> i64x8;
42875    #[link_name = "llvm.x86.avx512.mask.prolv.q.256"]
42876    fn vprolvq256(a: i64x4, b: i64x4) -> i64x4;
42877    #[link_name = "llvm.x86.avx512.mask.prolv.q.128"]
42878    fn vprolvq128(a: i64x2, b: i64x2) -> i64x2;
42879
42880    #[link_name = "llvm.x86.avx512.mask.prorv.q.512"]
42881    fn vprorvq(a: i64x8, b: i64x8) -> i64x8;
42882    #[link_name = "llvm.x86.avx512.mask.prorv.q.256"]
42883    fn vprorvq256(a: i64x4, b: i64x4) -> i64x4;
42884    #[link_name = "llvm.x86.avx512.mask.prorv.q.128"]
42885    fn vprorvq128(a: i64x2, b: i64x2) -> i64x2;
42886
42887    #[link_name = "llvm.x86.avx512.psllv.d.512"]
42888    fn vpsllvd(a: i32x16, b: i32x16) -> i32x16;
42889    #[link_name = "llvm.x86.avx512.psrlv.d.512"]
42890    fn vpsrlvd(a: i32x16, b: i32x16) -> i32x16;
42891    #[link_name = "llvm.x86.avx512.psllv.q.512"]
42892    fn vpsllvq(a: i64x8, b: i64x8) -> i64x8;
42893    #[link_name = "llvm.x86.avx512.psrlv.q.512"]
42894    fn vpsrlvq(a: i64x8, b: i64x8) -> i64x8;
42895
42896    #[link_name = "llvm.x86.avx512.psll.d.512"]
42897    fn vpslld(a: i32x16, count: i32x4) -> i32x16;
42898    #[link_name = "llvm.x86.avx512.psrl.d.512"]
42899    fn vpsrld(a: i32x16, count: i32x4) -> i32x16;
42900    #[link_name = "llvm.x86.avx512.psll.q.512"]
42901    fn vpsllq(a: i64x8, count: i64x2) -> i64x8;
42902    #[link_name = "llvm.x86.avx512.psrl.q.512"]
42903    fn vpsrlq(a: i64x8, count: i64x2) -> i64x8;
42904
42905    #[link_name = "llvm.x86.avx512.psra.d.512"]
42906    fn vpsrad(a: i32x16, count: i32x4) -> i32x16;
42907
42908    #[link_name = "llvm.x86.avx512.psra.q.512"]
42909    fn vpsraq(a: i64x8, count: i64x2) -> i64x8;
42910    #[link_name = "llvm.x86.avx512.psra.q.256"]
42911    fn vpsraq256(a: i64x4, count: i64x2) -> i64x4;
42912    #[link_name = "llvm.x86.avx512.psra.q.128"]
42913    fn vpsraq128(a: i64x2, count: i64x2) -> i64x2;
42914
42915    #[link_name = "llvm.x86.avx512.psrav.d.512"]
42916    fn vpsravd(a: i32x16, count: i32x16) -> i32x16;
42917
42918    #[link_name = "llvm.x86.avx512.psrav.q.512"]
42919    fn vpsravq(a: i64x8, count: i64x8) -> i64x8;
42920    #[link_name = "llvm.x86.avx512.psrav.q.256"]
42921    fn vpsravq256(a: i64x4, count: i64x4) -> i64x4;
42922    #[link_name = "llvm.x86.avx512.psrav.q.128"]
42923    fn vpsravq128(a: i64x2, count: i64x2) -> i64x2;
42924
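    // Variable permutes: the vpermilvar/permvar forms shuffle within `a` using
    // per-lane indices, while the vpermi2var ("2-source") forms further below
    // select from the concatenation of `a` and `b`.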
42925    #[link_name = "llvm.x86.avx512.vpermilvar.ps.512"]
42926    fn vpermilps(a: f32x16, b: i32x16) -> f32x16;
42927    #[link_name = "llvm.x86.avx512.vpermilvar.pd.512"]
42928    fn vpermilpd(a: f64x8, b: i64x8) -> f64x8;
42929
42930    #[link_name = "llvm.x86.avx512.permvar.si.512"]
42931    fn vpermd(a: i32x16, idx: i32x16) -> i32x16;
42932
42933    #[link_name = "llvm.x86.avx512.permvar.di.512"]
42934    fn vpermq(a: i64x8, idx: i64x8) -> i64x8;
42935    #[link_name = "llvm.x86.avx512.permvar.di.256"]
42936    fn vpermq256(a: i64x4, idx: i64x4) -> i64x4;
42937
42938    #[link_name = "llvm.x86.avx512.permvar.sf.512"]
42939    fn vpermps(a: f32x16, idx: i32x16) -> f32x16;
42940
42941    #[link_name = "llvm.x86.avx512.permvar.df.512"]
42942    fn vpermpd(a: f64x8, idx: i64x8) -> f64x8;
42943    #[link_name = "llvm.x86.avx512.permvar.df.256"]
42944    fn vpermpd256(a: f64x4, idx: i64x4) -> f64x4;
42945
42946    #[link_name = "llvm.x86.avx512.vpermi2var.d.512"]
42947    fn vpermi2d(a: i32x16, idx: i32x16, b: i32x16) -> i32x16;
42948    #[link_name = "llvm.x86.avx512.vpermi2var.d.256"]
42949    fn vpermi2d256(a: i32x8, idx: i32x8, b: i32x8) -> i32x8;
42950    #[link_name = "llvm.x86.avx512.vpermi2var.d.128"]
42951    fn vpermi2d128(a: i32x4, idx: i32x4, b: i32x4) -> i32x4;
42952
42953    #[link_name = "llvm.x86.avx512.vpermi2var.q.512"]
42954    fn vpermi2q(a: i64x8, idx: i64x8, b: i64x8) -> i64x8;
42955    #[link_name = "llvm.x86.avx512.vpermi2var.q.256"]
42956    fn vpermi2q256(a: i64x4, idx: i64x4, b: i64x4) -> i64x4;
42957    #[link_name = "llvm.x86.avx512.vpermi2var.q.128"]
42958    fn vpermi2q128(a: i64x2, idx: i64x2, b: i64x2) -> i64x2;
42959
42960    #[link_name = "llvm.x86.avx512.vpermi2var.ps.512"]
42961    fn vpermi2ps(a: f32x16, idx: i32x16, b: f32x16) -> f32x16;
42962    #[link_name = "llvm.x86.avx512.vpermi2var.ps.256"]
42963    fn vpermi2ps256(a: f32x8, idx: i32x8, b: f32x8) -> f32x8;
42964    #[link_name = "llvm.x86.avx512.vpermi2var.ps.128"]
42965    fn vpermi2ps128(a: f32x4, idx: i32x4, b: f32x4) -> f32x4;
42966
42967    #[link_name = "llvm.x86.avx512.vpermi2var.pd.512"]
42968    fn vpermi2pd(a: f64x8, idx: i64x8, b: f64x8) -> f64x8;
42969    #[link_name = "llvm.x86.avx512.vpermi2var.pd.256"]
42970    fn vpermi2pd256(a: f64x4, idx: i64x4, b: f64x4) -> f64x4;
42971    #[link_name = "llvm.x86.avx512.vpermi2var.pd.128"]
42972    fn vpermi2pd128(a: f64x2, idx: i64x2, b: f64x2) -> f64x2;
42973
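    // Compress: pack the lanes of `a` selected by `mask` into the low lanes of
    // the result; the remaining lanes are filled from `src`.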
42974    #[link_name = "llvm.x86.avx512.mask.compress.d.512"]
42975    fn vpcompressd(a: i32x16, src: i32x16, mask: u16) -> i32x16;
42976    #[link_name = "llvm.x86.avx512.mask.compress.d.256"]
42977    fn vpcompressd256(a: i32x8, src: i32x8, mask: u8) -> i32x8;
42978    #[link_name = "llvm.x86.avx512.mask.compress.d.128"]
42979    fn vpcompressd128(a: i32x4, src: i32x4, mask: u8) -> i32x4;
42980
42981    #[link_name = "llvm.x86.avx512.mask.compress.q.512"]
42982    fn vpcompressq(a: i64x8, src: i64x8, mask: u8) -> i64x8;
42983    #[link_name = "llvm.x86.avx512.mask.compress.q.256"]
42984    fn vpcompressq256(a: i64x4, src: i64x4, mask: u8) -> i64x4;
42985    #[link_name = "llvm.x86.avx512.mask.compress.q.128"]
42986    fn vpcompressq128(a: i64x2, src: i64x2, mask: u8) -> i64x2;
42987
42988    #[link_name = "llvm.x86.avx512.mask.compress.ps.512"]
42989    fn vcompressps(a: f32x16, src: f32x16, mask: u16) -> f32x16;
42990    #[link_name = "llvm.x86.avx512.mask.compress.ps.256"]
42991    fn vcompressps256(a: f32x8, src: f32x8, mask: u8) -> f32x8;
42992    #[link_name = "llvm.x86.avx512.mask.compress.ps.128"]
42993    fn vcompressps128(a: f32x4, src: f32x4, mask: u8) -> f32x4;
42994
42995    #[link_name = "llvm.x86.avx512.mask.compress.pd.512"]
42996    fn vcompresspd(a: f64x8, src: f64x8, mask: u8) -> f64x8;
42997    #[link_name = "llvm.x86.avx512.mask.compress.pd.256"]
42998    fn vcompresspd256(a: f64x4, src: f64x4, mask: u8) -> f64x4;
42999    #[link_name = "llvm.x86.avx512.mask.compress.pd.128"]
43000    fn vcompresspd128(a: f64x2, src: f64x2, mask: u8) -> f64x2;
43001
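    // Compress-store: the memory form of the above, writing only the selected,
    // packed elements to `mem`.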
43002    #[link_name = "llvm.x86.avx512.mask.compress.store.d.512"]
43003    fn vcompressstored(mem: *mut i8, data: i32x16, mask: u16);
43004    #[link_name = "llvm.x86.avx512.mask.compress.store.d.256"]
43005    fn vcompressstored256(mem: *mut i8, data: i32x8, mask: u8);
43006    #[link_name = "llvm.x86.avx512.mask.compress.store.d.128"]
43007    fn vcompressstored128(mem: *mut i8, data: i32x4, mask: u8);
43008
43009    #[link_name = "llvm.x86.avx512.mask.compress.store.q.512"]
43010    fn vcompressstoreq(mem: *mut i8, data: i64x8, mask: u8);
43011    #[link_name = "llvm.x86.avx512.mask.compress.store.q.256"]
43012    fn vcompressstoreq256(mem: *mut i8, data: i64x4, mask: u8);
43013    #[link_name = "llvm.x86.avx512.mask.compress.store.q.128"]
43014    fn vcompressstoreq128(mem: *mut i8, data: i64x2, mask: u8);
43015
43016    #[link_name = "llvm.x86.avx512.mask.compress.store.ps.512"]
43017    fn vcompressstoreps(mem: *mut i8, data: f32x16, mask: u16);
43018    #[link_name = "llvm.x86.avx512.mask.compress.store.ps.256"]
43019    fn vcompressstoreps256(mem: *mut i8, data: f32x8, mask: u8);
43020    #[link_name = "llvm.x86.avx512.mask.compress.store.ps.128"]
43021    fn vcompressstoreps128(mem: *mut i8, data: f32x4, mask: u8);
43022
43023    #[link_name = "llvm.x86.avx512.mask.compress.store.pd.512"]
43024    fn vcompressstorepd(mem: *mut i8, data: f64x8, mask: u8);
43025    #[link_name = "llvm.x86.avx512.mask.compress.store.pd.256"]
43026    fn vcompressstorepd256(mem: *mut i8, data: f64x4, mask: u8);
43027    #[link_name = "llvm.x86.avx512.mask.compress.store.pd.128"]
43028    fn vcompressstorepd128(mem: *mut i8, data: f64x2, mask: u8);
43029
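    // Expand: the inverse of compress, distributing the low lanes of `a` to the
    // positions selected by `mask`; unselected lanes come from `src`.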
43030    #[link_name = "llvm.x86.avx512.mask.expand.d.512"]
43031    fn vpexpandd(a: i32x16, src: i32x16, mask: u16) -> i32x16;
43032    #[link_name = "llvm.x86.avx512.mask.expand.d.256"]
43033    fn vpexpandd256(a: i32x8, src: i32x8, mask: u8) -> i32x8;
43034    #[link_name = "llvm.x86.avx512.mask.expand.d.128"]
43035    fn vpexpandd128(a: i32x4, src: i32x4, mask: u8) -> i32x4;
43036
43037    #[link_name = "llvm.x86.avx512.mask.expand.q.512"]
43038    fn vpexpandq(a: i64x8, src: i64x8, mask: u8) -> i64x8;
43039    #[link_name = "llvm.x86.avx512.mask.expand.q.256"]
43040    fn vpexpandq256(a: i64x4, src: i64x4, mask: u8) -> i64x4;
43041    #[link_name = "llvm.x86.avx512.mask.expand.q.128"]
43042    fn vpexpandq128(a: i64x2, src: i64x2, mask: u8) -> i64x2;
43043
43044    #[link_name = "llvm.x86.avx512.mask.expand.ps.512"]
43045    fn vexpandps(a: f32x16, src: f32x16, mask: u16) -> f32x16;
43046    #[link_name = "llvm.x86.avx512.mask.expand.ps.256"]
43047    fn vexpandps256(a: f32x8, src: f32x8, mask: u8) -> f32x8;
43048    #[link_name = "llvm.x86.avx512.mask.expand.ps.128"]
43049    fn vexpandps128(a: f32x4, src: f32x4, mask: u8) -> f32x4;
43050
43051    #[link_name = "llvm.x86.avx512.mask.expand.pd.512"]
43052    fn vexpandpd(a: f64x8, src: f64x8, mask: u8) -> f64x8;
43053    #[link_name = "llvm.x86.avx512.mask.expand.pd.256"]
43054    fn vexpandpd256(a: f64x4, src: f64x4, mask: u8) -> f64x4;
43055    #[link_name = "llvm.x86.avx512.mask.expand.pd.128"]
43056    fn vexpandpd128(a: f64x2, src: f64x2, mask: u8) -> f64x2;
43057
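    // Scalar (lane 0) operations with explicit rounding/SAE: `rounding` (or
    // `sae`) takes the _MM_FROUND_* constants, and `src`/`mask` provide
    // merge-masking for the low lane; the upper lanes are copied from `a`.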
43058    #[link_name = "llvm.x86.avx512.mask.add.ss.round"]
43059    fn vaddss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43060    #[link_name = "llvm.x86.avx512.mask.add.sd.round"]
43061    fn vaddsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
43062    #[link_name = "llvm.x86.avx512.mask.sub.ss.round"]
43063    fn vsubss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43064    #[link_name = "llvm.x86.avx512.mask.sub.sd.round"]
43065    fn vsubsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
43066    #[link_name = "llvm.x86.avx512.mask.mul.ss.round"]
43067    fn vmulss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43068    #[link_name = "llvm.x86.avx512.mask.mul.sd.round"]
43069    fn vmulsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
43070    #[link_name = "llvm.x86.avx512.mask.div.ss.round"]
43071    fn vdivss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43072    #[link_name = "llvm.x86.avx512.mask.div.sd.round"]
43073    fn vdivsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
43074    #[link_name = "llvm.x86.avx512.mask.max.ss.round"]
43075    fn vmaxss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
43076    #[link_name = "llvm.x86.avx512.mask.max.sd.round"]
43077    fn vmaxsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
43078    #[link_name = "llvm.x86.avx512.mask.min.ss.round"]
43079    fn vminss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
43080    #[link_name = "llvm.x86.avx512.mask.min.sd.round"]
43081    fn vminsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
43082    #[link_name = "llvm.x86.avx512.mask.sqrt.ss"]
43083    fn vsqrtss(a: __m128, b: __m128, src: __m128, mask: u8, rounding: i32) -> __m128;
43084    #[link_name = "llvm.x86.avx512.mask.sqrt.sd"]
43085    fn vsqrtsd(a: __m128d, b: __m128d, src: __m128d, mask: u8, rounding: i32) -> __m128d;
43086    #[link_name = "llvm.x86.avx512.mask.getexp.ss"]
43087    fn vgetexpss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
43088    #[link_name = "llvm.x86.avx512.mask.getexp.sd"]
43089    fn vgetexpsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
43090    #[link_name = "llvm.x86.avx512.mask.getmant.ss"]
43091    fn vgetmantss(a: f32x4, b: f32x4, mantissas: i32, src: f32x4, m: u8, sae: i32) -> f32x4;
43092    #[link_name = "llvm.x86.avx512.mask.getmant.sd"]
43093    fn vgetmantsd(a: f64x2, b: f64x2, mantissas: i32, src: f64x2, m: u8, sae: i32) -> f64x2;
43094
43095    #[link_name = "llvm.x86.avx512.rsqrt14.ss"]
43096    fn vrsqrt14ss(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
43097    #[link_name = "llvm.x86.avx512.rsqrt14.sd"]
43098    fn vrsqrt14sd(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
43099    #[link_name = "llvm.x86.avx512.rcp14.ss"]
43100    fn vrcp14ss(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
43101    #[link_name = "llvm.x86.avx512.rcp14.sd"]
43102    fn vrcp14sd(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
43103
43104    #[link_name = "llvm.x86.avx512.mask.rndscale.ss"]
43105    fn vrndscaless(a: f32x4, b: f32x4, src: f32x4, mask: u8, imm8: i32, sae: i32) -> f32x4;
43106    #[link_name = "llvm.x86.avx512.mask.rndscale.sd"]
43107    fn vrndscalesd(a: f64x2, b: f64x2, src: f64x2, mask: u8, imm8: i32, sae: i32) -> f64x2;
43108    #[link_name = "llvm.x86.avx512.mask.scalef.ss"]
43109    fn vscalefss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43110    #[link_name = "llvm.x86.avx512.mask.scalef.sd"]
43111    fn vscalefsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
43112
43113    #[link_name = "llvm.x86.avx512.vfmadd.f32"]
43114    fn vfmaddssround(a: f32, b: f32, c: f32, rounding: i32) -> f32;
43115    #[link_name = "llvm.x86.avx512.vfmadd.f64"]
43116    fn vfmaddsdround(a: f64, b: f64, c: f64, rounding: i32) -> f64;
43117
43118    #[link_name = "llvm.x86.avx512.mask.fixupimm.ss"]
43119    fn vfixupimmss(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8, sae: i32) -> f32x4;
43120    #[link_name = "llvm.x86.avx512.mask.fixupimm.sd"]
43121    fn vfixupimmsd(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8, sae: i32) -> f64x2;
43122    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ss"]
43123    fn vfixupimmssz(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8, sae: i32) -> f32x4;
43124    #[link_name = "llvm.x86.avx512.maskz.fixupimm.sd"]
43125    fn vfixupimmsdz(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8, sae: i32) -> f64x2;
43126
43127    #[link_name = "llvm.x86.avx512.mask.cvtss2sd.round"]
43128    fn vcvtss2sd(a: f64x2, b: f32x4, src: f64x2, mask: u8, sae: i32) -> f64x2;
43129    #[link_name = "llvm.x86.avx512.mask.cvtsd2ss.round"]
43130    fn vcvtsd2ss(a: f32x4, b: f64x2, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43131
43132    #[link_name = "llvm.x86.avx512.vcvtss2si32"]
43133    fn vcvtss2si(a: f32x4, rounding: i32) -> i32;
43134    #[link_name = "llvm.x86.avx512.vcvtss2usi32"]
43135    fn vcvtss2usi(a: f32x4, rounding: i32) -> u32;
43136
43137    #[link_name = "llvm.x86.avx512.vcvtsd2si32"]
43138    fn vcvtsd2si(a: f64x2, rounding: i32) -> i32;
43139    #[link_name = "llvm.x86.avx512.vcvtsd2usi32"]
43140    fn vcvtsd2usi(a: f64x2, rounding: i32) -> u32;
43141
43142    #[link_name = "llvm.x86.avx512.cvtsi2ss32"]
43143    fn vcvtsi2ss(a: f32x4, b: i32, rounding: i32) -> f32x4;
43144
43145    #[link_name = "llvm.x86.avx512.cvtusi2ss"]
43146    fn vcvtusi2ss(a: f32x4, b: u32, rounding: i32) -> f32x4;
43147
43148    #[link_name = "llvm.x86.avx512.cvttss2si"]
43149    fn vcvttss2si(a: f32x4, rounding: i32) -> i32;
43150    #[link_name = "llvm.x86.avx512.cvttss2usi"]
43151    fn vcvttss2usi(a: f32x4, rounding: i32) -> u32;
43152
43153    #[link_name = "llvm.x86.avx512.cvttsd2si"]
43154    fn vcvttsd2si(a: f64x2, rounding: i32) -> i32;
43155    #[link_name = "llvm.x86.avx512.cvttsd2usi"]
43156    fn vcvttsd2usi(a: f64x2, rounding: i32) -> u32;
43157
43158    #[link_name = "llvm.x86.avx512.vcomi.ss"]
43159    fn vcomiss(a: f32x4, b: f32x4, imm8: i32, sae: i32) -> i32;
43160    #[link_name = "llvm.x86.avx512.vcomi.sd"]
43161    fn vcomisd(a: f64x2, b: f64x2, imm8: i32, sae: i32) -> i32;
43162
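    // Masked unaligned loads: lanes whose mask bit is clear are taken from the
    // pass-through vector `a` instead of memory.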
43163    #[link_name = "llvm.x86.avx512.mask.loadu.d.128"]
43164    fn loaddqu32_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
43165    #[link_name = "llvm.x86.avx512.mask.loadu.q.128"]
43166    fn loaddqu64_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2;
43167    #[link_name = "llvm.x86.avx512.mask.loadu.ps.128"]
43168    fn loadups_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4;
43169    #[link_name = "llvm.x86.avx512.mask.loadu.pd.128"]
43170    fn loadupd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2;
43171    #[link_name = "llvm.x86.avx512.mask.loadu.d.256"]
43172    fn loaddqu32_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8;
43173    #[link_name = "llvm.x86.avx512.mask.loadu.q.256"]
43174    fn loaddqu64_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4;
43175    #[link_name = "llvm.x86.avx512.mask.loadu.ps.256"]
43176    fn loadups_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8;
43177    #[link_name = "llvm.x86.avx512.mask.loadu.pd.256"]
43178    fn loadupd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4;
43179    #[link_name = "llvm.x86.avx512.mask.loadu.d.512"]
43180    fn loaddqu32_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16;
43181    #[link_name = "llvm.x86.avx512.mask.loadu.q.512"]
43182    fn loaddqu64_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8;
43183    #[link_name = "llvm.x86.avx512.mask.loadu.ps.512"]
43184    fn loadups_512(mem_addr: *const f32, a: f32x16, mask: u16) -> f32x16;
43185    #[link_name = "llvm.x86.avx512.mask.loadu.pd.512"]
43186    fn loadupd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8;
43187
43188    #[link_name = "llvm.x86.avx512.mask.load.d.128"]
43189    fn loaddqa32_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
43190    #[link_name = "llvm.x86.avx512.mask.load.q.128"]
43191    fn loaddqa64_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2;
43192    #[link_name = "llvm.x86.avx512.mask.load.ps.128"]
43193    fn loadaps_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4;
43194    #[link_name = "llvm.x86.avx512.mask.load.pd.128"]
43195    fn loadapd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2;
43196    #[link_name = "llvm.x86.avx512.mask.load.d.256"]
43197    fn loaddqa32_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8;
43198    #[link_name = "llvm.x86.avx512.mask.load.q.256"]
43199    fn loaddqa64_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4;
43200    #[link_name = "llvm.x86.avx512.mask.load.ps.256"]
43201    fn loadaps_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8;
43202    #[link_name = "llvm.x86.avx512.mask.load.pd.256"]
43203    fn loadapd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4;
43204    #[link_name = "llvm.x86.avx512.mask.load.d.512"]
43205    fn loaddqa32_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16;
43206    #[link_name = "llvm.x86.avx512.mask.load.q.512"]
43207    fn loaddqa64_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8;
43208    #[link_name = "llvm.x86.avx512.mask.load.ps.512"]
43209    fn loadaps_512(mem_addr: *const f32, a: f32x16, mask: u16) -> f32x16;
43210    #[link_name = "llvm.x86.avx512.mask.load.pd.512"]
43211    fn loadapd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8;
43212
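    // Masked stores (unaligned `storeu` here, aligned `store` below): only
    // lanes with a set mask bit are written back to memory.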
43213    #[link_name = "llvm.x86.avx512.mask.storeu.d.128"]
43214    fn storedqu32_128(mem_addr: *mut i32, a: i32x4, mask: u8);
43215    #[link_name = "llvm.x86.avx512.mask.storeu.q.128"]
43216    fn storedqu64_128(mem_addr: *mut i64, a: i64x2, mask: u8);
43217    #[link_name = "llvm.x86.avx512.mask.storeu.ps.128"]
43218    fn storeups_128(mem_addr: *mut f32, a: f32x4, mask: u8);
43219    #[link_name = "llvm.x86.avx512.mask.storeu.pd.128"]
43220    fn storeupd_128(mem_addr: *mut f64, a: f64x2, mask: u8);
43221    #[link_name = "llvm.x86.avx512.mask.storeu.d.256"]
43222    fn storedqu32_256(mem_addr: *mut i32, a: i32x8, mask: u8);
43223    #[link_name = "llvm.x86.avx512.mask.storeu.q.256"]
43224    fn storedqu64_256(mem_addr: *mut i64, a: i64x4, mask: u8);
43225    #[link_name = "llvm.x86.avx512.mask.storeu.ps.256"]
43226    fn storeups_256(mem_addr: *mut f32, a: f32x8, mask: u8);
43227    #[link_name = "llvm.x86.avx512.mask.storeu.pd.256"]
43228    fn storeupd_256(mem_addr: *mut f64, a: f64x4, mask: u8);
43229    #[link_name = "llvm.x86.avx512.mask.storeu.d.512"]
43230    fn storedqu32_512(mem_addr: *mut i32, a: i32x16, mask: u16);
43231    #[link_name = "llvm.x86.avx512.mask.storeu.q.512"]
43232    fn storedqu64_512(mem_addr: *mut i64, a: i64x8, mask: u8);
43233    #[link_name = "llvm.x86.avx512.mask.storeu.ps.512"]
43234    fn storeups_512(mem_addr: *mut f32, a: f32x16, mask: u16);
43235    #[link_name = "llvm.x86.avx512.mask.storeu.pd.512"]
43236    fn storeupd_512(mem_addr: *mut f64, a: f64x8, mask: u8);
43237
43238    #[link_name = "llvm.x86.avx512.mask.store.d.128"]
43239    fn storedqa32_128(mem_addr: *mut i32, a: i32x4, mask: u8);
43240    #[link_name = "llvm.x86.avx512.mask.store.q.128"]
43241    fn storedqa64_128(mem_addr: *mut i64, a: i64x2, mask: u8);
43242    #[link_name = "llvm.x86.avx512.mask.store.ps.128"]
43243    fn storeaps_128(mem_addr: *mut f32, a: f32x4, mask: u8);
43244    #[link_name = "llvm.x86.avx512.mask.store.pd.128"]
43245    fn storeapd_128(mem_addr: *mut f64, a: f64x2, mask: u8);
43246    #[link_name = "llvm.x86.avx512.mask.store.d.256"]
43247    fn storedqa32_256(mem_addr: *mut i32, a: i32x8, mask: u8);
43248    #[link_name = "llvm.x86.avx512.mask.store.q.256"]
43249    fn storedqa64_256(mem_addr: *mut i64, a: i64x4, mask: u8);
43250    #[link_name = "llvm.x86.avx512.mask.store.ps.256"]
43251    fn storeaps_256(mem_addr: *mut f32, a: f32x8, mask: u8);
43252    #[link_name = "llvm.x86.avx512.mask.store.pd.256"]
43253    fn storeapd_256(mem_addr: *mut f64, a: f64x4, mask: u8);
43254    #[link_name = "llvm.x86.avx512.mask.store.d.512"]
43255    fn storedqa32_512(mem_addr: *mut i32, a: i32x16, mask: u16);
43256    #[link_name = "llvm.x86.avx512.mask.store.q.512"]
43257    fn storedqa64_512(mem_addr: *mut i64, a: i64x8, mask: u8);
43258    #[link_name = "llvm.x86.avx512.mask.store.ps.512"]
43259    fn storeaps_512(mem_addr: *mut f32, a: f32x16, mask: u16);
43260    #[link_name = "llvm.x86.avx512.mask.store.pd.512"]
43261    fn storeapd_512(mem_addr: *mut f64, a: f64x8, mask: u8);
43262
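    // Expand loads: read a contiguous run of elements from `mem_addr` and
    // distribute them, in order, to the lanes whose mask bit is set; lanes with
    // a clear bit come from `a`.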
43263    #[link_name = "llvm.x86.avx512.mask.expand.load.d.128"]
43264    fn expandloadd_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
43265    #[link_name = "llvm.x86.avx512.mask.expand.load.q.128"]
43266    fn expandloadq_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2;
43267    #[link_name = "llvm.x86.avx512.mask.expand.load.ps.128"]
43268    fn expandloadps_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4;
43269    #[link_name = "llvm.x86.avx512.mask.expand.load.pd.128"]
43270    fn expandloadpd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2;
43271    #[link_name = "llvm.x86.avx512.mask.expand.load.d.256"]
43272    fn expandloadd_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8;
43273    #[link_name = "llvm.x86.avx512.mask.expand.load.q.256"]
43274    fn expandloadq_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4;
43275    #[link_name = "llvm.x86.avx512.mask.expand.load.ps.256"]
43276    fn expandloadps_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8;
43277    #[link_name = "llvm.x86.avx512.mask.expand.load.pd.256"]
43278    fn expandloadpd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4;
43279    #[link_name = "llvm.x86.avx512.mask.expand.load.d.512"]
43280    fn expandloadd_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16;
43281    #[link_name = "llvm.x86.avx512.mask.expand.load.q.512"]
43282    fn expandloadq_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8;
43283    #[link_name = "llvm.x86.avx512.mask.expand.load.ps.512"]
43284    fn expandloadps_512(mem_addr: *const f32, a: f32x16, mask: u16) -> f32x16;
43285    #[link_name = "llvm.x86.avx512.mask.expand.load.pd.512"]
43286    fn expandloadpd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8;
43287
43288}
43289
43290#[cfg(test)]
43291mod tests {
43292
43293    use stdarch_test::simd_test;
43294
43295    use crate::core_arch::x86::*;
43296    use crate::hint::black_box;
43297    use crate::mem::{self};
43298
43299    #[simd_test(enable = "avx512f")]
43300    unsafe fn test_mm512_abs_epi32() {
43301        #[rustfmt::skip]
43302        let a = _mm512_setr_epi32(
43303            0, 1, -1, i32::MAX,
43304            i32::MIN, 100, -100, -32,
43305            0, 1, -1, i32::MAX,
43306            i32::MIN, 100, -100, -32,
43307        );
43308        let r = _mm512_abs_epi32(a);
43309        #[rustfmt::skip]
43310        let e = _mm512_setr_epi32(
43311            0, 1, 1, i32::MAX,
43312            i32::MAX.wrapping_add(1), 100, 100, 32,
43313            0, 1, 1, i32::MAX,
43314            i32::MAX.wrapping_add(1), 100, 100, 32,
43315        );
43316        assert_eq_m512i(r, e);
43317    }
43318
43319    #[simd_test(enable = "avx512f")]
43320    unsafe fn test_mm512_mask_abs_epi32() {
43321        #[rustfmt::skip]
43322        let a = _mm512_setr_epi32(
43323            0, 1, -1, i32::MAX,
43324            i32::MIN, 100, -100, -32,
43325            0, 1, -1, i32::MAX,
43326            i32::MIN, 100, -100, -32,
43327        );
43328        let r = _mm512_mask_abs_epi32(a, 0, a);
43329        assert_eq_m512i(r, a);
43330        let r = _mm512_mask_abs_epi32(a, 0b00000000_11111111, a);
43331        #[rustfmt::skip]
43332        let e = _mm512_setr_epi32(
43333            0, 1, 1, i32::MAX,
43334            i32::MAX.wrapping_add(1), 100, 100, 32,
43335            0, 1, -1, i32::MAX,
43336            i32::MIN, 100, -100, -32,
43337        );
43338        assert_eq_m512i(r, e);
43339    }
43340
43341    #[simd_test(enable = "avx512f")]
43342    unsafe fn test_mm512_maskz_abs_epi32() {
43343        #[rustfmt::skip]
43344        let a = _mm512_setr_epi32(
43345            0, 1, -1, i32::MAX,
43346            i32::MIN, 100, -100, -32,
43347            0, 1, -1, i32::MAX,
43348            i32::MIN, 100, -100, -32,
43349        );
43350        let r = _mm512_maskz_abs_epi32(0, a);
43351        assert_eq_m512i(r, _mm512_setzero_si512());
43352        let r = _mm512_maskz_abs_epi32(0b00000000_11111111, a);
43353        #[rustfmt::skip]
43354        let e = _mm512_setr_epi32(
43355            0, 1, 1, i32::MAX,
43356            i32::MAX.wrapping_add(1), 100, 100, 32,
43357            0, 0, 0, 0,
43358            0, 0, 0, 0,
43359        );
43360        assert_eq_m512i(r, e);
43361    }
43362
43363    #[simd_test(enable = "avx512f,avx512vl")]
43364    unsafe fn test_mm256_mask_abs_epi32() {
43365        #[rustfmt::skip]
43366        let a = _mm256_setr_epi32(
43367            0, 1, -1, i32::MAX,
43368            i32::MIN, 100, -100, -32,
43369        );
43370        let r = _mm256_mask_abs_epi32(a, 0, a);
43371        assert_eq_m256i(r, a);
43372        let r = _mm256_mask_abs_epi32(a, 0b00001111, a);
43373        #[rustfmt::skip]
43374        let e = _mm256_setr_epi32(
43375            0, 1, 1, i32::MAX,
43376            i32::MIN, 100, -100, -32,
43377        );
43378        assert_eq_m256i(r, e);
43379    }
43380
43381    #[simd_test(enable = "avx512f,avx512vl")]
43382    unsafe fn test_mm256_maskz_abs_epi32() {
43383        #[rustfmt::skip]
43384        let a = _mm256_setr_epi32(
43385            0, 1, -1, i32::MAX,
43386            i32::MIN, 100, -100, -32,
43387        );
43388        let r = _mm256_maskz_abs_epi32(0, a);
43389        assert_eq_m256i(r, _mm256_setzero_si256());
43390        let r = _mm256_maskz_abs_epi32(0b00001111, a);
43391        #[rustfmt::skip]
43392        let e = _mm256_setr_epi32(
43393            0, 1, 1, i32::MAX,
43394            0, 0, 0, 0,
43395        );
43396        assert_eq_m256i(r, e);
43397    }
43398
43399    #[simd_test(enable = "avx512f,avx512vl")]
43400    unsafe fn test_mm_mask_abs_epi32() {
43401        let a = _mm_setr_epi32(i32::MIN, 100, -100, -32);
43402        let r = _mm_mask_abs_epi32(a, 0, a);
43403        assert_eq_m128i(r, a);
43404        let r = _mm_mask_abs_epi32(a, 0b00001111, a);
43405        let e = _mm_setr_epi32(i32::MAX.wrapping_add(1), 100, 100, 32);
43406        assert_eq_m128i(r, e);
43407    }
43408
43409    #[simd_test(enable = "avx512f,avx512vl")]
43410    unsafe fn test_mm_maskz_abs_epi32() {
43411        let a = _mm_setr_epi32(i32::MIN, 100, -100, -32);
43412        let r = _mm_maskz_abs_epi32(0, a);
43413        assert_eq_m128i(r, _mm_setzero_si128());
43414        let r = _mm_maskz_abs_epi32(0b00001111, a);
43415        let e = _mm_setr_epi32(i32::MAX.wrapping_add(1), 100, 100, 32);
43416        assert_eq_m128i(r, e);
43417    }
43418
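    // Editor's illustrative sketch (not part of the upstream suite; the test
    // name is invented): with the same mask, the writemask form seeded with an
    // all-zero `src` and the zeromask form must agree lane-for-lane, since
    // unselected lanes are zero either way.
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_maskz_abs_epi32_agree() {
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let mask = 0b10101010_01010101;
        let merged = _mm512_mask_abs_epi32(_mm512_setzero_si512(), mask, a);
        let zeroed = _mm512_maskz_abs_epi32(mask, a);
        assert_eq_m512i(merged, zeroed);
    }
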
43419    #[simd_test(enable = "avx512f")]
43420    unsafe fn test_mm512_abs_ps() {
43421        #[rustfmt::skip]
43422        let a = _mm512_setr_ps(
43423            0., 1., -1., f32::MAX,
43424            f32::MIN, 100., -100., -32.,
43425            0., 1., -1., f32::MAX,
43426            f32::MIN, 100., -100., -32.,
43427        );
43428        let r = _mm512_abs_ps(a);
43429        #[rustfmt::skip]
43430        let e = _mm512_setr_ps(
43431            0., 1., 1., f32::MAX,
43432            f32::MAX, 100., 100., 32.,
43433            0., 1., 1., f32::MAX,
43434            f32::MAX, 100., 100., 32.,
43435        );
43436        assert_eq_m512(r, e);
43437    }
43438
43439    #[simd_test(enable = "avx512f")]
43440    unsafe fn test_mm512_mask_abs_ps() {
43441        #[rustfmt::skip]
43442        let a = _mm512_setr_ps(
43443            0., 1., -1., f32::MAX,
43444            f32::MIN, 100., -100., -32.,
43445            0., 1., -1., f32::MAX,
43446            f32::MIN, 100., -100., -32.,
43447        );
43448        let r = _mm512_mask_abs_ps(a, 0, a);
43449        assert_eq_m512(r, a);
43450        let r = _mm512_mask_abs_ps(a, 0b00000000_11111111, a);
43451        #[rustfmt::skip]
43452        let e = _mm512_setr_ps(
43453            0., 1., 1., f32::MAX,
43454            f32::MAX, 100., 100., 32.,
43455            0., 1., -1., f32::MAX,
43456            f32::MIN, 100., -100., -32.,
43457        );
43458        assert_eq_m512(r, e);
43459    }
43460
43461    #[simd_test(enable = "avx512f")]
43462    unsafe fn test_mm512_mask_mov_epi32() {
43463        let src = _mm512_set1_epi32(1);
43464        let a = _mm512_set1_epi32(2);
43465        let r = _mm512_mask_mov_epi32(src, 0, a);
43466        assert_eq_m512i(r, src);
43467        let r = _mm512_mask_mov_epi32(src, 0b11111111_11111111, a);
43468        assert_eq_m512i(r, a);
43469    }
43470
43471    #[simd_test(enable = "avx512f")]
43472    unsafe fn test_mm512_maskz_mov_epi32() {
43473        let a = _mm512_set1_epi32(2);
43474        let r = _mm512_maskz_mov_epi32(0, a);
43475        assert_eq_m512i(r, _mm512_setzero_si512());
43476        let r = _mm512_maskz_mov_epi32(0b11111111_11111111, a);
43477        assert_eq_m512i(r, a);
43478    }
43479
43480    #[simd_test(enable = "avx512f,avx512vl")]
43481    unsafe fn test_mm256_mask_mov_epi32() {
43482        let src = _mm256_set1_epi32(1);
43483        let a = _mm256_set1_epi32(2);
43484        let r = _mm256_mask_mov_epi32(src, 0, a);
43485        assert_eq_m256i(r, src);
43486        let r = _mm256_mask_mov_epi32(src, 0b11111111, a);
43487        assert_eq_m256i(r, a);
43488    }
43489
43490    #[simd_test(enable = "avx512f,avx512vl")]
43491    unsafe fn test_mm256_maskz_mov_epi32() {
43492        let a = _mm256_set1_epi32(2);
43493        let r = _mm256_maskz_mov_epi32(0, a);
43494        assert_eq_m256i(r, _mm256_setzero_si256());
43495        let r = _mm256_maskz_mov_epi32(0b11111111, a);
43496        assert_eq_m256i(r, a);
43497    }
43498
43499    #[simd_test(enable = "avx512f,avx512vl")]
43500    unsafe fn test_mm_mask_mov_epi32() {
43501        let src = _mm_set1_epi32(1);
43502        let a = _mm_set1_epi32(2);
43503        let r = _mm_mask_mov_epi32(src, 0, a);
43504        assert_eq_m128i(r, src);
43505        let r = _mm_mask_mov_epi32(src, 0b00001111, a);
43506        assert_eq_m128i(r, a);
43507    }
43508
43509    #[simd_test(enable = "avx512f,avx512vl")]
43510    unsafe fn test_mm_maskz_mov_epi32() {
43511        let a = _mm_set1_epi32(2);
43512        let r = _mm_maskz_mov_epi32(0, a);
43513        assert_eq_m128i(r, _mm_setzero_si128());
43514        let r = _mm_maskz_mov_epi32(0b00001111, a);
43515        assert_eq_m128i(r, a);
43516    }
43517
43518    #[simd_test(enable = "avx512f")]
43519    unsafe fn test_mm512_mask_mov_ps() {
43520        let src = _mm512_set1_ps(1.);
43521        let a = _mm512_set1_ps(2.);
43522        let r = _mm512_mask_mov_ps(src, 0, a);
43523        assert_eq_m512(r, src);
43524        let r = _mm512_mask_mov_ps(src, 0b11111111_11111111, a);
43525        assert_eq_m512(r, a);
43526    }
43527
43528    #[simd_test(enable = "avx512f")]
43529    unsafe fn test_mm512_maskz_mov_ps() {
43530        let a = _mm512_set1_ps(2.);
43531        let r = _mm512_maskz_mov_ps(0, a);
43532        assert_eq_m512(r, _mm512_setzero_ps());
43533        let r = _mm512_maskz_mov_ps(0b11111111_11111111, a);
43534        assert_eq_m512(r, a);
43535    }
43536
43537    #[simd_test(enable = "avx512f,avx512vl")]
43538    unsafe fn test_mm256_mask_mov_ps() {
43539        let src = _mm256_set1_ps(1.);
43540        let a = _mm256_set1_ps(2.);
43541        let r = _mm256_mask_mov_ps(src, 0, a);
43542        assert_eq_m256(r, src);
43543        let r = _mm256_mask_mov_ps(src, 0b11111111, a);
43544        assert_eq_m256(r, a);
43545    }
43546
43547    #[simd_test(enable = "avx512f,avx512vl")]
43548    unsafe fn test_mm256_maskz_mov_ps() {
43549        let a = _mm256_set1_ps(2.);
43550        let r = _mm256_maskz_mov_ps(0, a);
43551        assert_eq_m256(r, _mm256_setzero_ps());
43552        let r = _mm256_maskz_mov_ps(0b11111111, a);
43553        assert_eq_m256(r, a);
43554    }
43555
43556    #[simd_test(enable = "avx512f,avx512vl")]
43557    unsafe fn test_mm_mask_mov_ps() {
43558        let src = _mm_set1_ps(1.);
43559        let a = _mm_set1_ps(2.);
43560        let r = _mm_mask_mov_ps(src, 0, a);
43561        assert_eq_m128(r, src);
43562        let r = _mm_mask_mov_ps(src, 0b00001111, a);
43563        assert_eq_m128(r, a);
43564    }
43565
43566    #[simd_test(enable = "avx512f,avx512vl")]
43567    unsafe fn test_mm_maskz_mov_ps() {
43568        let a = _mm_set1_ps(2.);
43569        let r = _mm_maskz_mov_ps(0, a);
43570        assert_eq_m128(r, _mm_setzero_ps());
43571        let r = _mm_maskz_mov_ps(0b00001111, a);
43572        assert_eq_m128(r, a);
43573    }
43574
43575    #[simd_test(enable = "avx512f")]
43576    unsafe fn test_mm512_add_epi32() {
43577        #[rustfmt::skip]
43578        let a = _mm512_setr_epi32(
43579            0, 1, -1, i32::MAX,
43580            i32::MIN, 100, -100, -32,
43581            0, 1, -1, i32::MAX,
43582            i32::MIN, 100, -100, -32,
43583        );
43584        let b = _mm512_set1_epi32(1);
43585        let r = _mm512_add_epi32(a, b);
43586        #[rustfmt::skip]
43587        let e = _mm512_setr_epi32(
43588            1, 2, 0, i32::MIN,
43589            i32::MIN + 1, 101, -99, -31,
43590            1, 2, 0, i32::MIN,
43591            i32::MIN + 1, 101, -99, -31,
43592        );
43593        assert_eq_m512i(r, e);
43594    }
43595
43596    #[simd_test(enable = "avx512f")]
43597    unsafe fn test_mm512_mask_add_epi32() {
43598        #[rustfmt::skip]
43599        let a = _mm512_setr_epi32(
43600            0, 1, -1, i32::MAX,
43601            i32::MIN, 100, -100, -32,
43602            0, 1, -1, i32::MAX,
43603            i32::MIN, 100, -100, -32,
43604        );
43605        let b = _mm512_set1_epi32(1);
43606        let r = _mm512_mask_add_epi32(a, 0, a, b);
43607        assert_eq_m512i(r, a);
43608        let r = _mm512_mask_add_epi32(a, 0b00000000_11111111, a, b);
43609        #[rustfmt::skip]
43610        let e = _mm512_setr_epi32(
43611            1, 2, 0, i32::MIN,
43612            i32::MIN + 1, 101, -99, -31,
43613            0, 1, -1, i32::MAX,
43614            i32::MIN, 100, -100, -32,
43615        );
43616        assert_eq_m512i(r, e);
43617    }
43618
43619    #[simd_test(enable = "avx512f")]
43620    unsafe fn test_mm512_maskz_add_epi32() {
43621        #[rustfmt::skip]
43622        let a = _mm512_setr_epi32(
43623            0, 1, -1, i32::MAX,
43624            i32::MIN, 100, -100, -32,
43625            0, 1, -1, i32::MAX,
43626            i32::MIN, 100, -100, -32,
43627        );
43628        let b = _mm512_set1_epi32(1);
43629        let r = _mm512_maskz_add_epi32(0, a, b);
43630        assert_eq_m512i(r, _mm512_setzero_si512());
43631        let r = _mm512_maskz_add_epi32(0b00000000_11111111, a, b);
43632        #[rustfmt::skip]
43633        let e = _mm512_setr_epi32(
43634            1, 2, 0, i32::MIN,
43635            i32::MIN + 1, 101, -99, -31,
43636            0, 0, 0, 0,
43637            0, 0, 0, 0,
43638        );
43639        assert_eq_m512i(r, e);
43640    }
43641
43642    #[simd_test(enable = "avx512f,avx512vl")]
43643    unsafe fn test_mm256_mask_add_epi32() {
43644        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
43645        let b = _mm256_set1_epi32(1);
43646        let r = _mm256_mask_add_epi32(a, 0, a, b);
43647        assert_eq_m256i(r, a);
43648        let r = _mm256_mask_add_epi32(a, 0b11111111, a, b);
43649        let e = _mm256_set_epi32(1, 2, 0, i32::MIN, i32::MIN + 1, 101, -99, -31);
43650        assert_eq_m256i(r, e);
43651    }
43652
43653    #[simd_test(enable = "avx512f,avx512vl")]
43654    unsafe fn test_mm256_maskz_add_epi32() {
43655        let a = _mm256_setr_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
43656        let b = _mm256_set1_epi32(1);
43657        let r = _mm256_maskz_add_epi32(0, a, b);
43658        assert_eq_m256i(r, _mm256_setzero_si256());
43659        let r = _mm256_maskz_add_epi32(0b11111111, a, b);
43660        let e = _mm256_setr_epi32(1, 2, 0, i32::MIN, i32::MIN + 1, 101, -99, -31);
43661        assert_eq_m256i(r, e);
43662    }
43663
43664    #[simd_test(enable = "avx512f,avx512vl")]
43665    unsafe fn test_mm_mask_add_epi32() {
43666        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
43667        let b = _mm_set1_epi32(1);
43668        let r = _mm_mask_add_epi32(a, 0, a, b);
43669        assert_eq_m128i(r, a);
43670        let r = _mm_mask_add_epi32(a, 0b00001111, a, b);
43671        let e = _mm_set_epi32(2, 0, i32::MIN, i32::MIN + 1);
43672        assert_eq_m128i(r, e);
43673    }
43674
43675    #[simd_test(enable = "avx512f,avx512vl")]
43676    unsafe fn test_mm_maskz_add_epi32() {
43677        let a = _mm_setr_epi32(1, -1, i32::MAX, i32::MIN);
43678        let b = _mm_set1_epi32(1);
43679        let r = _mm_maskz_add_epi32(0, a, b);
43680        assert_eq_m128i(r, _mm_setzero_si128());
43681        let r = _mm_maskz_add_epi32(0b00001111, a, b);
43682        let e = _mm_setr_epi32(2, 0, i32::MIN, i32::MIN + 1);
43683        assert_eq_m128i(r, e);
43684    }
43685
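    // Editor's illustrative sketch (not part of the upstream suite; the test
    // name is invented): an all-ones writemask selects every lane, so the
    // masked add must reduce to the plain vector add.
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_add_epi32_full_mask() {
        let a = _mm512_set1_epi32(7);
        let b = _mm512_set1_epi32(-3);
        let r = _mm512_mask_add_epi32(a, 0b11111111_11111111, a, b);
        assert_eq_m512i(r, _mm512_add_epi32(a, b));
    }
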
43686    #[simd_test(enable = "avx512f")]
43687    unsafe fn test_mm512_add_ps() {
43688        #[rustfmt::skip]
43689        let a = _mm512_setr_ps(
43690            0., 1., -1., f32::MAX,
43691            f32::MIN, 100., -100., -32.,
43692            0., 1., -1., f32::MAX,
43693            f32::MIN, 100., -100., -32.,
43694        );
43695        let b = _mm512_set1_ps(1.);
43696        let r = _mm512_add_ps(a, b);
43697        #[rustfmt::skip]
43698        let e = _mm512_setr_ps(
43699            1., 2., 0., f32::MAX,
43700            f32::MIN + 1., 101., -99., -31.,
43701            1., 2., 0., f32::MAX,
43702            f32::MIN + 1., 101., -99., -31.,
43703        );
43704        assert_eq_m512(r, e);
43705    }
43706
43707    #[simd_test(enable = "avx512f")]
43708    unsafe fn test_mm512_mask_add_ps() {
43709        #[rustfmt::skip]
43710        let a = _mm512_setr_ps(
43711            0., 1., -1., f32::MAX,
43712            f32::MIN, 100., -100., -32.,
43713            0., 1., -1., f32::MAX,
43714            f32::MIN, 100., -100., -32.,
43715        );
43716        let b = _mm512_set1_ps(1.);
43717        let r = _mm512_mask_add_ps(a, 0, a, b);
43718        assert_eq_m512(r, a);
43719        let r = _mm512_mask_add_ps(a, 0b00000000_11111111, a, b);
43720        #[rustfmt::skip]
43721        let e = _mm512_setr_ps(
43722            1., 2., 0., f32::MAX,
43723            f32::MIN + 1., 101., -99., -31.,
43724            0., 1., -1., f32::MAX,
43725            f32::MIN, 100., -100., -32.,
43726        );
43727        assert_eq_m512(r, e);
43728    }
43729
43730    #[simd_test(enable = "avx512f")]
43731    unsafe fn test_mm512_maskz_add_ps() {
43732        #[rustfmt::skip]
43733        let a = _mm512_setr_ps(
43734            0., 1., -1., f32::MAX,
43735            f32::MIN, 100., -100., -32.,
43736            0., 1., -1., f32::MAX,
43737            f32::MIN, 100., -100., -32.,
43738        );
43739        let b = _mm512_set1_ps(1.);
43740        let r = _mm512_maskz_add_ps(0, a, b);
43741        assert_eq_m512(r, _mm512_setzero_ps());
43742        let r = _mm512_maskz_add_ps(0b00000000_11111111, a, b);
43743        #[rustfmt::skip]
43744        let e = _mm512_setr_ps(
43745            1., 2., 0., f32::MAX,
43746            f32::MIN + 1., 101., -99., -31.,
43747            0., 0., 0., 0.,
43748            0., 0., 0., 0.,
43749        );
43750        assert_eq_m512(r, e);
43751    }
43752
43753    #[simd_test(enable = "avx512f,avx512vl")]
43754    unsafe fn test_mm256_mask_add_ps() {
43755        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
43756        let b = _mm256_set1_ps(1.);
43757        let r = _mm256_mask_add_ps(a, 0, a, b);
43758        assert_eq_m256(r, a);
43759        let r = _mm256_mask_add_ps(a, 0b11111111, a, b);
43760        let e = _mm256_set_ps(1., 2., 0., f32::MAX, f32::MIN + 1., 101., -99., -31.);
43761        assert_eq_m256(r, e);
43762    }
43763
43764    #[simd_test(enable = "avx512f,avx512vl")]
43765    unsafe fn test_mm256_maskz_add_ps() {
43766        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
43767        let b = _mm256_set1_ps(1.);
43768        let r = _mm256_maskz_add_ps(0, a, b);
43769        assert_eq_m256(r, _mm256_setzero_ps());
43770        let r = _mm256_maskz_add_ps(0b11111111, a, b);
43771        let e = _mm256_set_ps(1., 2., 0., f32::MAX, f32::MIN + 1., 101., -99., -31.);
43772        assert_eq_m256(r, e);
43773    }
43774
43775    #[simd_test(enable = "avx512f,avx512vl")]
43776    unsafe fn test_mm_mask_add_ps() {
43777        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
43778        let b = _mm_set1_ps(1.);
43779        let r = _mm_mask_add_ps(a, 0, a, b);
43780        assert_eq_m128(r, a);
43781        let r = _mm_mask_add_ps(a, 0b00001111, a, b);
43782        let e = _mm_set_ps(2., 0., f32::MAX, f32::MIN + 1.);
43783        assert_eq_m128(r, e);
43784    }
43785
43786    #[simd_test(enable = "avx512f,avx512vl")]
43787    unsafe fn test_mm_maskz_add_ps() {
43788        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
43789        let b = _mm_set1_ps(1.);
43790        let r = _mm_maskz_add_ps(0, a, b);
43791        assert_eq_m128(r, _mm_setzero_ps());
43792        let r = _mm_maskz_add_ps(0b00001111, a, b);
43793        let e = _mm_set_ps(2., 0., f32::MAX, f32::MIN + 1.);
43794        assert_eq_m128(r, e);
43795    }
43796
43797    #[simd_test(enable = "avx512f")]
43798    unsafe fn test_mm512_sub_epi32() {
43799        #[rustfmt::skip]
43800        let a = _mm512_setr_epi32(
43801            0, 1, -1, i32::MAX,
43802            i32::MIN, 100, -100, -32,
43803            0, 1, -1, i32::MAX,
43804            i32::MIN, 100, -100, -32,
43805        );
43806        let b = _mm512_set1_epi32(1);
43807        let r = _mm512_sub_epi32(a, b);
43808        #[rustfmt::skip]
43809        let e = _mm512_setr_epi32(
43810            -1, 0, -2, i32::MAX - 1,
43811            i32::MAX, 99, -101, -33,
43812            -1, 0, -2, i32::MAX - 1,
43813            i32::MAX, 99, -101, -33,
43814        );
43815        assert_eq_m512i(r, e);
43816    }
43817
43818    #[simd_test(enable = "avx512f")]
43819    unsafe fn test_mm512_mask_sub_epi32() {
43820        #[rustfmt::skip]
43821        let a = _mm512_setr_epi32(
43822            0, 1, -1, i32::MAX,
43823            i32::MIN, 100, -100, -32,
43824            0, 1, -1, i32::MAX,
43825            i32::MIN, 100, -100, -32,
43826        );
43827        let b = _mm512_set1_epi32(1);
43828        let r = _mm512_mask_sub_epi32(a, 0, a, b);
43829        assert_eq_m512i(r, a);
43830        let r = _mm512_mask_sub_epi32(a, 0b00000000_11111111, a, b);
43831        #[rustfmt::skip]
43832        let e = _mm512_setr_epi32(
43833            -1, 0, -2, i32::MAX - 1,
43834            i32::MAX, 99, -101, -33,
43835            0, 1, -1, i32::MAX,
43836            i32::MIN, 100, -100, -32,
43837        );
43838        assert_eq_m512i(r, e);
43839    }
43840
43841    #[simd_test(enable = "avx512f")]
43842    unsafe fn test_mm512_maskz_sub_epi32() {
43843        #[rustfmt::skip]
43844        let a = _mm512_setr_epi32(
43845            0, 1, -1, i32::MAX,
43846            i32::MIN, 100, -100, -32,
43847            0, 1, -1, i32::MAX,
43848            i32::MIN, 100, -100, -32,
43849        );
43850        let b = _mm512_set1_epi32(1);
43851        let r = _mm512_maskz_sub_epi32(0, a, b);
43852        assert_eq_m512i(r, _mm512_setzero_si512());
43853        let r = _mm512_maskz_sub_epi32(0b00000000_11111111, a, b);
43854        #[rustfmt::skip]
43855        let e = _mm512_setr_epi32(
43856            -1, 0, -2, i32::MAX - 1,
43857            i32::MAX, 99, -101, -33,
43858            0, 0, 0, 0,
43859            0, 0, 0, 0,
43860        );
43861        assert_eq_m512i(r, e);
43862    }
43863
43864    #[simd_test(enable = "avx512f,avx512vl")]
43865    unsafe fn test_mm256_mask_sub_epi32() {
43866        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
43867        let b = _mm256_set1_epi32(1);
43868        let r = _mm256_mask_sub_epi32(a, 0, a, b);
43869        assert_eq_m256i(r, a);
43870        let r = _mm256_mask_sub_epi32(a, 0b11111111, a, b);
43871        let e = _mm256_set_epi32(-1, 0, -2, i32::MAX - 1, i32::MAX, 99, -101, -33);
43872        assert_eq_m256i(r, e);
43873    }
43874
43875    #[simd_test(enable = "avx512f,avx512vl")]
43876    unsafe fn test_mm256_maskz_sub_epi32() {
43877        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
43878        let b = _mm256_set1_epi32(1);
43879        let r = _mm256_maskz_sub_epi32(0, a, b);
43880        assert_eq_m256i(r, _mm256_setzero_si256());
43881        let r = _mm256_maskz_sub_epi32(0b11111111, a, b);
43882        let e = _mm256_set_epi32(-1, 0, -2, i32::MAX - 1, i32::MAX, 99, -101, -33);
43883        assert_eq_m256i(r, e);
43884    }
43885
43886    #[simd_test(enable = "avx512f,avx512vl")]
43887    unsafe fn test_mm_mask_sub_epi32() {
43888        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
43889        let b = _mm_set1_epi32(1);
43890        let r = _mm_mask_sub_epi32(a, 0, a, b);
43891        assert_eq_m128i(r, a);
43892        let r = _mm_mask_sub_epi32(a, 0b00001111, a, b);
43893        let e = _mm_set_epi32(0, -2, i32::MAX - 1, i32::MAX);
43894        assert_eq_m128i(r, e);
43895    }
43896
43897    #[simd_test(enable = "avx512f,avx512vl")]
43898    unsafe fn test_mm_maskz_sub_epi32() {
43899        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
43900        let b = _mm_set1_epi32(1);
43901        let r = _mm_maskz_sub_epi32(0, a, b);
43902        assert_eq_m128i(r, _mm_setzero_si128());
43903        let r = _mm_maskz_sub_epi32(0b00001111, a, b);
43904        let e = _mm_set_epi32(0, -2, i32::MAX - 1, i32::MAX);
43905        assert_eq_m128i(r, e);
43906    }
43907
43908    #[simd_test(enable = "avx512f")]
43909    unsafe fn test_mm512_sub_ps() {
43910        #[rustfmt::skip]
43911        let a = _mm512_setr_ps(
43912            0., 1., -1., f32::MAX,
43913            f32::MIN, 100., -100., -32.,
43914            0., 1., -1., f32::MAX,
43915            f32::MIN, 100., -100., -32.,
43916        );
43917        let b = _mm512_set1_ps(1.);
43918        let r = _mm512_sub_ps(a, b);
43919        #[rustfmt::skip]
43920        let e = _mm512_setr_ps(
43921            -1., 0., -2., f32::MAX - 1.,
43922            f32::MIN, 99., -101., -33.,
43923            -1., 0., -2., f32::MAX - 1.,
43924            f32::MIN, 99., -101., -33.,
43925        );
43926        assert_eq_m512(r, e);
43927    }
43928
43929    #[simd_test(enable = "avx512f")]
43930    unsafe fn test_mm512_mask_sub_ps() {
43931        #[rustfmt::skip]
43932        let a = _mm512_setr_ps(
43933            0., 1., -1., f32::MAX,
43934            f32::MIN, 100., -100., -32.,
43935            0., 1., -1., f32::MAX,
43936            f32::MIN, 100., -100., -32.,
43937        );
43938        let b = _mm512_set1_ps(1.);
43939        let r = _mm512_mask_sub_ps(a, 0, a, b);
43940        assert_eq_m512(r, a);
43941        let r = _mm512_mask_sub_ps(a, 0b00000000_11111111, a, b);
43942        #[rustfmt::skip]
43943        let e = _mm512_setr_ps(
43944            -1., 0., -2., f32::MAX - 1.,
43945            f32::MIN, 99., -101., -33.,
43946            0., 1., -1., f32::MAX,
43947            f32::MIN, 100., -100., -32.,
43948        );
43949        assert_eq_m512(r, e);
43950    }
43951
43952    #[simd_test(enable = "avx512f")]
43953    unsafe fn test_mm512_maskz_sub_ps() {
43954        #[rustfmt::skip]
43955        let a = _mm512_setr_ps(
43956            0., 1., -1., f32::MAX,
43957            f32::MIN, 100., -100., -32.,
43958            0., 1., -1., f32::MAX,
43959            f32::MIN, 100., -100., -32.,
43960        );
43961        let b = _mm512_set1_ps(1.);
43962        let r = _mm512_maskz_sub_ps(0, a, b);
43963        assert_eq_m512(r, _mm512_setzero_ps());
43964        let r = _mm512_maskz_sub_ps(0b00000000_11111111, a, b);
43965        #[rustfmt::skip]
43966        let e = _mm512_setr_ps(
43967            -1., 0., -2., f32::MAX - 1.,
43968            f32::MIN, 99., -101., -33.,
43969            0., 0., 0., 0.,
43970            0., 0., 0., 0.,
43971        );
43972        assert_eq_m512(r, e);
43973    }
43974
43975    #[simd_test(enable = "avx512f,avx512vl")]
43976    unsafe fn test_mm256_mask_sub_ps() {
43977        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
43978        let b = _mm256_set1_ps(1.);
43979        let r = _mm256_mask_sub_ps(a, 0, a, b);
43980        assert_eq_m256(r, a);
43981        let r = _mm256_mask_sub_ps(a, 0b11111111, a, b);
43982        let e = _mm256_set_ps(-1., 0., -2., f32::MAX - 1., f32::MIN, 99., -101., -33.);
43983        assert_eq_m256(r, e);
43984    }
43985
43986    #[simd_test(enable = "avx512f,avx512vl")]
43987    unsafe fn test_mm256_maskz_sub_ps() {
43988        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
43989        let b = _mm256_set1_ps(1.);
43990        let r = _mm256_maskz_sub_ps(0, a, b);
43991        assert_eq_m256(r, _mm256_setzero_ps());
43992        let r = _mm256_maskz_sub_ps(0b11111111, a, b);
43993        let e = _mm256_set_ps(-1., 0., -2., f32::MAX - 1., f32::MIN, 99., -101., -33.);
43994        assert_eq_m256(r, e);
43995    }
43996
43997    #[simd_test(enable = "avx512f,avx512vl")]
43998    unsafe fn test_mm_mask_sub_ps() {
43999        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
44000        let b = _mm_set1_ps(1.);
44001        let r = _mm_mask_sub_ps(a, 0, a, b);
44002        assert_eq_m128(r, a);
44003        let r = _mm_mask_sub_ps(a, 0b00001111, a, b);
44004        let e = _mm_set_ps(0., -2., f32::MAX - 1., f32::MIN);
44005        assert_eq_m128(r, e);
44006    }
44007
44008    #[simd_test(enable = "avx512f,avx512vl")]
44009    unsafe fn test_mm_maskz_sub_ps() {
44010        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
44011        let b = _mm_set1_ps(1.);
44012        let r = _mm_maskz_sub_ps(0, a, b);
44013        assert_eq_m128(r, _mm_setzero_ps());
44014        let r = _mm_maskz_sub_ps(0b00001111, a, b);
44015        let e = _mm_set_ps(0., -2., f32::MAX - 1., f32::MIN);
44016        assert_eq_m128(r, e);
44017    }
44018
44019    #[simd_test(enable = "avx512f")]
44020    unsafe fn test_mm512_mullo_epi32() {
44021        #[rustfmt::skip]
44022        let a = _mm512_setr_epi32(
44023            0, 1, -1, i32::MAX,
44024            i32::MIN, 100, -100, -32,
44025            0, 1, -1, i32::MAX,
44026            i32::MIN, 100, -100, -32,
44027        );
44028        let b = _mm512_set1_epi32(2);
44029        let r = _mm512_mullo_epi32(a, b);
44030        let e = _mm512_setr_epi32(
44031            0, 2, -2, -2, 0, 200, -200, -64, 0, 2, -2, -2, 0, 200, -200, -64,
44032        );
44033        assert_eq_m512i(r, e);
44034    }
44035
44036    #[simd_test(enable = "avx512f")]
44037    unsafe fn test_mm512_mask_mullo_epi32() {
44038        #[rustfmt::skip]
44039        let a = _mm512_setr_epi32(
44040            0, 1, -1, i32::MAX,
44041            i32::MIN, 100, -100, -32,
44042            0, 1, -1, i32::MAX,
44043            i32::MIN, 100, -100, -32,
44044        );
44045        let b = _mm512_set1_epi32(2);
44046        let r = _mm512_mask_mullo_epi32(a, 0, a, b);
44047        assert_eq_m512i(r, a);
44048        let r = _mm512_mask_mullo_epi32(a, 0b00000000_11111111, a, b);
44049        #[rustfmt::skip]
44050        let e = _mm512_setr_epi32(
44051            0, 2, -2, -2,
44052            0, 200, -200, -64,
44053            0, 1, -1, i32::MAX,
44054            i32::MIN, 100, -100, -32,
44055        );
44056        assert_eq_m512i(r, e);
44057    }
44058
44059    #[simd_test(enable = "avx512f")]
44060    unsafe fn test_mm512_maskz_mullo_epi32() {
44061        #[rustfmt::skip]
44062        let a = _mm512_setr_epi32(
44063            0, 1, -1, i32::MAX,
44064            i32::MIN, 100, -100, -32,
44065            0, 1, -1, i32::MAX,
44066            i32::MIN, 100, -100, -32,
44067        );
44068        let b = _mm512_set1_epi32(2);
44069        let r = _mm512_maskz_mullo_epi32(0, a, b);
44070        assert_eq_m512i(r, _mm512_setzero_si512());
44071        let r = _mm512_maskz_mullo_epi32(0b00000000_11111111, a, b);
44072        let e = _mm512_setr_epi32(0, 2, -2, -2, 0, 200, -200, -64, 0, 0, 0, 0, 0, 0, 0, 0);
44073        assert_eq_m512i(r, e);
44074    }
44075
44076    #[simd_test(enable = "avx512f,avx512vl")]
44077    unsafe fn test_mm256_mask_mullo_epi32() {
44078        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
44079        let b = _mm256_set1_epi32(2);
44080        let r = _mm256_mask_mullo_epi32(a, 0, a, b);
44081        assert_eq_m256i(r, a);
44082        let r = _mm256_mask_mullo_epi32(a, 0b11111111, a, b);
44083        let e = _mm256_set_epi32(0, 2, -2, -2, 0, 200, -200, -64);
44084        assert_eq_m256i(r, e);
44085    }
44086
44087    #[simd_test(enable = "avx512f,avx512vl")]
44088    unsafe fn test_mm256_maskz_mullo_epi32() {
44089        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
44090        let b = _mm256_set1_epi32(2);
44091        let r = _mm256_maskz_mullo_epi32(0, a, b);
44092        assert_eq_m256i(r, _mm256_setzero_si256());
44093        let r = _mm256_maskz_mullo_epi32(0b11111111, a, b);
44094        let e = _mm256_set_epi32(0, 2, -2, -2, 0, 200, -200, -64);
44095        assert_eq_m256i(r, e);
44096    }
44097
44098    #[simd_test(enable = "avx512f,avx512vl")]
44099    unsafe fn test_mm_mask_mullo_epi32() {
44100        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
44101        let b = _mm_set1_epi32(2);
44102        let r = _mm_mask_mullo_epi32(a, 0, a, b);
44103        assert_eq_m128i(r, a);
44104        let r = _mm_mask_mullo_epi32(a, 0b00001111, a, b);
44105        let e = _mm_set_epi32(2, -2, -2, 0);
44106        assert_eq_m128i(r, e);
44107    }
44108
44109    #[simd_test(enable = "avx512f,avx512vl")]
44110    unsafe fn test_mm_maskz_mullo_epi32() {
44111        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
44112        let b = _mm_set1_epi32(2);
44113        let r = _mm_maskz_mullo_epi32(0, a, b);
44114        assert_eq_m128i(r, _mm_setzero_si128());
44115        let r = _mm_maskz_mullo_epi32(0b00001111, a, b);
44116        let e = _mm_set_epi32(2, -2, -2, 0);
44117        assert_eq_m128i(r, e);
44118    }
44119
44120    #[simd_test(enable = "avx512f")]
44121    unsafe fn test_mm512_mul_ps() {
44122        #[rustfmt::skip]
44123        let a = _mm512_setr_ps(
44124            0., 1., -1., f32::MAX,
44125            f32::MIN, 100., -100., -32.,
44126            0., 1., -1., f32::MAX,
44127            f32::MIN, 100., -100., -32.,
44128        );
44129        let b = _mm512_set1_ps(2.);
44130        let r = _mm512_mul_ps(a, b);
44131        #[rustfmt::skip]
44132        let e = _mm512_setr_ps(
44133            0., 2., -2., f32::INFINITY,
44134            f32::NEG_INFINITY, 200., -200., -64.,
44135            0., 2., -2., f32::INFINITY,
44136            f32::NEG_INFINITY, 200., -200.,
44137            -64.,
44138        );
44139        assert_eq_m512(r, e);
44140    }
44141
44142    #[simd_test(enable = "avx512f")]
44143    unsafe fn test_mm512_mask_mul_ps() {
44144        #[rustfmt::skip]
44145        let a = _mm512_setr_ps(
44146            0., 1., -1., f32::MAX,
44147            f32::MIN, 100., -100., -32.,
44148            0., 1., -1., f32::MAX,
44149            f32::MIN, 100., -100., -32.,
44150        );
44151        let b = _mm512_set1_ps(2.);
44152        let r = _mm512_mask_mul_ps(a, 0, a, b);
44153        assert_eq_m512(r, a);
44154        let r = _mm512_mask_mul_ps(a, 0b00000000_11111111, a, b);
44155        #[rustfmt::skip]
44156        let e = _mm512_setr_ps(
44157            0., 2., -2., f32::INFINITY,
44158            f32::NEG_INFINITY, 200., -200., -64.,
44159            0., 1., -1., f32::MAX,
44160            f32::MIN, 100., -100., -32.,
44161        );
44162        assert_eq_m512(r, e);
44163    }
44164
44165    #[simd_test(enable = "avx512f")]
44166    unsafe fn test_mm512_maskz_mul_ps() {
44167        #[rustfmt::skip]
44168        let a = _mm512_setr_ps(
44169            0., 1., -1., f32::MAX,
44170            f32::MIN, 100., -100., -32.,
44171            0., 1., -1., f32::MAX,
44172            f32::MIN, 100., -100., -32.,
44173        );
44174        let b = _mm512_set1_ps(2.);
44175        let r = _mm512_maskz_mul_ps(0, a, b);
44176        assert_eq_m512(r, _mm512_setzero_ps());
44177        let r = _mm512_maskz_mul_ps(0b00000000_11111111, a, b);
44178        #[rustfmt::skip]
44179        let e = _mm512_setr_ps(
44180            0., 2., -2., f32::INFINITY,
44181            f32::NEG_INFINITY, 200., -200., -64.,
44182            0., 0., 0., 0.,
44183            0., 0., 0., 0.,
44184        );
44185        assert_eq_m512(r, e);
44186    }
44187
44188    #[simd_test(enable = "avx512f,avx512vl")]
44189    unsafe fn test_mm256_mask_mul_ps() {
44190        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
44191        let b = _mm256_set1_ps(2.);
44192        let r = _mm256_mask_mul_ps(a, 0, a, b);
44193        assert_eq_m256(r, a);
44194        let r = _mm256_mask_mul_ps(a, 0b11111111, a, b);
44195        #[rustfmt::skip]
44196        let e = _mm256_set_ps(
44197            0., 2., -2., f32::INFINITY,
44198            f32::NEG_INFINITY, 200., -200., -64.,
44199        );
44200        assert_eq_m256(r, e);
44201    }
44202
44203    #[simd_test(enable = "avx512f,avx512vl")]
44204    unsafe fn test_mm256_maskz_mul_ps() {
44205        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
44206        let b = _mm256_set1_ps(2.);
44207        let r = _mm256_maskz_mul_ps(0, a, b);
44208        assert_eq_m256(r, _mm256_setzero_ps());
44209        let r = _mm256_maskz_mul_ps(0b11111111, a, b);
44210        #[rustfmt::skip]
44211        let e = _mm256_set_ps(
44212            0., 2., -2., f32::INFINITY,
44213            f32::NEG_INFINITY, 200., -200., -64.,
44214        );
44215        assert_eq_m256(r, e);
44216    }
44217
44218    #[simd_test(enable = "avx512f,avx512vl")]
44219    unsafe fn test_mm_mask_mul_ps() {
44220        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
44221        let b = _mm_set1_ps(2.);
44222        let r = _mm_mask_mul_ps(a, 0, a, b);
44223        assert_eq_m128(r, a);
44224        let r = _mm_mask_mul_ps(a, 0b00001111, a, b);
44225        let e = _mm_set_ps(2., -2., f32::INFINITY, f32::NEG_INFINITY);
44226        assert_eq_m128(r, e);
44227    }
44228
44229    #[simd_test(enable = "avx512f,avx512vl")]
44230    unsafe fn test_mm_maskz_mul_ps() {
44231        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
44232        let b = _mm_set1_ps(2.);
44233        let r = _mm_maskz_mul_ps(0, a, b);
44234        assert_eq_m128(r, _mm_setzero_ps());
44235        let r = _mm_maskz_mul_ps(0b00001111, a, b);
44236        let e = _mm_set_ps(2., -2., f32::INFINITY, f32::NEG_INFINITY);
44237        assert_eq_m128(r, e);
44238    }
44239
44240    #[simd_test(enable = "avx512f")]
44241    unsafe fn test_mm512_div_ps() {
44242        let a = _mm512_setr_ps(
44243            0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
44244        );
44245        let b = _mm512_setr_ps(
44246            2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
44247        );
44248        let r = _mm512_div_ps(a, b);
44249        #[rustfmt::skip]
44250        let e = _mm512_setr_ps(
44251            0., 0.5, -0.5, -1.,
44252            50., f32::INFINITY, -50., -16.,
44253            0., 0.5, -0.5, 500.,
44254            f32::NEG_INFINITY, 50., -50., -16.,
44255        );
44256        assert_eq_m512(r, e); // 0/0 = NAN
44257    }
44258
44259    #[simd_test(enable = "avx512f")]
44260    unsafe fn test_mm512_mask_div_ps() {
44261        let a = _mm512_setr_ps(
44262            0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
44263        );
44264        let b = _mm512_setr_ps(
44265            2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
44266        );
44267        let r = _mm512_mask_div_ps(a, 0, a, b);
44268        assert_eq_m512(r, a);
44269        let r = _mm512_mask_div_ps(a, 0b00000000_11111111, a, b);
44270        #[rustfmt::skip]
44271        let e = _mm512_setr_ps(
44272            0., 0.5, -0.5, -1.,
44273            50., f32::INFINITY, -50., -16.,
44274            0., 1., -1., 1000.,
44275            -131., 100., -100., -32.,
44276        );
44277        assert_eq_m512(r, e);
44278    }
44279
44280    #[simd_test(enable = "avx512f")]
44281    unsafe fn test_mm512_maskz_div_ps() {
44282        let a = _mm512_setr_ps(
44283            0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
44284        );
44285        let b = _mm512_setr_ps(
44286            2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
44287        );
44288        let r = _mm512_maskz_div_ps(0, a, b);
44289        assert_eq_m512(r, _mm512_setzero_ps());
44290        let r = _mm512_maskz_div_ps(0b00000000_11111111, a, b);
44291        #[rustfmt::skip]
44292        let e = _mm512_setr_ps(
44293            0., 0.5, -0.5, -1.,
44294            50., f32::INFINITY, -50., -16.,
44295            0., 0., 0., 0.,
44296            0., 0., 0., 0.,
44297        );
44298        assert_eq_m512(r, e);
44299    }
44300
44301    #[simd_test(enable = "avx512f,avx512vl")]
44302    unsafe fn test_mm256_mask_div_ps() {
44303        let a = _mm256_set_ps(0., 1., -1., -2., 100., 100., -100., -32.);
44304        let b = _mm256_set_ps(2., 2., 2., 2., 2., 0., 2., 2.);
44305        let r = _mm256_mask_div_ps(a, 0, a, b);
44306        assert_eq_m256(r, a);
44307        let r = _mm256_mask_div_ps(a, 0b11111111, a, b);
44308        let e = _mm256_set_ps(0., 0.5, -0.5, -1., 50., f32::INFINITY, -50., -16.);
44309        assert_eq_m256(r, e);
44310    }
44311
44312    #[simd_test(enable = "avx512f,avx512vl")]
44313    unsafe fn test_mm256_maskz_div_ps() {
44314        let a = _mm256_set_ps(0., 1., -1., -2., 100., 100., -100., -32.);
44315        let b = _mm256_set_ps(2., 2., 2., 2., 2., 0., 2., 2.);
44316        let r = _mm256_maskz_div_ps(0, a, b);
44317        assert_eq_m256(r, _mm256_setzero_ps());
44318        let r = _mm256_maskz_div_ps(0b11111111, a, b);
44319        let e = _mm256_set_ps(0., 0.5, -0.5, -1., 50., f32::INFINITY, -50., -16.);
44320        assert_eq_m256(r, e);
44321    }
44322
44323    #[simd_test(enable = "avx512f,avx512vl")]
44324    unsafe fn test_mm_mask_div_ps() {
44325        let a = _mm_set_ps(100., 100., -100., -32.);
44326        let b = _mm_set_ps(2., 0., 2., 2.);
44327        let r = _mm_mask_div_ps(a, 0, a, b);
44328        assert_eq_m128(r, a);
44329        let r = _mm_mask_div_ps(a, 0b00001111, a, b);
44330        let e = _mm_set_ps(50., f32::INFINITY, -50., -16.);
44331        assert_eq_m128(r, e);
44332    }
44333
44334    #[simd_test(enable = "avx512f,avx512vl")]
44335    unsafe fn test_mm_maskz_div_ps() {
44336        let a = _mm_set_ps(100., 100., -100., -32.);
44337        let b = _mm_set_ps(2., 0., 2., 2.);
44338        let r = _mm_maskz_div_ps(0, a, b);
44339        assert_eq_m128(r, _mm_setzero_ps());
44340        let r = _mm_maskz_div_ps(0b00001111, a, b);
44341        let e = _mm_set_ps(50., f32::INFINITY, -50., -16.);
44342        assert_eq_m128(r, e);
44343    }
44344
44345    #[simd_test(enable = "avx512f")]
44346    unsafe fn test_mm512_max_epi32() {
44347        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44348        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44349        let r = _mm512_max_epi32(a, b);
44350        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
44351        assert_eq_m512i(r, e);
44352    }
44353
44354    #[simd_test(enable = "avx512f")]
44355    unsafe fn test_mm512_mask_max_epi32() {
44356        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44357        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44358        let r = _mm512_mask_max_epi32(a, 0, a, b);
44359        assert_eq_m512i(r, a);
44360        let r = _mm512_mask_max_epi32(a, 0b00000000_11111111, a, b);
44361        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
44362        assert_eq_m512i(r, e);
44363    }
44364
44365    #[simd_test(enable = "avx512f")]
44366    unsafe fn test_mm512_maskz_max_epi32() {
44367        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44368        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44369        let r = _mm512_maskz_max_epi32(0, a, b);
44370        assert_eq_m512i(r, _mm512_setzero_si512());
44371        let r = _mm512_maskz_max_epi32(0b00000000_11111111, a, b);
44372        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0);
44373        assert_eq_m512i(r, e);
44374    }
44375
44376    #[simd_test(enable = "avx512f,avx512vl")]
44377    unsafe fn test_mm256_mask_max_epi32() {
44378        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44379        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44380        let r = _mm256_mask_max_epi32(a, 0, a, b);
44381        assert_eq_m256i(r, a);
44382        let r = _mm256_mask_max_epi32(a, 0b11111111, a, b);
44383        let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
44384        assert_eq_m256i(r, e);
44385    }
44386
44387    #[simd_test(enable = "avx512f,avx512vl")]
44388    unsafe fn test_mm256_maskz_max_epi32() {
44389        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44390        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44391        let r = _mm256_maskz_max_epi32(0, a, b);
44392        assert_eq_m256i(r, _mm256_setzero_si256());
44393        let r = _mm256_maskz_max_epi32(0b11111111, a, b);
44394        let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
44395        assert_eq_m256i(r, e);
44396    }
44397
44398    #[simd_test(enable = "avx512f,avx512vl")]
44399    unsafe fn test_mm_mask_max_epi32() {
44400        let a = _mm_set_epi32(0, 1, 2, 3);
44401        let b = _mm_set_epi32(3, 2, 1, 0);
44402        let r = _mm_mask_max_epi32(a, 0, a, b);
44403        assert_eq_m128i(r, a);
44404        let r = _mm_mask_max_epi32(a, 0b00001111, a, b);
44405        let e = _mm_set_epi32(3, 2, 2, 3);
44406        assert_eq_m128i(r, e);
44407    }
44408
44409    #[simd_test(enable = "avx512f,avx512vl")]
44410    unsafe fn test_mm_maskz_max_epi32() {
44411        let a = _mm_set_epi32(0, 1, 2, 3);
44412        let b = _mm_set_epi32(3, 2, 1, 0);
44413        let r = _mm_maskz_max_epi32(0, a, b);
44414        assert_eq_m128i(r, _mm_setzero_si128());
44415        let r = _mm_maskz_max_epi32(0b00001111, a, b);
44416        let e = _mm_set_epi32(3, 2, 2, 3);
44417        assert_eq_m128i(r, e);
44418    }
44419
44420    #[simd_test(enable = "avx512f")]
44421    unsafe fn test_mm512_max_ps() {
44422        let a = _mm512_setr_ps(
44423            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44424        );
44425        let b = _mm512_setr_ps(
44426            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44427        );
44428        let r = _mm512_max_ps(a, b);
44429        let e = _mm512_setr_ps(
44430            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
44431        );
44432        assert_eq_m512(r, e);
44433    }
44434
44435    #[simd_test(enable = "avx512f")]
44436    unsafe fn test_mm512_mask_max_ps() {
44437        let a = _mm512_setr_ps(
44438            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44439        );
44440        let b = _mm512_setr_ps(
44441            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44442        );
44443        let r = _mm512_mask_max_ps(a, 0, a, b);
44444        assert_eq_m512(r, a);
44445        let r = _mm512_mask_max_ps(a, 0b00000000_11111111, a, b);
44446        let e = _mm512_setr_ps(
44447            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
44448        );
44449        assert_eq_m512(r, e);
44450    }
44451
44452    #[simd_test(enable = "avx512f")]
44453    unsafe fn test_mm512_maskz_max_ps() {
44454        let a = _mm512_setr_ps(
44455            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44456        );
44457        let b = _mm512_setr_ps(
44458            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44459        );
44460        let r = _mm512_maskz_max_ps(0, a, b);
44461        assert_eq_m512(r, _mm512_setzero_ps());
44462        let r = _mm512_maskz_max_ps(0b00000000_11111111, a, b);
44463        let e = _mm512_setr_ps(
44464            15., 14., 13., 12., 11., 10., 9., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
44465        );
44466        assert_eq_m512(r, e);
44467    }
44468
44469    #[simd_test(enable = "avx512f,avx512vl")]
44470    unsafe fn test_mm256_mask_max_ps() {
44471        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44472        let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
44473        let r = _mm256_mask_max_ps(a, 0, a, b);
44474        assert_eq_m256(r, a);
44475        let r = _mm256_mask_max_ps(a, 0b11111111, a, b);
44476        let e = _mm256_set_ps(7., 6., 5., 4., 4., 5., 6., 7.);
44477        assert_eq_m256(r, e);
44478    }
44479
44480    #[simd_test(enable = "avx512f,avx512vl")]
44481    unsafe fn test_mm256_maskz_max_ps() {
44482        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44483        let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
44484        let r = _mm256_maskz_max_ps(0, a, b);
44485        assert_eq_m256(r, _mm256_setzero_ps());
44486        let r = _mm256_maskz_max_ps(0b11111111, a, b);
44487        let e = _mm256_set_ps(7., 6., 5., 4., 4., 5., 6., 7.);
44488        assert_eq_m256(r, e);
44489    }
44490
44491    #[simd_test(enable = "avx512f,avx512vl")]
44492    unsafe fn test_mm_mask_max_ps() {
44493        let a = _mm_set_ps(0., 1., 2., 3.);
44494        let b = _mm_set_ps(3., 2., 1., 0.);
44495        let r = _mm_mask_max_ps(a, 0, a, b);
44496        assert_eq_m128(r, a);
44497        let r = _mm_mask_max_ps(a, 0b00001111, a, b);
44498        let e = _mm_set_ps(3., 2., 2., 3.);
44499        assert_eq_m128(r, e);
44500    }
44501
44502    #[simd_test(enable = "avx512f,avx512vl")]
44503    unsafe fn test_mm_maskz_max_ps() {
44504        let a = _mm_set_ps(0., 1., 2., 3.);
44505        let b = _mm_set_ps(3., 2., 1., 0.);
44506        let r = _mm_maskz_max_ps(0, a, b);
44507        assert_eq_m128(r, _mm_setzero_ps());
44508        let r = _mm_mask_max_ps(a, 0b00001111, a, b);
44509        let e = _mm_set_ps(3., 2., 2., 3.);
44510        assert_eq_m128(r, e);
44511    }
44512
44513    #[simd_test(enable = "avx512f")]
44514    unsafe fn test_mm512_max_epu32() {
44515        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44516        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44517        let r = _mm512_max_epu32(a, b);
44518        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
44519        assert_eq_m512i(r, e);
44520    }
44521
44522    #[simd_test(enable = "avx512f")]
44523    unsafe fn test_mm512_mask_max_epu32() {
44524        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44525        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44526        let r = _mm512_mask_max_epu32(a, 0, a, b);
44527        assert_eq_m512i(r, a);
44528        let r = _mm512_mask_max_epu32(a, 0b00000000_11111111, a, b);
44529        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
44530        assert_eq_m512i(r, e);
44531    }
44532
44533    #[simd_test(enable = "avx512f")]
44534    unsafe fn test_mm512_maskz_max_epu32() {
44535        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44536        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44537        let r = _mm512_maskz_max_epu32(0, a, b);
44538        assert_eq_m512i(r, _mm512_setzero_si512());
44539        let r = _mm512_maskz_max_epu32(0b00000000_11111111, a, b);
44540        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0);
44541        assert_eq_m512i(r, e);
44542    }
44543
44544    #[simd_test(enable = "avx512f,avx512vl")]
44545    unsafe fn test_mm256_mask_max_epu32() {
44546        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44547        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44548        let r = _mm256_mask_max_epu32(a, 0, a, b);
44549        assert_eq_m256i(r, a);
44550        let r = _mm256_mask_max_epu32(a, 0b11111111, a, b);
44551        let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
44552        assert_eq_m256i(r, e);
44553    }
44554
44555    #[simd_test(enable = "avx512f,avx512vl")]
44556    unsafe fn test_mm256_maskz_max_epu32() {
44557        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44558        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44559        let r = _mm256_maskz_max_epu32(0, a, b);
44560        assert_eq_m256i(r, _mm256_setzero_si256());
44561        let r = _mm256_maskz_max_epu32(0b11111111, a, b);
44562        let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
44563        assert_eq_m256i(r, e);
44564    }
44565
44566    #[simd_test(enable = "avx512f,avx512vl")]
44567    unsafe fn test_mm_mask_max_epu32() {
44568        let a = _mm_set_epi32(0, 1, 2, 3);
44569        let b = _mm_set_epi32(3, 2, 1, 0);
44570        let r = _mm_mask_max_epu32(a, 0, a, b);
44571        assert_eq_m128i(r, a);
44572        let r = _mm_mask_max_epu32(a, 0b00001111, a, b);
44573        let e = _mm_set_epi32(3, 2, 2, 3);
44574        assert_eq_m128i(r, e);
44575    }
44576
44577    #[simd_test(enable = "avx512f,avx512vl")]
44578    unsafe fn test_mm_maskz_max_epu32() {
44579        let a = _mm_set_epi32(0, 1, 2, 3);
44580        let b = _mm_set_epi32(3, 2, 1, 0);
44581        let r = _mm_maskz_max_epu32(0, a, b);
44582        assert_eq_m128i(r, _mm_setzero_si128());
44583        let r = _mm_maskz_max_epu32(0b00001111, a, b);
44584        let e = _mm_set_epi32(3, 2, 2, 3);
44585        assert_eq_m128i(r, e);
44586    }
44587
44588    #[simd_test(enable = "avx512f")]
44589    unsafe fn test_mm512_min_epi32() {
44590        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44591        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44592        let r = _mm512_min_epi32(a, b);
44593        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
44594        assert_eq_m512i(r, e);
44595    }
44596
44597    #[simd_test(enable = "avx512f")]
44598    unsafe fn test_mm512_mask_min_epi32() {
44599        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44600        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44601        let r = _mm512_mask_min_epi32(a, 0, a, b);
44602        assert_eq_m512i(r, a);
44603        let r = _mm512_mask_min_epi32(a, 0b00000000_11111111, a, b);
44604        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44605        assert_eq_m512i(r, e);
44606    }
44607
44608    #[simd_test(enable = "avx512f")]
44609    unsafe fn test_mm512_maskz_min_epi32() {
44610        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44611        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44612        let r = _mm512_maskz_min_epi32(0, a, b);
44613        assert_eq_m512i(r, _mm512_setzero_si512());
44614        let r = _mm512_maskz_min_epi32(0b00000000_11111111, a, b);
44615        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
44616        assert_eq_m512i(r, e);
44617    }
44618
44619    #[simd_test(enable = "avx512f,avx512vl")]
44620    unsafe fn test_mm256_mask_min_epi32() {
44621        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44622        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44623        let r = _mm256_mask_min_epi32(a, 0, a, b);
44624        assert_eq_m256i(r, a);
44625        let r = _mm256_mask_min_epi32(a, 0b11111111, a, b);
44626        let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
44627        assert_eq_m256i(r, e);
44628    }
44629
44630    #[simd_test(enable = "avx512f,avx512vl")]
44631    unsafe fn test_mm256_maskz_min_epi32() {
44632        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44633        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44634        let r = _mm256_maskz_min_epi32(0, a, b);
44635        assert_eq_m256i(r, _mm256_setzero_si256());
44636        let r = _mm256_maskz_min_epi32(0b11111111, a, b);
44637        let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
44638        assert_eq_m256i(r, e);
44639    }
44640
44641    #[simd_test(enable = "avx512f,avx512vl")]
44642    unsafe fn test_mm_mask_min_epi32() {
44643        let a = _mm_set_epi32(0, 1, 2, 3);
44644        let b = _mm_set_epi32(3, 2, 1, 0);
44645        let r = _mm_mask_min_epi32(a, 0, a, b);
44646        assert_eq_m128i(r, a);
44647        let r = _mm_mask_min_epi32(a, 0b00001111, a, b);
44648        let e = _mm_set_epi32(0, 1, 1, 0);
44649        assert_eq_m128i(r, e);
44650    }
44651
44652    #[simd_test(enable = "avx512f,avx512vl")]
44653    unsafe fn test_mm_maskz_min_epi32() {
44654        let a = _mm_set_epi32(0, 1, 2, 3);
44655        let b = _mm_set_epi32(3, 2, 1, 0);
44656        let r = _mm_maskz_min_epi32(0, a, b);
44657        assert_eq_m128i(r, _mm_setzero_si128());
44658        let r = _mm_maskz_min_epi32(0b00001111, a, b);
44659        let e = _mm_set_epi32(0, 1, 1, 0);
44660        assert_eq_m128i(r, e);
44661    }
44662
44663    #[simd_test(enable = "avx512f")]
44664    unsafe fn test_mm512_min_ps() {
44665        let a = _mm512_setr_ps(
44666            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44667        );
44668        let b = _mm512_setr_ps(
44669            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44670        );
44671        let r = _mm512_min_ps(a, b);
44672        let e = _mm512_setr_ps(
44673            0., 1., 2., 3., 4., 5., 6., 7., 7., 6., 5., 4., 3., 2., 1., 0.,
44674        );
44675        assert_eq_m512(r, e);
44676    }
44677
44678    #[simd_test(enable = "avx512f")]
44679    unsafe fn test_mm512_mask_min_ps() {
44680        let a = _mm512_setr_ps(
44681            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44682        );
44683        let b = _mm512_setr_ps(
44684            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44685        );
44686        let r = _mm512_mask_min_ps(a, 0, a, b);
44687        assert_eq_m512(r, a);
44688        let r = _mm512_mask_min_ps(a, 0b00000000_11111111, a, b);
44689        let e = _mm512_setr_ps(
44690            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44691        );
44692        assert_eq_m512(r, e);
44693    }
44694
44695    #[simd_test(enable = "avx512f")]
44696    unsafe fn test_mm512_maskz_min_ps() {
44697        let a = _mm512_setr_ps(
44698            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44699        );
44700        let b = _mm512_setr_ps(
44701            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44702        );
44703        let r = _mm512_maskz_min_ps(0, a, b);
44704        assert_eq_m512(r, _mm512_setzero_ps());
44705        let r = _mm512_maskz_min_ps(0b00000000_11111111, a, b);
44706        let e = _mm512_setr_ps(
44707            0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
44708        );
44709        assert_eq_m512(r, e);
44710    }
44711
44712    #[simd_test(enable = "avx512f,avx512vl")]
44713    unsafe fn test_mm256_mask_min_ps() {
44714        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44715        let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
44716        let r = _mm256_mask_min_ps(a, 0, a, b);
44717        assert_eq_m256(r, a);
44718        let r = _mm256_mask_min_ps(a, 0b11111111, a, b);
44719        let e = _mm256_set_ps(0., 1., 2., 3., 3., 2., 1., 0.);
44720        assert_eq_m256(r, e);
44721    }
44722
44723    #[simd_test(enable = "avx512f,avx512vl")]
44724    unsafe fn test_mm256_maskz_min_ps() {
44725        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44726        let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
44727        let r = _mm256_maskz_min_ps(0, a, b);
44728        assert_eq_m256(r, _mm256_setzero_ps());
44729        let r = _mm256_maskz_min_ps(0b11111111, a, b);
44730        let e = _mm256_set_ps(0., 1., 2., 3., 3., 2., 1., 0.);
44731        assert_eq_m256(r, e);
44732    }
44733
44734    #[simd_test(enable = "avx512f,avx512vl")]
44735    unsafe fn test_mm_mask_min_ps() {
44736        let a = _mm_set_ps(0., 1., 2., 3.);
44737        let b = _mm_set_ps(3., 2., 1., 0.);
44738        let r = _mm_mask_min_ps(a, 0, a, b);
44739        assert_eq_m128(r, a);
44740        let r = _mm_mask_min_ps(a, 0b00001111, a, b);
44741        let e = _mm_set_ps(0., 1., 1., 0.);
44742        assert_eq_m128(r, e);
44743    }
44744
44745    #[simd_test(enable = "avx512f,avx512vl")]
44746    unsafe fn test_mm_maskz_min_ps() {
44747        let a = _mm_set_ps(0., 1., 2., 3.);
44748        let b = _mm_set_ps(3., 2., 1., 0.);
44749        let r = _mm_maskz_min_ps(0, a, b);
44750        assert_eq_m128(r, _mm_setzero_ps());
44751        let r = _mm_maskz_min_ps(0b00001111, a, b);
44752        let e = _mm_set_ps(0., 1., 1., 0.);
44753        assert_eq_m128(r, e);
44754    }
44755
44756    #[simd_test(enable = "avx512f")]
44757    unsafe fn test_mm512_min_epu32() {
44758        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44759        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44760        let r = _mm512_min_epu32(a, b);
44761        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
44762        assert_eq_m512i(r, e);
44763    }
44764
44765    #[simd_test(enable = "avx512f")]
44766    unsafe fn test_mm512_mask_min_epu32() {
44767        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44768        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44769        let r = _mm512_mask_min_epu32(a, 0, a, b);
44770        assert_eq_m512i(r, a);
44771        let r = _mm512_mask_min_epu32(a, 0b00000000_11111111, a, b);
44772        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44773        assert_eq_m512i(r, e);
44774    }
44775
44776    #[simd_test(enable = "avx512f")]
44777    unsafe fn test_mm512_maskz_min_epu32() {
44778        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44779        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44780        let r = _mm512_maskz_min_epu32(0, a, b);
44781        assert_eq_m512i(r, _mm512_setzero_si512());
44782        let r = _mm512_maskz_min_epu32(0b00000000_11111111, a, b);
44783        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
44784        assert_eq_m512i(r, e);
44785    }
44786
44787    #[simd_test(enable = "avx512f,avx512vl")]
44788    unsafe fn test_mm256_mask_min_epu32() {
44789        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44790        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44791        let r = _mm256_mask_min_epu32(a, 0, a, b);
44792        assert_eq_m256i(r, a);
44793        let r = _mm256_mask_min_epu32(a, 0b11111111, a, b);
44794        let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
44795        assert_eq_m256i(r, e);
44796    }
44797
44798    #[simd_test(enable = "avx512f,avx512vl")]
44799    unsafe fn test_mm256_maskz_min_epu32() {
44800        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44801        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44802        let r = _mm256_maskz_min_epu32(0, a, b);
44803        assert_eq_m256i(r, _mm256_setzero_si256());
44804        let r = _mm256_maskz_min_epu32(0b11111111, a, b);
44805        let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
44806        assert_eq_m256i(r, e);
44807    }
44808
44809    #[simd_test(enable = "avx512f,avx512vl")]
44810    unsafe fn test_mm_mask_min_epu32() {
44811        let a = _mm_set_epi32(0, 1, 2, 3);
44812        let b = _mm_set_epi32(3, 2, 1, 0);
44813        let r = _mm_mask_min_epu32(a, 0, a, b);
44814        assert_eq_m128i(r, a);
44815        let r = _mm_mask_min_epu32(a, 0b00001111, a, b);
44816        let e = _mm_set_epi32(0, 1, 1, 0);
44817        assert_eq_m128i(r, e);
44818    }
44819
44820    #[simd_test(enable = "avx512f,avx512vl")]
44821    unsafe fn test_mm_maskz_min_epu32() {
44822        let a = _mm_set_epi32(0, 1, 2, 3);
44823        let b = _mm_set_epi32(3, 2, 1, 0);
44824        let r = _mm_maskz_min_epu32(0, a, b);
44825        assert_eq_m128i(r, _mm_setzero_si128());
44826        let r = _mm_maskz_min_epu32(0b00001111, a, b);
44827        let e = _mm_set_epi32(0, 1, 1, 0);
44828        assert_eq_m128i(r, e);
44829    }
44830
44831    #[simd_test(enable = "avx512f")]
44832    unsafe fn test_mm512_sqrt_ps() {
44833        let a = _mm512_setr_ps(
44834            0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
44835        );
44836        let r = _mm512_sqrt_ps(a);
44837        let e = _mm512_setr_ps(
44838            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44839        );
44840        assert_eq_m512(r, e);
44841    }
44842
44843    #[simd_test(enable = "avx512f")]
44844    unsafe fn test_mm512_mask_sqrt_ps() {
44845        let a = _mm512_setr_ps(
44846            0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
44847        );
44848        let r = _mm512_mask_sqrt_ps(a, 0, a);
44849        assert_eq_m512(r, a);
44850        let r = _mm512_mask_sqrt_ps(a, 0b00000000_11111111, a);
44851        let e = _mm512_setr_ps(
44852            0., 1., 2., 3., 4., 5., 6., 7., 64., 81., 100., 121., 144., 169., 196., 225.,
44853        );
44854        assert_eq_m512(r, e);
44855    }
44856
44857    #[simd_test(enable = "avx512f")]
44858    unsafe fn test_mm512_maskz_sqrt_ps() {
44859        let a = _mm512_setr_ps(
44860            0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
44861        );
44862        let r = _mm512_maskz_sqrt_ps(0, a);
44863        assert_eq_m512(r, _mm512_setzero_ps());
44864        let r = _mm512_maskz_sqrt_ps(0b00000000_11111111, a);
44865        let e = _mm512_setr_ps(
44866            0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
44867        );
44868        assert_eq_m512(r, e);
44869    }
44870
44871    #[simd_test(enable = "avx512f,avx512vl")]
44872    unsafe fn test_mm256_mask_sqrt_ps() {
44873        let a = _mm256_set_ps(0., 1., 4., 9., 16., 25., 36., 49.);
44874        let r = _mm256_mask_sqrt_ps(a, 0, a);
44875        assert_eq_m256(r, a);
44876        let r = _mm256_mask_sqrt_ps(a, 0b11111111, a);
44877        let e = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44878        assert_eq_m256(r, e);
44879    }
44880
44881    #[simd_test(enable = "avx512f,avx512vl")]
44882    unsafe fn test_mm256_maskz_sqrt_ps() {
44883        let a = _mm256_set_ps(0., 1., 4., 9., 16., 25., 36., 49.);
44884        let r = _mm256_maskz_sqrt_ps(0, a);
44885        assert_eq_m256(r, _mm256_setzero_ps());
44886        let r = _mm256_maskz_sqrt_ps(0b11111111, a);
44887        let e = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44888        assert_eq_m256(r, e);
44889    }
44890
44891    #[simd_test(enable = "avx512f,avx512vl")]
44892    unsafe fn test_mm_mask_sqrt_ps() {
44893        let a = _mm_set_ps(0., 1., 4., 9.);
44894        let r = _mm_mask_sqrt_ps(a, 0, a);
44895        assert_eq_m128(r, a);
44896        let r = _mm_mask_sqrt_ps(a, 0b00001111, a);
44897        let e = _mm_set_ps(0., 1., 2., 3.);
44898        assert_eq_m128(r, e);
44899    }
44900
44901    #[simd_test(enable = "avx512f,avx512vl")]
44902    unsafe fn test_mm_maskz_sqrt_ps() {
44903        let a = _mm_set_ps(0., 1., 4., 9.);
44904        let r = _mm_maskz_sqrt_ps(0, a);
44905        assert_eq_m128(r, _mm_setzero_ps());
44906        let r = _mm_maskz_sqrt_ps(0b00001111, a);
44907        let e = _mm_set_ps(0., 1., 2., 3.);
44908        assert_eq_m128(r, e);
44909    }
44910
44911    #[simd_test(enable = "avx512f")]
44912    unsafe fn test_mm512_fmadd_ps() {
44913        let a = _mm512_set1_ps(1.);
44914        let b = _mm512_setr_ps(
44915            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44916        );
44917        let c = _mm512_set1_ps(1.);
44918        let r = _mm512_fmadd_ps(a, b, c);
44919        let e = _mm512_setr_ps(
44920            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
44921        );
44922        assert_eq_m512(r, e);
44923    }
44924
44925    #[simd_test(enable = "avx512f")]
44926    unsafe fn test_mm512_mask_fmadd_ps() {
44927        let a = _mm512_set1_ps(1.);
44928        let b = _mm512_setr_ps(
44929            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44930        );
44931        let c = _mm512_set1_ps(1.);
44932        let r = _mm512_mask_fmadd_ps(a, 0, b, c);
44933        assert_eq_m512(r, a);
44934        let r = _mm512_mask_fmadd_ps(a, 0b00000000_11111111, b, c);
44935        let e = _mm512_setr_ps(
44936            1., 2., 3., 4., 5., 6., 7., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
44937        );
44938        assert_eq_m512(r, e);
44939    }
44940
44941    #[simd_test(enable = "avx512f")]
44942    unsafe fn test_mm512_maskz_fmadd_ps() {
44943        let a = _mm512_set1_ps(1.);
44944        let b = _mm512_setr_ps(
44945            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44946        );
44947        let c = _mm512_set1_ps(1.);
44948        let r = _mm512_maskz_fmadd_ps(0, a, b, c);
44949        assert_eq_m512(r, _mm512_setzero_ps());
44950        let r = _mm512_maskz_fmadd_ps(0b00000000_11111111, a, b, c);
44951        let e = _mm512_setr_ps(
44952            1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
44953        );
44954        assert_eq_m512(r, e);
44955    }
44956
44957    #[simd_test(enable = "avx512f")]
44958    unsafe fn test_mm512_mask3_fmadd_ps() {
44959        let a = _mm512_set1_ps(1.);
44960        let b = _mm512_setr_ps(
44961            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44962        );
44963        let c = _mm512_set1_ps(2.);
44964        let r = _mm512_mask3_fmadd_ps(a, b, c, 0);
44965        assert_eq_m512(r, c);
44966        let r = _mm512_mask3_fmadd_ps(a, b, c, 0b00000000_11111111);
44967        let e = _mm512_setr_ps(
44968            2., 3., 4., 5., 6., 7., 8., 9., 2., 2., 2., 2., 2., 2., 2., 2.,
44969        );
44970        assert_eq_m512(r, e);
44971    }
44972
44973    #[simd_test(enable = "avx512f,avx512vl")]
44974    unsafe fn test_mm256_mask_fmadd_ps() {
44975        let a = _mm256_set1_ps(1.);
44976        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44977        let c = _mm256_set1_ps(1.);
44978        let r = _mm256_mask_fmadd_ps(a, 0, b, c);
44979        assert_eq_m256(r, a);
44980        let r = _mm256_mask_fmadd_ps(a, 0b11111111, b, c);
44981        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
44982        assert_eq_m256(r, e);
44983    }
44984
44985    #[simd_test(enable = "avx512f,avx512vl")]
44986    unsafe fn test_mm256_maskz_fmadd_ps() {
44987        let a = _mm256_set1_ps(1.);
44988        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44989        let c = _mm256_set1_ps(1.);
44990        let r = _mm256_maskz_fmadd_ps(0, a, b, c);
44991        assert_eq_m256(r, _mm256_setzero_ps());
44992        let r = _mm256_maskz_fmadd_ps(0b11111111, a, b, c);
44993        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
44994        assert_eq_m256(r, e);
44995    }
44996
44997    #[simd_test(enable = "avx512f,avx512vl")]
44998    unsafe fn test_mm256_mask3_fmadd_ps() {
44999        let a = _mm256_set1_ps(1.);
45000        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45001        let c = _mm256_set1_ps(1.);
45002        let r = _mm256_mask3_fmadd_ps(a, b, c, 0);
45003        assert_eq_m256(r, c);
45004        let r = _mm256_mask3_fmadd_ps(a, b, c, 0b11111111);
45005        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
45006        assert_eq_m256(r, e);
45007    }
45008
45009    #[simd_test(enable = "avx512f,avx512vl")]
45010    unsafe fn test_mm_mask_fmadd_ps() {
45011        let a = _mm_set1_ps(1.);
45012        let b = _mm_set_ps(0., 1., 2., 3.);
45013        let c = _mm_set1_ps(1.);
45014        let r = _mm_mask_fmadd_ps(a, 0, b, c);
45015        assert_eq_m128(r, a);
45016        let r = _mm_mask_fmadd_ps(a, 0b00001111, b, c);
45017        let e = _mm_set_ps(1., 2., 3., 4.);
45018        assert_eq_m128(r, e);
45019    }
45020
45021    #[simd_test(enable = "avx512f,avx512vl")]
45022    unsafe fn test_mm_maskz_fmadd_ps() {
45023        let a = _mm_set1_ps(1.);
45024        let b = _mm_set_ps(0., 1., 2., 3.);
45025        let c = _mm_set1_ps(1.);
45026        let r = _mm_maskz_fmadd_ps(0, a, b, c);
45027        assert_eq_m128(r, _mm_setzero_ps());
45028        let r = _mm_maskz_fmadd_ps(0b00001111, a, b, c);
45029        let e = _mm_set_ps(1., 2., 3., 4.);
45030        assert_eq_m128(r, e);
45031    }
45032
45033    #[simd_test(enable = "avx512f,avx512vl")]
45034    unsafe fn test_mm_mask3_fmadd_ps() {
45035        let a = _mm_set1_ps(1.);
45036        let b = _mm_set_ps(0., 1., 2., 3.);
45037        let c = _mm_set1_ps(1.);
45038        let r = _mm_mask3_fmadd_ps(a, b, c, 0);
45039        assert_eq_m128(r, c);
45040        let r = _mm_mask3_fmadd_ps(a, b, c, 0b00001111);
45041        let e = _mm_set_ps(1., 2., 3., 4.);
45042        assert_eq_m128(r, e);
45043    }
45044
45045    #[simd_test(enable = "avx512f")]
45046    unsafe fn test_mm512_fmsub_ps() {
45047        let a = _mm512_setr_ps(
45048            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
45049        );
45050        let b = _mm512_setr_ps(
45051            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45052        );
45053        let c = _mm512_setr_ps(
45054            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
45055        );
45056        let r = _mm512_fmsub_ps(a, b, c);
45057        let e = _mm512_setr_ps(
45058            -1., 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14.,
45059        );
45060        assert_eq_m512(r, e);
45061    }
45062
45063    #[simd_test(enable = "avx512f")]
45064    unsafe fn test_mm512_mask_fmsub_ps() {
45065        let a = _mm512_set1_ps(1.);
45066        let b = _mm512_setr_ps(
45067            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45068        );
45069        let c = _mm512_set1_ps(1.);
45070        let r = _mm512_mask_fmsub_ps(a, 0, b, c);
45071        assert_eq_m512(r, a);
45072        let r = _mm512_mask_fmsub_ps(a, 0b00000000_11111111, b, c);
45073        let e = _mm512_setr_ps(
45074            -1., 0., 1., 2., 3., 4., 5., 6., 1., 1., 1., 1., 1., 1., 1., 1.,
45075        );
45076        assert_eq_m512(r, e);
45077    }
45078
45079    #[simd_test(enable = "avx512f")]
45080    unsafe fn test_mm512_maskz_fmsub_ps() {
45081        let a = _mm512_set1_ps(1.);
45082        let b = _mm512_setr_ps(
45083            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45084        );
45085        let c = _mm512_set1_ps(1.);
45086        let r = _mm512_maskz_fmsub_ps(0, a, b, c);
45087        assert_eq_m512(r, _mm512_setzero_ps());
45088        let r = _mm512_maskz_fmsub_ps(0b00000000_11111111, a, b, c);
45089        let e = _mm512_setr_ps(
45090            -1., 0., 1., 2., 3., 4., 5., 6., 0., 0., 0., 0., 0., 0., 0., 0.,
45091        );
45092        assert_eq_m512(r, e);
45093    }
45094
45095    #[simd_test(enable = "avx512f")]
45096    unsafe fn test_mm512_mask3_fmsub_ps() {
45097        let a = _mm512_set1_ps(1.);
45098        let b = _mm512_setr_ps(
45099            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45100        );
45101        let c = _mm512_setr_ps(
45102            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
45103        );
45104        let r = _mm512_mask3_fmsub_ps(a, b, c, 0);
45105        assert_eq_m512(r, c);
45106        let r = _mm512_mask3_fmsub_ps(a, b, c, 0b00000000_11111111);
45107        let e = _mm512_setr_ps(
45108            -1., 0., 1., 2., 3., 4., 5., 6., 2., 2., 2., 2., 2., 2., 2., 2.,
45109        );
45110        assert_eq_m512(r, e);
45111    }
45112
45113    #[simd_test(enable = "avx512f,avx512vl")]
45114    unsafe fn test_mm256_mask_fmsub_ps() {
45115        let a = _mm256_set1_ps(1.);
45116        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45117        let c = _mm256_set1_ps(1.);
45118        let r = _mm256_mask_fmsub_ps(a, 0, b, c);
45119        assert_eq_m256(r, a);
45120        let r = _mm256_mask_fmsub_ps(a, 0b11111111, b, c);
45121        let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
45122        assert_eq_m256(r, e);
45123    }
45124
45125    #[simd_test(enable = "avx512f,avx512vl")]
45126    unsafe fn test_mm256_maskz_fmsub_ps() {
45127        let a = _mm256_set1_ps(1.);
45128        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45129        let c = _mm256_set1_ps(1.);
45130        let r = _mm256_maskz_fmsub_ps(0, a, b, c);
45131        assert_eq_m256(r, _mm256_setzero_ps());
45132        let r = _mm256_maskz_fmsub_ps(0b11111111, a, b, c);
45133        let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
45134        assert_eq_m256(r, e);
45135    }
45136
45137    #[simd_test(enable = "avx512f,avx512vl")]
45138    unsafe fn test_mm256_mask3_fmsub_ps() {
45139        let a = _mm256_set1_ps(1.);
45140        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45141        let c = _mm256_set1_ps(1.);
45142        let r = _mm256_mask3_fmsub_ps(a, b, c, 0);
45143        assert_eq_m256(r, c);
45144        let r = _mm256_mask3_fmsub_ps(a, b, c, 0b11111111);
45145        let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
45146        assert_eq_m256(r, e);
45147    }
45148
45149    #[simd_test(enable = "avx512f,avx512vl")]
45150    unsafe fn test_mm_mask_fmsub_ps() {
45151        let a = _mm_set1_ps(1.);
45152        let b = _mm_set_ps(0., 1., 2., 3.);
45153        let c = _mm_set1_ps(1.);
45154        let r = _mm_mask_fmsub_ps(a, 0, b, c);
45155        assert_eq_m128(r, a);
45156        let r = _mm_mask_fmsub_ps(a, 0b00001111, b, c);
45157        let e = _mm_set_ps(-1., 0., 1., 2.);
45158        assert_eq_m128(r, e);
45159    }
45160
45161    #[simd_test(enable = "avx512f,avx512vl")]
45162    unsafe fn test_mm_maskz_fmsub_ps() {
45163        let a = _mm_set1_ps(1.);
45164        let b = _mm_set_ps(0., 1., 2., 3.);
45165        let c = _mm_set1_ps(1.);
45166        let r = _mm_maskz_fmsub_ps(0, a, b, c);
45167        assert_eq_m128(r, _mm_setzero_ps());
45168        let r = _mm_maskz_fmsub_ps(0b00001111, a, b, c);
45169        let e = _mm_set_ps(-1., 0., 1., 2.);
45170        assert_eq_m128(r, e);
45171    }
45172
45173    #[simd_test(enable = "avx512f,avx512vl")]
45174    unsafe fn test_mm_mask3_fmsub_ps() {
45175        let a = _mm_set1_ps(1.);
45176        let b = _mm_set_ps(0., 1., 2., 3.);
45177        let c = _mm_set1_ps(1.);
45178        let r = _mm_mask3_fmsub_ps(a, b, c, 0);
45179        assert_eq_m128(r, c);
45180        let r = _mm_mask3_fmsub_ps(a, b, c, 0b00001111);
45181        let e = _mm_set_ps(-1., 0., 1., 2.);
45182        assert_eq_m128(r, e);
45183    }
45184
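    // `fmaddsub` alternates per lane: even-indexed lanes compute `a * b - c`,
    // odd-indexed lanes compute `a * b + c`. A scalar sketch of the per-lane rule
    // (illustrative only, not part of the test suite):
    //     fn fmaddsub_lane(i: usize, a: f32, b: f32, c: f32) -> f32 {
    //         if i % 2 == 0 { a * b - c } else { a * b + c }
    //     }
    // Hence with a == c == 1.0 and b == [0., 1., 2., ...] the lanes alternate
    // `b - 1.` / `b + 1.`, giving -1., 2., 1., 4., ...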
45185    #[simd_test(enable = "avx512f")]
45186    unsafe fn test_mm512_fmaddsub_ps() {
45187        let a = _mm512_set1_ps(1.);
45188        let b = _mm512_setr_ps(
45189            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45190        );
45191        let c = _mm512_set1_ps(1.);
45192        let r = _mm512_fmaddsub_ps(a, b, c);
45193        let e = _mm512_setr_ps(
45194            -1., 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16.,
45195        );
45196        assert_eq_m512(r, e);
45197    }
45198
45199    #[simd_test(enable = "avx512f")]
45200    unsafe fn test_mm512_mask_fmaddsub_ps() {
45201        let a = _mm512_set1_ps(1.);
45202        let b = _mm512_setr_ps(
45203            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45204        );
45205        let c = _mm512_set1_ps(1.);
45206        let r = _mm512_mask_fmaddsub_ps(a, 0, b, c);
45207        assert_eq_m512(r, a);
45208        let r = _mm512_mask_fmaddsub_ps(a, 0b00000000_11111111, b, c);
45209        let e = _mm512_setr_ps(
45210            -1., 2., 1., 4., 3., 6., 5., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
45211        );
45212        assert_eq_m512(r, e);
45213    }
45214
45215    #[simd_test(enable = "avx512f")]
45216    unsafe fn test_mm512_maskz_fmaddsub_ps() {
45217        let a = _mm512_set1_ps(1.);
45218        let b = _mm512_setr_ps(
45219            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45220        );
45221        let c = _mm512_set1_ps(1.);
45222        let r = _mm512_maskz_fmaddsub_ps(0, a, b, c);
45223        assert_eq_m512(r, _mm512_setzero_ps());
45224        let r = _mm512_maskz_fmaddsub_ps(0b00000000_11111111, a, b, c);
45225        let e = _mm512_setr_ps(
45226            -1., 2., 1., 4., 3., 6., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
45227        );
45228        assert_eq_m512(r, e);
45229    }
45230
45231    #[simd_test(enable = "avx512f")]
45232    unsafe fn test_mm512_mask3_fmaddsub_ps() {
45233        let a = _mm512_set1_ps(1.);
45234        let b = _mm512_setr_ps(
45235            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45236        );
45237        let c = _mm512_setr_ps(
45238            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
45239        );
45240        let r = _mm512_mask3_fmaddsub_ps(a, b, c, 0);
45241        assert_eq_m512(r, c);
45242        let r = _mm512_mask3_fmaddsub_ps(a, b, c, 0b00000000_11111111);
45243        let e = _mm512_setr_ps(
45244            -1., 2., 1., 4., 3., 6., 5., 8., 2., 2., 2., 2., 2., 2., 2., 2.,
45245        );
45246        assert_eq_m512(r, e);
45247    }
45248
45249    #[simd_test(enable = "avx512f,avx512vl")]
45250    unsafe fn test_mm256_mask_fmaddsub_ps() {
45251        let a = _mm256_set1_ps(1.);
45252        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45253        let c = _mm256_set1_ps(1.);
45254        let r = _mm256_mask_fmaddsub_ps(a, 0, b, c);
45255        assert_eq_m256(r, a);
45256        let r = _mm256_mask_fmaddsub_ps(a, 0b11111111, b, c);
45257        let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
45258        assert_eq_m256(r, e);
45259    }
45260
45261    #[simd_test(enable = "avx512f,avx512vl")]
45262    unsafe fn test_mm256_maskz_fmaddsub_ps() {
45263        let a = _mm256_set1_ps(1.);
45264        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45265        let c = _mm256_set1_ps(1.);
45266        let r = _mm256_maskz_fmaddsub_ps(0, a, b, c);
45267        assert_eq_m256(r, _mm256_setzero_ps());
45268        let r = _mm256_maskz_fmaddsub_ps(0b11111111, a, b, c);
45269        let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
45270        assert_eq_m256(r, e);
45271    }
45272
45273    #[simd_test(enable = "avx512f,avx512vl")]
45274    unsafe fn test_mm256_mask3_fmaddsub_ps() {
45275        let a = _mm256_set1_ps(1.);
45276        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45277        let c = _mm256_set1_ps(1.);
45278        let r = _mm256_mask3_fmaddsub_ps(a, b, c, 0);
45279        assert_eq_m256(r, c);
45280        let r = _mm256_mask3_fmaddsub_ps(a, b, c, 0b11111111);
45281        let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
45282        assert_eq_m256(r, e);
45283    }
45284
45285    #[simd_test(enable = "avx512f,avx512vl")]
45286    unsafe fn test_mm_mask_fmaddsub_ps() {
45287        let a = _mm_set1_ps(1.);
45288        let b = _mm_set_ps(0., 1., 2., 3.);
45289        let c = _mm_set1_ps(1.);
45290        let r = _mm_mask_fmaddsub_ps(a, 0, b, c);
45291        assert_eq_m128(r, a);
45292        let r = _mm_mask_fmaddsub_ps(a, 0b00001111, b, c);
45293        let e = _mm_set_ps(1., 0., 3., 2.);
45294        assert_eq_m128(r, e);
45295    }
45296
45297    #[simd_test(enable = "avx512f,avx512vl")]
45298    unsafe fn test_mm_maskz_fmaddsub_ps() {
45299        let a = _mm_set1_ps(1.);
45300        let b = _mm_set_ps(0., 1., 2., 3.);
45301        let c = _mm_set1_ps(1.);
45302        let r = _mm_maskz_fmaddsub_ps(0, a, b, c);
45303        assert_eq_m128(r, _mm_setzero_ps());
45304        let r = _mm_maskz_fmaddsub_ps(0b00001111, a, b, c);
45305        let e = _mm_set_ps(1., 0., 3., 2.);
45306        assert_eq_m128(r, e);
45307    }
45308
45309    #[simd_test(enable = "avx512f,avx512vl")]
45310    unsafe fn test_mm_mask3_fmaddsub_ps() {
45311        let a = _mm_set1_ps(1.);
45312        let b = _mm_set_ps(0., 1., 2., 3.);
45313        let c = _mm_set1_ps(1.);
45314        let r = _mm_mask3_fmaddsub_ps(a, b, c, 0);
45315        assert_eq_m128(r, c);
45316        let r = _mm_mask3_fmaddsub_ps(a, b, c, 0b00001111);
45317        let e = _mm_set_ps(1., 0., 3., 2.);
45318        assert_eq_m128(r, e);
45319    }
45320
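    // `fmsubadd` is the mirror image of `fmaddsub`: even-indexed lanes compute
    // `a * b + c` and odd-indexed lanes `a * b - c`, so the same inputs as above
    // produce 1., 0., 3., 2., ... instead of -1., 2., 1., 4., ...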
45321    #[simd_test(enable = "avx512f")]
45322    unsafe fn test_mm512_fmsubadd_ps() {
45323        let a = _mm512_setr_ps(
45324            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
45325        );
45326        let b = _mm512_setr_ps(
45327            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45328        );
45329        let c = _mm512_setr_ps(
45330            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
45331        );
45332        let r = _mm512_fmsubadd_ps(a, b, c);
45333        let e = _mm512_setr_ps(
45334            1., 0., 3., 2., 5., 4., 7., 6., 9., 8., 11., 10., 13., 12., 15., 14.,
45335        );
45336        assert_eq_m512(r, e);
45337    }
45338
45339    #[simd_test(enable = "avx512f")]
45340    unsafe fn test_mm512_mask_fmsubadd_ps() {
45341        let a = _mm512_set1_ps(1.);
45342        let b = _mm512_setr_ps(
45343            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45344        );
45345        let c = _mm512_set1_ps(1.);
45346        let r = _mm512_mask_fmsubadd_ps(a, 0, b, c);
45347        assert_eq_m512(r, a);
45348        let r = _mm512_mask_fmsubadd_ps(a, 0b00000000_11111111, b, c);
45349        let e = _mm512_setr_ps(
45350            1., 0., 3., 2., 5., 4., 7., 6., 1., 1., 1., 1., 1., 1., 1., 1.,
45351        );
45352        assert_eq_m512(r, e);
45353    }
45354
45355    #[simd_test(enable = "avx512f")]
45356    unsafe fn test_mm512_maskz_fmsubadd_ps() {
45357        let a = _mm512_set1_ps(1.);
45358        let b = _mm512_setr_ps(
45359            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45360        );
45361        let c = _mm512_set1_ps(1.);
45362        let r = _mm512_maskz_fmsubadd_ps(0, a, b, c);
45363        assert_eq_m512(r, _mm512_setzero_ps());
45364        let r = _mm512_maskz_fmsubadd_ps(0b00000000_11111111, a, b, c);
45365        let e = _mm512_setr_ps(
45366            1., 0., 3., 2., 5., 4., 7., 6., 0., 0., 0., 0., 0., 0., 0., 0.,
45367        );
45368        assert_eq_m512(r, e);
45369    }
45370
45371    #[simd_test(enable = "avx512f")]
45372    unsafe fn test_mm512_mask3_fmsubadd_ps() {
45373        let a = _mm512_set1_ps(1.);
45374        let b = _mm512_setr_ps(
45375            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45376        );
45377        let c = _mm512_setr_ps(
45378            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
45379        );
45380        let r = _mm512_mask3_fmsubadd_ps(a, b, c, 0);
45381        assert_eq_m512(r, c);
45382        let r = _mm512_mask3_fmsubadd_ps(a, b, c, 0b00000000_11111111);
45383        let e = _mm512_setr_ps(
45384            1., 0., 3., 2., 5., 4., 7., 6., 2., 2., 2., 2., 2., 2., 2., 2.,
45385        );
45386        assert_eq_m512(r, e);
45387    }
45388
45389    #[simd_test(enable = "avx512f,avx512vl")]
45390    unsafe fn test_mm256_mask_fmsubadd_ps() {
45391        let a = _mm256_set1_ps(1.);
45392        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45393        let c = _mm256_set1_ps(1.);
45394        let r = _mm256_mask_fmsubadd_ps(a, 0, b, c);
45395        assert_eq_m256(r, a);
45396        let r = _mm256_mask_fmsubadd_ps(a, 0b11111111, b, c);
45397        let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
45398        assert_eq_m256(r, e);
45399    }
45400
45401    #[simd_test(enable = "avx512f,avx512vl")]
45402    unsafe fn test_mm256_maskz_fmsubadd_ps() {
45403        let a = _mm256_set1_ps(1.);
45404        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45405        let c = _mm256_set1_ps(1.);
45406        let r = _mm256_maskz_fmsubadd_ps(0, a, b, c);
45407        assert_eq_m256(r, _mm256_setzero_ps());
45408        let r = _mm256_maskz_fmsubadd_ps(0b11111111, a, b, c);
45409        let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
45410        assert_eq_m256(r, e);
45411    }
45412
45413    #[simd_test(enable = "avx512f,avx512vl")]
45414    unsafe fn test_mm256_mask3_fmsubadd_ps() {
45415        let a = _mm256_set1_ps(1.);
45416        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45417        let c = _mm256_set1_ps(1.);
45418        let r = _mm256_mask3_fmsubadd_ps(a, b, c, 0);
45419        assert_eq_m256(r, c);
45420        let r = _mm256_mask3_fmsubadd_ps(a, b, c, 0b11111111);
45421        let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
45422        assert_eq_m256(r, e);
45423    }
45424
45425    #[simd_test(enable = "avx512f,avx512vl")]
45426    unsafe fn test_mm_mask_fmsubadd_ps() {
45427        let a = _mm_set1_ps(1.);
45428        let b = _mm_set_ps(0., 1., 2., 3.);
45429        let c = _mm_set1_ps(1.);
45430        let r = _mm_mask_fmsubadd_ps(a, 0, b, c);
45431        assert_eq_m128(r, a);
45432        let r = _mm_mask_fmsubadd_ps(a, 0b00001111, b, c);
45433        let e = _mm_set_ps(-1., 2., 1., 4.);
45434        assert_eq_m128(r, e);
45435    }
45436
45437    #[simd_test(enable = "avx512f,avx512vl")]
45438    unsafe fn test_mm_maskz_fmsubadd_ps() {
45439        let a = _mm_set1_ps(1.);
45440        let b = _mm_set_ps(0., 1., 2., 3.);
45441        let c = _mm_set1_ps(1.);
45442        let r = _mm_maskz_fmsubadd_ps(0, a, b, c);
45443        assert_eq_m128(r, _mm_setzero_ps());
45444        let r = _mm_maskz_fmsubadd_ps(0b00001111, a, b, c);
45445        let e = _mm_set_ps(-1., 2., 1., 4.);
45446        assert_eq_m128(r, e);
45447    }
45448
45449    #[simd_test(enable = "avx512f,avx512vl")]
45450    unsafe fn test_mm_mask3_fmsubadd_ps() {
45451        let a = _mm_set1_ps(1.);
45452        let b = _mm_set_ps(0., 1., 2., 3.);
45453        let c = _mm_set1_ps(1.);
45454        let r = _mm_mask3_fmsubadd_ps(a, b, c, 0);
45455        assert_eq_m128(r, c);
45456        let r = _mm_mask3_fmsubadd_ps(a, b, c, 0b00001111);
45457        let e = _mm_set_ps(-1., 2., 1., 4.);
45458        assert_eq_m128(r, e);
45459    }
45460
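    // `fnmadd` negates the product before adding: each lane computes `-(a * b) + c`,
    // i.e. `1.0 - b` for the inputs used here (lane 2: -(1.0 * 2.0) + 1.0 == -1.0).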
45461    #[simd_test(enable = "avx512f")]
45462    unsafe fn test_mm512_fnmadd_ps() {
45463        let a = _mm512_set1_ps(1.);
45464        let b = _mm512_setr_ps(
45465            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45466        );
45467        let c = _mm512_set1_ps(1.);
45468        let r = _mm512_fnmadd_ps(a, b, c);
45469        let e = _mm512_setr_ps(
45470            1., 0., -1., -2., -3., -4., -5., -6., -7., -8., -9., -10., -11., -12., -13., -14.,
45471        );
45472        assert_eq_m512(r, e);
45473    }
45474
45475    #[simd_test(enable = "avx512f")]
45476    unsafe fn test_mm512_mask_fnmadd_ps() {
45477        let a = _mm512_set1_ps(1.);
45478        let b = _mm512_setr_ps(
45479            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45480        );
45481        let c = _mm512_set1_ps(1.);
45482        let r = _mm512_mask_fnmadd_ps(a, 0, b, c);
45483        assert_eq_m512(r, a);
45484        let r = _mm512_mask_fnmadd_ps(a, 0b00000000_11111111, b, c);
45485        let e = _mm512_setr_ps(
45486            1., 0., -1., -2., -3., -4., -5., -6., 1., 1., 1., 1., 1., 1., 1., 1.,
45487        );
45488        assert_eq_m512(r, e);
45489    }
45490
45491    #[simd_test(enable = "avx512f")]
45492    unsafe fn test_mm512_maskz_fnmadd_ps() {
45493        let a = _mm512_set1_ps(1.);
45494        let b = _mm512_setr_ps(
45495            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45496        );
45497        let c = _mm512_set1_ps(1.);
45498        let r = _mm512_maskz_fnmadd_ps(0, a, b, c);
45499        assert_eq_m512(r, _mm512_setzero_ps());
45500        let r = _mm512_maskz_fnmadd_ps(0b00000000_11111111, a, b, c);
45501        let e = _mm512_setr_ps(
45502            1., 0., -1., -2., -3., -4., -5., -6., 0., 0., 0., 0., 0., 0., 0., 0.,
45503        );
45504        assert_eq_m512(r, e);
45505    }
45506
45507    #[simd_test(enable = "avx512f")]
45508    unsafe fn test_mm512_mask3_fnmadd_ps() {
45509        let a = _mm512_set1_ps(1.);
45510        let b = _mm512_setr_ps(
45511            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45512        );
45513        let c = _mm512_setr_ps(
45514            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
45515        );
45516        let r = _mm512_mask3_fnmadd_ps(a, b, c, 0);
45517        assert_eq_m512(r, c);
45518        let r = _mm512_mask3_fnmadd_ps(a, b, c, 0b00000000_11111111);
45519        let e = _mm512_setr_ps(
45520            1., 0., -1., -2., -3., -4., -5., -6., 2., 2., 2., 2., 2., 2., 2., 2.,
45521        );
45522        assert_eq_m512(r, e);
45523    }
45524
45525    #[simd_test(enable = "avx512f,avx512vl")]
45526    unsafe fn test_mm256_mask_fnmadd_ps() {
45527        let a = _mm256_set1_ps(1.);
45528        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45529        let c = _mm256_set1_ps(1.);
45530        let r = _mm256_mask_fnmadd_ps(a, 0, b, c);
45531        assert_eq_m256(r, a);
45532        let r = _mm256_mask_fnmadd_ps(a, 0b11111111, b, c);
45533        let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
45534        assert_eq_m256(r, e);
45535    }
45536
45537    #[simd_test(enable = "avx512f,avx512vl")]
45538    unsafe fn test_mm256_maskz_fnmadd_ps() {
45539        let a = _mm256_set1_ps(1.);
45540        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45541        let c = _mm256_set1_ps(1.);
45542        let r = _mm256_maskz_fnmadd_ps(0, a, b, c);
45543        assert_eq_m256(r, _mm256_setzero_ps());
45544        let r = _mm256_maskz_fnmadd_ps(0b11111111, a, b, c);
45545        let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
45546        assert_eq_m256(r, e);
45547    }
45548
45549    #[simd_test(enable = "avx512f,avx512vl")]
45550    unsafe fn test_mm256_mask3_fnmadd_ps() {
45551        let a = _mm256_set1_ps(1.);
45552        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45553        let c = _mm256_set1_ps(1.);
45554        let r = _mm256_mask3_fnmadd_ps(a, b, c, 0);
45555        assert_eq_m256(r, c);
45556        let r = _mm256_mask3_fnmadd_ps(a, b, c, 0b11111111);
45557        let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
45558        assert_eq_m256(r, e);
45559    }
45560
45561    #[simd_test(enable = "avx512f,avx512vl")]
45562    unsafe fn test_mm_mask_fnmadd_ps() {
45563        let a = _mm_set1_ps(1.);
45564        let b = _mm_set_ps(0., 1., 2., 3.);
45565        let c = _mm_set1_ps(1.);
45566        let r = _mm_mask_fnmadd_ps(a, 0, b, c);
45567        assert_eq_m128(r, a);
45568        let r = _mm_mask_fnmadd_ps(a, 0b00001111, b, c);
45569        let e = _mm_set_ps(1., 0., -1., -2.);
45570        assert_eq_m128(r, e);
45571    }
45572
45573    #[simd_test(enable = "avx512f,avx512vl")]
45574    unsafe fn test_mm_maskz_fnmadd_ps() {
45575        let a = _mm_set1_ps(1.);
45576        let b = _mm_set_ps(0., 1., 2., 3.);
45577        let c = _mm_set1_ps(1.);
45578        let r = _mm_maskz_fnmadd_ps(0, a, b, c);
45579        assert_eq_m128(r, _mm_setzero_ps());
45580        let r = _mm_maskz_fnmadd_ps(0b00001111, a, b, c);
45581        let e = _mm_set_ps(1., 0., -1., -2.);
45582        assert_eq_m128(r, e);
45583    }
45584
45585    #[simd_test(enable = "avx512f,avx512vl")]
45586    unsafe fn test_mm_mask3_fnmadd_ps() {
45587        let a = _mm_set1_ps(1.);
45588        let b = _mm_set_ps(0., 1., 2., 3.);
45589        let c = _mm_set1_ps(1.);
45590        let r = _mm_mask3_fnmadd_ps(a, b, c, 0);
45591        assert_eq_m128(r, c);
45592        let r = _mm_mask3_fnmadd_ps(a, b, c, 0b00001111);
45593        let e = _mm_set_ps(1., 0., -1., -2.);
45594        assert_eq_m128(r, e);
45595    }
45596
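    // `fnmsub` negates both steps: each lane computes `-(a * b) - c`, i.e. `-b - 1.0`
    // here, so the expected lanes run -1., -2., -3., ... down to -16.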
45597    #[simd_test(enable = "avx512f")]
45598    unsafe fn test_mm512_fnmsub_ps() {
45599        let a = _mm512_set1_ps(1.);
45600        let b = _mm512_setr_ps(
45601            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45602        );
45603        let c = _mm512_set1_ps(1.);
45604        let r = _mm512_fnmsub_ps(a, b, c);
45605        let e = _mm512_setr_ps(
45606            -1., -2., -3., -4., -5., -6., -7., -8., -9., -10., -11., -12., -13., -14., -15., -16.,
45607        );
45608        assert_eq_m512(r, e);
45609    }
45610
45611    #[simd_test(enable = "avx512f")]
45612    unsafe fn test_mm512_mask_fnmsub_ps() {
45613        let a = _mm512_set1_ps(1.);
45614        let b = _mm512_setr_ps(
45615            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45616        );
45617        let c = _mm512_set1_ps(1.);
45618        let r = _mm512_mask_fnmsub_ps(a, 0, b, c);
45619        assert_eq_m512(r, a);
45620        let r = _mm512_mask_fnmsub_ps(a, 0b00000000_11111111, b, c);
45621        let e = _mm512_setr_ps(
45622            -1., -2., -3., -4., -5., -6., -7., -8., 1., 1., 1., 1., 1., 1., 1., 1.,
45623        );
45624        assert_eq_m512(r, e);
45625    }
45626
45627    #[simd_test(enable = "avx512f")]
45628    unsafe fn test_mm512_maskz_fnmsub_ps() {
45629        let a = _mm512_set1_ps(1.);
45630        let b = _mm512_setr_ps(
45631            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45632        );
45633        let c = _mm512_set1_ps(1.);
45634        let r = _mm512_maskz_fnmsub_ps(0, a, b, c);
45635        assert_eq_m512(r, _mm512_setzero_ps());
45636        let r = _mm512_maskz_fnmsub_ps(0b00000000_11111111, a, b, c);
45637        let e = _mm512_setr_ps(
45638            -1., -2., -3., -4., -5., -6., -7., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
45639        );
45640        assert_eq_m512(r, e);
45641    }
45642
45643    #[simd_test(enable = "avx512f")]
45644    unsafe fn test_mm512_mask3_fnmsub_ps() {
45645        let a = _mm512_set1_ps(1.);
45646        let b = _mm512_setr_ps(
45647            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45648        );
45649        let c = _mm512_setr_ps(
45650            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
45651        );
45652        let r = _mm512_mask3_fnmsub_ps(a, b, c, 0);
45653        assert_eq_m512(r, c);
45654        let r = _mm512_mask3_fnmsub_ps(a, b, c, 0b00000000_11111111);
45655        let e = _mm512_setr_ps(
45656            -1., -2., -3., -4., -5., -6., -7., -8., 2., 2., 2., 2., 2., 2., 2., 2.,
45657        );
45658        assert_eq_m512(r, e);
45659    }
45660
45661    #[simd_test(enable = "avx512f,avx512vl")]
45662    unsafe fn test_mm256_mask_fnmsub_ps() {
45663        let a = _mm256_set1_ps(1.);
45664        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45665        let c = _mm256_set1_ps(1.);
45666        let r = _mm256_mask_fnmsub_ps(a, 0, b, c);
45667        assert_eq_m256(r, a);
45668        let r = _mm256_mask_fnmsub_ps(a, 0b11111111, b, c);
45669        let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
45670        assert_eq_m256(r, e);
45671    }
45672
45673    #[simd_test(enable = "avx512f,avx512vl")]
45674    unsafe fn test_mm256_maskz_fnmsub_ps() {
45675        let a = _mm256_set1_ps(1.);
45676        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45677        let c = _mm256_set1_ps(1.);
45678        let r = _mm256_maskz_fnmsub_ps(0, a, b, c);
45679        assert_eq_m256(r, _mm256_setzero_ps());
45680        let r = _mm256_maskz_fnmsub_ps(0b11111111, a, b, c);
45681        let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
45682        assert_eq_m256(r, e);
45683    }
45684
45685    #[simd_test(enable = "avx512f,avx512vl")]
45686    unsafe fn test_mm256_mask3_fnmsub_ps() {
45687        let a = _mm256_set1_ps(1.);
45688        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45689        let c = _mm256_set1_ps(1.);
45690        let r = _mm256_mask3_fnmsub_ps(a, b, c, 0);
45691        assert_eq_m256(r, c);
45692        let r = _mm256_mask3_fnmsub_ps(a, b, c, 0b11111111);
45693        let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
45694        assert_eq_m256(r, e);
45695    }
45696
45697    #[simd_test(enable = "avx512f,avx512vl")]
45698    unsafe fn test_mm_mask_fnmsub_ps() {
45699        let a = _mm_set1_ps(1.);
45700        let b = _mm_set_ps(0., 1., 2., 3.);
45701        let c = _mm_set1_ps(1.);
45702        let r = _mm_mask_fnmsub_ps(a, 0, b, c);
45703        assert_eq_m128(r, a);
45704        let r = _mm_mask_fnmsub_ps(a, 0b00001111, b, c);
45705        let e = _mm_set_ps(-1., -2., -3., -4.);
45706        assert_eq_m128(r, e);
45707    }
45708
45709    #[simd_test(enable = "avx512f,avx512vl")]
45710    unsafe fn test_mm_maskz_fnmsub_ps() {
45711        let a = _mm_set1_ps(1.);
45712        let b = _mm_set_ps(0., 1., 2., 3.);
45713        let c = _mm_set1_ps(1.);
45714        let r = _mm_maskz_fnmsub_ps(0, a, b, c);
45715        assert_eq_m128(r, _mm_setzero_ps());
45716        let r = _mm_maskz_fnmsub_ps(0b00001111, a, b, c);
45717        let e = _mm_set_ps(-1., -2., -3., -4.);
45718        assert_eq_m128(r, e);
45719    }
45720
45721    #[simd_test(enable = "avx512f,avx512vl")]
45722    unsafe fn test_mm_mask3_fnmsub_ps() {
45723        let a = _mm_set1_ps(1.);
45724        let b = _mm_set_ps(0., 1., 2., 3.);
45725        let c = _mm_set1_ps(1.);
45726        let r = _mm_mask3_fnmsub_ps(a, b, c, 0);
45727        assert_eq_m128(r, c);
45728        let r = _mm_mask3_fnmsub_ps(a, b, c, 0b00001111);
45729        let e = _mm_set_ps(-1., -2., -3., -4.);
45730        assert_eq_m128(r, e);
45731    }
45732
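    // `rcp14` is an approximate reciprocal with a relative error of at most 2^-14,
    // which is why the expected value for 1/3 is the approximation 0.33333206 rather
    // than the correctly rounded f32 value 0.33333334.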
45733    #[simd_test(enable = "avx512f")]
45734    unsafe fn test_mm512_rcp14_ps() {
45735        let a = _mm512_set1_ps(3.);
45736        let r = _mm512_rcp14_ps(a);
45737        let e = _mm512_set1_ps(0.33333206);
45738        assert_eq_m512(r, e);
45739    }
45740
45741    #[simd_test(enable = "avx512f")]
45742    unsafe fn test_mm512_mask_rcp14_ps() {
45743        let a = _mm512_set1_ps(3.);
45744        let r = _mm512_mask_rcp14_ps(a, 0, a);
45745        assert_eq_m512(r, a);
45746        let r = _mm512_mask_rcp14_ps(a, 0b11111111_00000000, a);
45747        let e = _mm512_setr_ps(
45748            3., 3., 3., 3., 3., 3., 3., 3., 0.33333206, 0.33333206, 0.33333206, 0.33333206,
45749            0.33333206, 0.33333206, 0.33333206, 0.33333206,
45750        );
45751        assert_eq_m512(r, e);
45752    }
45753
45754    #[simd_test(enable = "avx512f")]
45755    unsafe fn test_mm512_maskz_rcp14_ps() {
45756        let a = _mm512_set1_ps(3.);
45757        let r = _mm512_maskz_rcp14_ps(0, a);
45758        assert_eq_m512(r, _mm512_setzero_ps());
45759        let r = _mm512_maskz_rcp14_ps(0b11111111_00000000, a);
45760        let e = _mm512_setr_ps(
45761            0., 0., 0., 0., 0., 0., 0., 0., 0.33333206, 0.33333206, 0.33333206, 0.33333206,
45762            0.33333206, 0.33333206, 0.33333206, 0.33333206,
45763        );
45764        assert_eq_m512(r, e);
45765    }
45766
45767    #[simd_test(enable = "avx512f,avx512vl")]
45768    unsafe fn test_mm256_rcp14_ps() {
45769        let a = _mm256_set1_ps(3.);
45770        let r = _mm256_rcp14_ps(a);
45771        let e = _mm256_set1_ps(0.33333206);
45772        assert_eq_m256(r, e);
45773    }
45774
45775    #[simd_test(enable = "avx512f,avx512vl")]
45776    unsafe fn test_mm256_mask_rcp14_ps() {
45777        let a = _mm256_set1_ps(3.);
45778        let r = _mm256_mask_rcp14_ps(a, 0, a);
45779        assert_eq_m256(r, a);
45780        let r = _mm256_mask_rcp14_ps(a, 0b11111111, a);
45781        let e = _mm256_set1_ps(0.33333206);
45782        assert_eq_m256(r, e);
45783    }
45784
45785    #[simd_test(enable = "avx512f,avx512vl")]
45786    unsafe fn test_mm256_maskz_rcp14_ps() {
45787        let a = _mm256_set1_ps(3.);
45788        let r = _mm256_maskz_rcp14_ps(0, a);
45789        assert_eq_m256(r, _mm256_setzero_ps());
45790        let r = _mm256_maskz_rcp14_ps(0b11111111, a);
45791        let e = _mm256_set1_ps(0.33333206);
45792        assert_eq_m256(r, e);
45793    }
45794
45795    #[simd_test(enable = "avx512f,avx512vl")]
45796    unsafe fn test_mm_rcp14_ps() {
45797        let a = _mm_set1_ps(3.);
45798        let r = _mm_rcp14_ps(a);
45799        let e = _mm_set1_ps(0.33333206);
45800        assert_eq_m128(r, e);
45801    }
45802
45803    #[simd_test(enable = "avx512f,avx512vl")]
45804    unsafe fn test_mm_mask_rcp14_ps() {
45805        let a = _mm_set1_ps(3.);
45806        let r = _mm_mask_rcp14_ps(a, 0, a);
45807        assert_eq_m128(r, a);
45808        let r = _mm_mask_rcp14_ps(a, 0b00001111, a);
45809        let e = _mm_set1_ps(0.33333206);
45810        assert_eq_m128(r, e);
45811    }
45812
45813    #[simd_test(enable = "avx512f,avx512vl")]
45814    unsafe fn test_mm_maskz_rcp14_ps() {
45815        let a = _mm_set1_ps(3.);
45816        let r = _mm_maskz_rcp14_ps(0, a);
45817        assert_eq_m128(r, _mm_setzero_ps());
45818        let r = _mm_maskz_rcp14_ps(0b00001111, a);
45819        let e = _mm_set1_ps(0.33333206);
45820        assert_eq_m128(r, e);
45821    }
45822
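    // `rsqrt14` approximates 1/sqrt(x) to within a 2^-14 relative error; for x == 3.0
    // the exact value is about 0.57735027 and the hardware approximation is 0.5773392.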
45823    #[simd_test(enable = "avx512f")]
45824    unsafe fn test_mm512_rsqrt14_ps() {
45825        let a = _mm512_set1_ps(3.);
45826        let r = _mm512_rsqrt14_ps(a);
45827        let e = _mm512_set1_ps(0.5773392);
45828        assert_eq_m512(r, e);
45829    }
45830
45831    #[simd_test(enable = "avx512f")]
45832    unsafe fn test_mm512_mask_rsqrt14_ps() {
45833        let a = _mm512_set1_ps(3.);
45834        let r = _mm512_mask_rsqrt14_ps(a, 0, a);
45835        assert_eq_m512(r, a);
45836        let r = _mm512_mask_rsqrt14_ps(a, 0b11111111_00000000, a);
45837        let e = _mm512_setr_ps(
45838            3., 3., 3., 3., 3., 3., 3., 3., 0.5773392, 0.5773392, 0.5773392, 0.5773392, 0.5773392,
45839            0.5773392, 0.5773392, 0.5773392,
45840        );
45841        assert_eq_m512(r, e);
45842    }
45843
45844    #[simd_test(enable = "avx512f")]
45845    unsafe fn test_mm512_maskz_rsqrt14_ps() {
45846        let a = _mm512_set1_ps(3.);
45847        let r = _mm512_maskz_rsqrt14_ps(0, a);
45848        assert_eq_m512(r, _mm512_setzero_ps());
45849        let r = _mm512_maskz_rsqrt14_ps(0b11111111_00000000, a);
45850        let e = _mm512_setr_ps(
45851            0., 0., 0., 0., 0., 0., 0., 0., 0.5773392, 0.5773392, 0.5773392, 0.5773392, 0.5773392,
45852            0.5773392, 0.5773392, 0.5773392,
45853        );
45854        assert_eq_m512(r, e);
45855    }
45856
45857    #[simd_test(enable = "avx512f,avx512vl")]
45858    unsafe fn test_mm256_rsqrt14_ps() {
45859        let a = _mm256_set1_ps(3.);
45860        let r = _mm256_rsqrt14_ps(a);
45861        let e = _mm256_set1_ps(0.5773392);
45862        assert_eq_m256(r, e);
45863    }
45864
45865    #[simd_test(enable = "avx512f,avx512vl")]
45866    unsafe fn test_mm256_mask_rsqrt14_ps() {
45867        let a = _mm256_set1_ps(3.);
45868        let r = _mm256_mask_rsqrt14_ps(a, 0, a);
45869        assert_eq_m256(r, a);
45870        let r = _mm256_mask_rsqrt14_ps(a, 0b11111111, a);
45871        let e = _mm256_set1_ps(0.5773392);
45872        assert_eq_m256(r, e);
45873    }
45874
45875    #[simd_test(enable = "avx512f,avx512vl")]
45876    unsafe fn test_mm256_maskz_rsqrt14_ps() {
45877        let a = _mm256_set1_ps(3.);
45878        let r = _mm256_maskz_rsqrt14_ps(0, a);
45879        assert_eq_m256(r, _mm256_setzero_ps());
45880        let r = _mm256_maskz_rsqrt14_ps(0b11111111, a);
45881        let e = _mm256_set1_ps(0.5773392);
45882        assert_eq_m256(r, e);
45883    }
45884
45885    #[simd_test(enable = "avx512f,avx512vl")]
45886    unsafe fn test_mm_rsqrt14_ps() {
45887        let a = _mm_set1_ps(3.);
45888        let r = _mm_rsqrt14_ps(a);
45889        let e = _mm_set1_ps(0.5773392);
45890        assert_eq_m128(r, e);
45891    }
45892
45893    #[simd_test(enable = "avx512f,avx512vl")]
45894    unsafe fn test_mm_mask_rsqrt14_ps() {
45895        let a = _mm_set1_ps(3.);
45896        let r = _mm_mask_rsqrt14_ps(a, 0, a);
45897        assert_eq_m128(r, a);
45898        let r = _mm_mask_rsqrt14_ps(a, 0b00001111, a);
45899        let e = _mm_set1_ps(0.5773392);
45900        assert_eq_m128(r, e);
45901    }
45902
45903    #[simd_test(enable = "avx512f,avx512vl")]
45904    unsafe fn test_mm_maskz_rsqrt14_ps() {
45905        let a = _mm_set1_ps(3.);
45906        let r = _mm_maskz_rsqrt14_ps(0, a);
45907        assert_eq_m128(r, _mm_setzero_ps());
45908        let r = _mm_maskz_rsqrt14_ps(0b00001111, a);
45909        let e = _mm_set1_ps(0.5773392);
45910        assert_eq_m128(r, e);
45911    }
45912
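    // `getexp` extracts the unbiased exponent of each lane as a float, i.e.
    // floor(log2(|x|)); for x == 3.0 (= 1.5 * 2^1) that is 1.0.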
45913    #[simd_test(enable = "avx512f")]
45914    unsafe fn test_mm512_getexp_ps() {
45915        let a = _mm512_set1_ps(3.);
45916        let r = _mm512_getexp_ps(a);
45917        let e = _mm512_set1_ps(1.);
45918        assert_eq_m512(r, e);
45919    }
45920
45921    #[simd_test(enable = "avx512f")]
45922    unsafe fn test_mm512_mask_getexp_ps() {
45923        let a = _mm512_set1_ps(3.);
45924        let r = _mm512_mask_getexp_ps(a, 0, a);
45925        assert_eq_m512(r, a);
45926        let r = _mm512_mask_getexp_ps(a, 0b11111111_00000000, a);
45927        let e = _mm512_setr_ps(
45928            3., 3., 3., 3., 3., 3., 3., 3., 1., 1., 1., 1., 1., 1., 1., 1.,
45929        );
45930        assert_eq_m512(r, e);
45931    }
45932
45933    #[simd_test(enable = "avx512f")]
45934    unsafe fn test_mm512_maskz_getexp_ps() {
45935        let a = _mm512_set1_ps(3.);
45936        let r = _mm512_maskz_getexp_ps(0, a);
45937        assert_eq_m512(r, _mm512_setzero_ps());
45938        let r = _mm512_maskz_getexp_ps(0b11111111_00000000, a);
45939        let e = _mm512_setr_ps(
45940            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
45941        );
45942        assert_eq_m512(r, e);
45943    }
45944
45945    #[simd_test(enable = "avx512f,avx512vl")]
45946    unsafe fn test_mm256_getexp_ps() {
45947        let a = _mm256_set1_ps(3.);
45948        let r = _mm256_getexp_ps(a);
45949        let e = _mm256_set1_ps(1.);
45950        assert_eq_m256(r, e);
45951    }
45952
45953    #[simd_test(enable = "avx512f,avx512vl")]
45954    unsafe fn test_mm256_mask_getexp_ps() {
45955        let a = _mm256_set1_ps(3.);
45956        let r = _mm256_mask_getexp_ps(a, 0, a);
45957        assert_eq_m256(r, a);
45958        let r = _mm256_mask_getexp_ps(a, 0b11111111, a);
45959        let e = _mm256_set1_ps(1.);
45960        assert_eq_m256(r, e);
45961    }
45962
45963    #[simd_test(enable = "avx512f,avx512vl")]
45964    unsafe fn test_mm256_maskz_getexp_ps() {
45965        let a = _mm256_set1_ps(3.);
45966        let r = _mm256_maskz_getexp_ps(0, a);
45967        assert_eq_m256(r, _mm256_setzero_ps());
45968        let r = _mm256_maskz_getexp_ps(0b11111111, a);
45969        let e = _mm256_set1_ps(1.);
45970        assert_eq_m256(r, e);
45971    }
45972
45973    #[simd_test(enable = "avx512f,avx512vl")]
45974    unsafe fn test_mm_getexp_ps() {
45975        let a = _mm_set1_ps(3.);
45976        let r = _mm_getexp_ps(a);
45977        let e = _mm_set1_ps(1.);
45978        assert_eq_m128(r, e);
45979    }
45980
45981    #[simd_test(enable = "avx512f,avx512vl")]
45982    unsafe fn test_mm_mask_getexp_ps() {
45983        let a = _mm_set1_ps(3.);
45984        let r = _mm_mask_getexp_ps(a, 0, a);
45985        assert_eq_m128(r, a);
45986        let r = _mm_mask_getexp_ps(a, 0b00001111, a);
45987        let e = _mm_set1_ps(1.);
45988        assert_eq_m128(r, e);
45989    }
45990
45991    #[simd_test(enable = "avx512f,avx512vl")]
45992    unsafe fn test_mm_maskz_getexp_ps() {
45993        let a = _mm_set1_ps(3.);
45994        let r = _mm_maskz_getexp_ps(0, a);
45995        assert_eq_m128(r, _mm_setzero_ps());
45996        let r = _mm_maskz_getexp_ps(0b00001111, a);
45997        let e = _mm_set1_ps(1.);
45998        assert_eq_m128(r, e);
45999    }
46000
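    // `roundscale` rounds each lane to 2^-M fractional precision, where M is the
    // upper nibble of the immediate and the low bits select the rounding mode.
    // With an immediate of 0 (round to nearest, keep no fraction bits) 1.1 rounds
    // to 1.0.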
46001    #[simd_test(enable = "avx512f")]
46002    unsafe fn test_mm512_roundscale_ps() {
46003        let a = _mm512_set1_ps(1.1);
46004        let r = _mm512_roundscale_ps::<0b00_00_00_00>(a);
46005        let e = _mm512_set1_ps(1.0);
46006        assert_eq_m512(r, e);
46007    }
46008
46009    #[simd_test(enable = "avx512f")]
46010    unsafe fn test_mm512_mask_roundscale_ps() {
46011        let a = _mm512_set1_ps(1.1);
46012        let r = _mm512_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
46013        let e = _mm512_set1_ps(1.1);
46014        assert_eq_m512(r, e);
46015        let r = _mm512_mask_roundscale_ps::<0b00_00_00_00>(a, 0b11111111_11111111, a);
46016        let e = _mm512_set1_ps(1.0);
46017        assert_eq_m512(r, e);
46018    }
46019
46020    #[simd_test(enable = "avx512f")]
46021    unsafe fn test_mm512_maskz_roundscale_ps() {
46022        let a = _mm512_set1_ps(1.1);
46023        let r = _mm512_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
46024        assert_eq_m512(r, _mm512_setzero_ps());
46025        let r = _mm512_maskz_roundscale_ps::<0b00_00_00_00>(0b11111111_11111111, a);
46026        let e = _mm512_set1_ps(1.0);
46027        assert_eq_m512(r, e);
46028    }
46029
46030    #[simd_test(enable = "avx512f,avx512vl")]
46031    unsafe fn test_mm256_roundscale_ps() {
46032        let a = _mm256_set1_ps(1.1);
46033        let r = _mm256_roundscale_ps::<0b00_00_00_00>(a);
46034        let e = _mm256_set1_ps(1.0);
46035        assert_eq_m256(r, e);
46036    }
46037
46038    #[simd_test(enable = "avx512f,avx512vl")]
46039    unsafe fn test_mm256_mask_roundscale_ps() {
46040        let a = _mm256_set1_ps(1.1);
46041        let r = _mm256_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
46042        let e = _mm256_set1_ps(1.1);
46043        assert_eq_m256(r, e);
46044        let r = _mm256_mask_roundscale_ps::<0b00_00_00_00>(a, 0b11111111, a);
46045        let e = _mm256_set1_ps(1.0);
46046        assert_eq_m256(r, e);
46047    }
46048
46049    #[simd_test(enable = "avx512f,avx512vl")]
46050    unsafe fn test_mm256_maskz_roundscale_ps() {
46051        let a = _mm256_set1_ps(1.1);
46052        let r = _mm256_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
46053        assert_eq_m256(r, _mm256_setzero_ps());
46054        let r = _mm256_maskz_roundscale_ps::<0b00_00_00_00>(0b11111111, a);
46055        let e = _mm256_set1_ps(1.0);
46056        assert_eq_m256(r, e);
46057    }
46058
46059    #[simd_test(enable = "avx512f,avx512vl")]
46060    unsafe fn test_mm_roundscale_ps() {
46061        let a = _mm_set1_ps(1.1);
46062        let r = _mm_roundscale_ps::<0b00_00_00_00>(a);
46063        let e = _mm_set1_ps(1.0);
46064        assert_eq_m128(r, e);
46065    }
46066
46067    #[simd_test(enable = "avx512f,avx512vl")]
46068    unsafe fn test_mm_mask_roundscale_ps() {
46069        let a = _mm_set1_ps(1.1);
46070        let r = _mm_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
46071        let e = _mm_set1_ps(1.1);
46072        assert_eq_m128(r, e);
46073        let r = _mm_mask_roundscale_ps::<0b00_00_00_00>(a, 0b00001111, a);
46074        let e = _mm_set1_ps(1.0);
46075        assert_eq_m128(r, e);
46076    }
46077
46078    #[simd_test(enable = "avx512f,avx512vl")]
46079    unsafe fn test_mm_maskz_roundscale_ps() {
46080        let a = _mm_set1_ps(1.1);
46081        let r = _mm_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
46082        assert_eq_m128(r, _mm_setzero_ps());
46083        let r = _mm_maskz_roundscale_ps::<0b00_00_00_00>(0b00001111, a);
46084        let e = _mm_set1_ps(1.0);
46085        assert_eq_m128(r, e);
46086    }
46087
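    // `scalef` scales each lane by a power of two: dst = a * 2^floor(b), so
    // scalef(1.0, 3.0) == 8.0.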
46088    #[simd_test(enable = "avx512f")]
46089    unsafe fn test_mm512_scalef_ps() {
46090        let a = _mm512_set1_ps(1.);
46091        let b = _mm512_set1_ps(3.);
46092        let r = _mm512_scalef_ps(a, b);
46093        let e = _mm512_set1_ps(8.);
46094        assert_eq_m512(r, e);
46095    }
46096
46097    #[simd_test(enable = "avx512f")]
46098    unsafe fn test_mm512_mask_scalef_ps() {
46099        let a = _mm512_set1_ps(1.);
46100        let b = _mm512_set1_ps(3.);
46101        let r = _mm512_mask_scalef_ps(a, 0, a, b);
46102        assert_eq_m512(r, a);
46103        let r = _mm512_mask_scalef_ps(a, 0b11111111_00000000, a, b);
46104        let e = _mm512_set_ps(
46105            8., 8., 8., 8., 8., 8., 8., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
46106        );
46107        assert_eq_m512(r, e);
46108    }
46109
46110    #[simd_test(enable = "avx512f")]
46111    unsafe fn test_mm512_maskz_scalef_ps() {
46112        let a = _mm512_set1_ps(1.);
46113        let b = _mm512_set1_ps(3.);
46114        let r = _mm512_maskz_scalef_ps(0, a, b);
46115        assert_eq_m512(r, _mm512_setzero_ps());
46116        let r = _mm512_maskz_scalef_ps(0b11111111_00000000, a, b);
46117        let e = _mm512_set_ps(
46118            8., 8., 8., 8., 8., 8., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
46119        );
46120        assert_eq_m512(r, e);
46121    }
46122
46123    #[simd_test(enable = "avx512f,avx512vl")]
46124    unsafe fn test_mm256_scalef_ps() {
46125        let a = _mm256_set1_ps(1.);
46126        let b = _mm256_set1_ps(3.);
46127        let r = _mm256_scalef_ps(a, b);
46128        let e = _mm256_set1_ps(8.);
46129        assert_eq_m256(r, e);
46130    }
46131
46132    #[simd_test(enable = "avx512f,avx512vl")]
46133    unsafe fn test_mm256_mask_scalef_ps() {
46134        let a = _mm256_set1_ps(1.);
46135        let b = _mm256_set1_ps(3.);
46136        let r = _mm256_mask_scalef_ps(a, 0, a, b);
46137        assert_eq_m256(r, a);
46138        let r = _mm256_mask_scalef_ps(a, 0b11111111, a, b);
46139        let e = _mm256_set1_ps(8.);
46140        assert_eq_m256(r, e);
46141    }
46142
46143    #[simd_test(enable = "avx512f,avx512vl")]
46144    unsafe fn test_mm256_maskz_scalef_ps() {
46145        let a = _mm256_set1_ps(1.);
46146        let b = _mm256_set1_ps(3.);
46147        let r = _mm256_maskz_scalef_ps(0, a, b);
46148        assert_eq_m256(r, _mm256_setzero_ps());
46149        let r = _mm256_maskz_scalef_ps(0b11111111, a, b);
46150        let e = _mm256_set1_ps(8.);
46151        assert_eq_m256(r, e);
46152    }
46153
46154    #[simd_test(enable = "avx512f,avx512vl")]
46155    unsafe fn test_mm_scalef_ps() {
46156        let a = _mm_set1_ps(1.);
46157        let b = _mm_set1_ps(3.);
46158        let r = _mm_scalef_ps(a, b);
46159        let e = _mm_set1_ps(8.);
46160        assert_eq_m128(r, e);
46161    }
46162
46163    #[simd_test(enable = "avx512f,avx512vl")]
46164    unsafe fn test_mm_mask_scalef_ps() {
46165        let a = _mm_set1_ps(1.);
46166        let b = _mm_set1_ps(3.);
46167        let r = _mm_mask_scalef_ps(a, 0, a, b);
46168        assert_eq_m128(r, a);
46169        let r = _mm_mask_scalef_ps(a, 0b00001111, a, b);
46170        let e = _mm_set1_ps(8.);
46171        assert_eq_m128(r, e);
46172    }
46173
46174    #[simd_test(enable = "avx512f,avx512vl")]
46175    unsafe fn test_mm_maskz_scalef_ps() {
46176        let a = _mm_set1_ps(1.);
46177        let b = _mm_set1_ps(3.);
46178        let r = _mm_maskz_scalef_ps(0, a, b);
46179        assert_eq_m128(r, _mm_setzero_ps());
46180        let r = _mm_maskz_scalef_ps(0b00001111, a, b);
46181        let e = _mm_set1_ps(8.);
46182        assert_eq_m128(r, e);
46183    }
46184
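    // `fixupimm` patches special-case inputs using a per-lane response table encoded
    // in the integer vector `c` plus the immediate. These tests exercise a single
    // path: with `c` set to i32::MAX and an immediate of 5, NaN lanes are fixed up
    // to +0.0, which is the value the expectations check for.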
46185    #[simd_test(enable = "avx512f")]
46186    unsafe fn test_mm512_fixupimm_ps() {
46187        let a = _mm512_set1_ps(f32::NAN);
46188        let b = _mm512_set1_ps(f32::MAX);
46189        let c = _mm512_set1_epi32(i32::MAX);
46191        let r = _mm512_fixupimm_ps::<5>(a, b, c);
46192        let e = _mm512_set1_ps(0.0);
46193        assert_eq_m512(r, e);
46194    }
46195
46196    #[simd_test(enable = "avx512f")]
46197    unsafe fn test_mm512_mask_fixupimm_ps() {
46198        #[rustfmt::skip]
46199        let a = _mm512_set_ps(
46200            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
46201            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
46202            1., 1., 1., 1.,
46203            1., 1., 1., 1.,
46204        );
46205        let b = _mm512_set1_ps(f32::MAX);
46206        let c = _mm512_set1_epi32(i32::MAX);
46207        let r = _mm512_mask_fixupimm_ps::<5>(a, 0b11111111_00000000, b, c);
46208        let e = _mm512_set_ps(
46209            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
46210        );
46211        assert_eq_m512(r, e);
46212    }
46213
46214    #[simd_test(enable = "avx512f")]
46215    unsafe fn test_mm512_maskz_fixupimm_ps() {
46216        #[rustfmt::skip]
46217        let a = _mm512_set_ps(
46218            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
46219            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
46220            1., 1., 1., 1.,
46221            1., 1., 1., 1.,
46222        );
46223        let b = _mm512_set1_ps(f32::MAX);
46224        let c = _mm512_set1_epi32(i32::MAX);
46225        let r = _mm512_maskz_fixupimm_ps::<5>(0b11111111_00000000, a, b, c);
46226        let e = _mm512_set_ps(
46227            0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
46228        );
46229        assert_eq_m512(r, e);
46230    }
46231
46232    #[simd_test(enable = "avx512f,avx512vl")]
46233    unsafe fn test_mm256_fixupimm_ps() {
46234        let a = _mm256_set1_ps(f32::NAN);
46235        let b = _mm256_set1_ps(f32::MAX);
46236        let c = _mm256_set1_epi32(i32::MAX);
46237        let r = _mm256_fixupimm_ps::<5>(a, b, c);
46238        let e = _mm256_set1_ps(0.0);
46239        assert_eq_m256(r, e);
46240    }
46241
46242    #[simd_test(enable = "avx512f,avx512vl")]
46243    unsafe fn test_mm256_mask_fixupimm_ps() {
46244        let a = _mm256_set1_ps(f32::NAN);
46245        let b = _mm256_set1_ps(f32::MAX);
46246        let c = _mm256_set1_epi32(i32::MAX);
46247        let r = _mm256_mask_fixupimm_ps::<5>(a, 0b11111111, b, c);
46248        let e = _mm256_set1_ps(0.0);
46249        assert_eq_m256(r, e);
46250    }
46251
46252    #[simd_test(enable = "avx512f,avx512vl")]
46253    unsafe fn test_mm256_maskz_fixupimm_ps() {
46254        let a = _mm256_set1_ps(f32::NAN);
46255        let b = _mm256_set1_ps(f32::MAX);
46256        let c = _mm256_set1_epi32(i32::MAX);
46257        let r = _mm256_maskz_fixupimm_ps::<5>(0b11111111, a, b, c);
46258        let e = _mm256_set1_ps(0.0);
46259        assert_eq_m256(r, e);
46260    }
46261
46262    #[simd_test(enable = "avx512f,avx512vl")]
46263    unsafe fn test_mm_fixupimm_ps() {
46264        let a = _mm_set1_ps(f32::NAN);
46265        let b = _mm_set1_ps(f32::MAX);
46266        let c = _mm_set1_epi32(i32::MAX);
46267        let r = _mm_fixupimm_ps::<5>(a, b, c);
46268        let e = _mm_set1_ps(0.0);
46269        assert_eq_m128(r, e);
46270    }
46271
46272    #[simd_test(enable = "avx512f,avx512vl")]
46273    unsafe fn test_mm_mask_fixupimm_ps() {
46274        let a = _mm_set1_ps(f32::NAN);
46275        let b = _mm_set1_ps(f32::MAX);
46276        let c = _mm_set1_epi32(i32::MAX);
46277        let r = _mm_mask_fixupimm_ps::<5>(a, 0b00001111, b, c);
46278        let e = _mm_set1_ps(0.0);
46279        assert_eq_m128(r, e);
46280    }
46281
46282    #[simd_test(enable = "avx512f,avx512vl")]
46283    unsafe fn test_mm_maskz_fixupimm_ps() {
46284        let a = _mm_set1_ps(f32::NAN);
46285        let b = _mm_set1_ps(f32::MAX);
46286        let c = _mm_set1_epi32(i32::MAX);
46287        let r = _mm_maskz_fixupimm_ps::<5>(0b00001111, a, b, c);
46288        let e = _mm_set1_ps(0.0);
46289        assert_eq_m128(r, e);
46290    }
46291
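    // `ternarylogic` evaluates an arbitrary three-input boolean function bit by bit:
    // each output bit is `imm8[(a << 2) | (b << 1) | c]` (a sketch of the lookup,
    // with `a` providing the high index bit). Truth table 8 is only set for the
    // a=0, b=1, c=1 combination; since the operands here have disjoint set bits
    // (1 << 2, 1 << 1, 1 << 0) that combination never occurs, so every computed
    // lane is 0. The `<9>` calls are made under an all-zero mask, so their table
    // value does not affect the result.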
46292    #[simd_test(enable = "avx512f")]
46293    unsafe fn test_mm512_ternarylogic_epi32() {
46294        let a = _mm512_set1_epi32(1 << 2);
46295        let b = _mm512_set1_epi32(1 << 1);
46296        let c = _mm512_set1_epi32(1 << 0);
46297        let r = _mm512_ternarylogic_epi32::<8>(a, b, c);
46298        let e = _mm512_set1_epi32(0);
46299        assert_eq_m512i(r, e);
46300    }
46301
46302    #[simd_test(enable = "avx512f")]
46303    unsafe fn test_mm512_mask_ternarylogic_epi32() {
46304        let src = _mm512_set1_epi32(1 << 2);
46305        let a = _mm512_set1_epi32(1 << 1);
46306        let b = _mm512_set1_epi32(1 << 0);
46307        let r = _mm512_mask_ternarylogic_epi32::<8>(src, 0, a, b);
46308        assert_eq_m512i(r, src);
46309        let r = _mm512_mask_ternarylogic_epi32::<8>(src, 0b11111111_11111111, a, b);
46310        let e = _mm512_set1_epi32(0);
46311        assert_eq_m512i(r, e);
46312    }
46313
46314    #[simd_test(enable = "avx512f")]
46315    unsafe fn test_mm512_maskz_ternarylogic_epi32() {
46316        let a = _mm512_set1_epi32(1 << 2);
46317        let b = _mm512_set1_epi32(1 << 1);
46318        let c = _mm512_set1_epi32(1 << 0);
46319        let r = _mm512_maskz_ternarylogic_epi32::<9>(0, a, b, c);
46320        assert_eq_m512i(r, _mm512_setzero_si512());
46321        let r = _mm512_maskz_ternarylogic_epi32::<8>(0b11111111_11111111, a, b, c);
46322        let e = _mm512_set1_epi32(0);
46323        assert_eq_m512i(r, e);
46324    }
46325
46326    #[simd_test(enable = "avx512f,avx512vl")]
46327    unsafe fn test_mm256_ternarylogic_epi32() {
46328        let a = _mm256_set1_epi32(1 << 2);
46329        let b = _mm256_set1_epi32(1 << 1);
46330        let c = _mm256_set1_epi32(1 << 0);
46331        let r = _mm256_ternarylogic_epi32::<8>(a, b, c);
46332        let e = _mm256_set1_epi32(0);
46333        assert_eq_m256i(r, e);
46334    }
46335
46336    #[simd_test(enable = "avx512f,avx512vl")]
46337    unsafe fn test_mm256_mask_ternarylogic_epi32() {
46338        let src = _mm256_set1_epi32(1 << 2);
46339        let a = _mm256_set1_epi32(1 << 1);
46340        let b = _mm256_set1_epi32(1 << 0);
46341        let r = _mm256_mask_ternarylogic_epi32::<8>(src, 0, a, b);
46342        assert_eq_m256i(r, src);
46343        let r = _mm256_mask_ternarylogic_epi32::<8>(src, 0b11111111, a, b);
46344        let e = _mm256_set1_epi32(0);
46345        assert_eq_m256i(r, e);
46346    }
46347
46348    #[simd_test(enable = "avx512f,avx512vl")]
46349    unsafe fn test_mm256_maskz_ternarylogic_epi32() {
46350        let a = _mm256_set1_epi32(1 << 2);
46351        let b = _mm256_set1_epi32(1 << 1);
46352        let c = _mm256_set1_epi32(1 << 0);
46353        let r = _mm256_maskz_ternarylogic_epi32::<9>(0, a, b, c);
46354        assert_eq_m256i(r, _mm256_setzero_si256());
46355        let r = _mm256_maskz_ternarylogic_epi32::<8>(0b11111111, a, b, c);
46356        let e = _mm256_set1_epi32(0);
46357        assert_eq_m256i(r, e);
46358    }
46359
46360    #[simd_test(enable = "avx512f,avx512vl")]
46361    unsafe fn test_mm_ternarylogic_epi32() {
46362        let a = _mm_set1_epi32(1 << 2);
46363        let b = _mm_set1_epi32(1 << 1);
46364        let c = _mm_set1_epi32(1 << 0);
46365        let r = _mm_ternarylogic_epi32::<8>(a, b, c);
46366        let e = _mm_set1_epi32(0);
46367        assert_eq_m128i(r, e);
46368    }
46369
46370    #[simd_test(enable = "avx512f,avx512vl")]
46371    unsafe fn test_mm_mask_ternarylogic_epi32() {
46372        let src = _mm_set1_epi32(1 << 2);
46373        let a = _mm_set1_epi32(1 << 1);
46374        let b = _mm_set1_epi32(1 << 0);
46375        let r = _mm_mask_ternarylogic_epi32::<8>(src, 0, a, b);
46376        assert_eq_m128i(r, src);
46377        let r = _mm_mask_ternarylogic_epi32::<8>(src, 0b00001111, a, b);
46378        let e = _mm_set1_epi32(0);
46379        assert_eq_m128i(r, e);
46380    }
46381
46382    #[simd_test(enable = "avx512f,avx512vl")]
46383    unsafe fn test_mm_maskz_ternarylogic_epi32() {
46384        let a = _mm_set1_epi32(1 << 2);
46385        let b = _mm_set1_epi32(1 << 1);
46386        let c = _mm_set1_epi32(1 << 0);
46387        let r = _mm_maskz_ternarylogic_epi32::<9>(0, a, b, c);
46388        assert_eq_m128i(r, _mm_setzero_si128());
46389        let r = _mm_maskz_ternarylogic_epi32::<8>(0b00001111, a, b, c);
46390        let e = _mm_set1_epi32(0);
46391        assert_eq_m128i(r, e);
46392    }
46393
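    // `getmant` extracts the normalized mantissa. 10.0 is 1.25 * 2^3, so both the
    // [0.75, 1.5) and [1, 2) normalization intervals used below yield 1.25; the sign
    // controls (_MM_MANT_SIGN_NAN / _MM_MANT_SIGN_SRC) only matter for negative inputs.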
46394    #[simd_test(enable = "avx512f")]
46395    unsafe fn test_mm512_getmant_ps() {
46396        let a = _mm512_set1_ps(10.);
46397        let r = _mm512_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
46398        let e = _mm512_set1_ps(1.25);
46399        assert_eq_m512(r, e);
46400    }
46401
46402    #[simd_test(enable = "avx512f")]
46403    unsafe fn test_mm512_mask_getmant_ps() {
46404        let a = _mm512_set1_ps(10.);
46405        let r = _mm512_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
46406        assert_eq_m512(r, a);
46407        let r = _mm512_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(
46408            a,
46409            0b11111111_00000000,
46410            a,
46411        );
46412        let e = _mm512_setr_ps(
46413            10., 10., 10., 10., 10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
46414        );
46415        assert_eq_m512(r, e);
46416    }
46417
46418    #[simd_test(enable = "avx512f")]
46419    unsafe fn test_mm512_maskz_getmant_ps() {
46420        let a = _mm512_set1_ps(10.);
46421        let r = _mm512_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
46422        assert_eq_m512(r, _mm512_setzero_ps());
46423        let r =
46424            _mm512_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111_00000000, a);
46425        let e = _mm512_setr_ps(
46426            0., 0., 0., 0., 0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
46427        );
46428        assert_eq_m512(r, e);
46429    }
46430
46431    #[simd_test(enable = "avx512f,avx512vl")]
46432    unsafe fn test_mm256_getmant_ps() {
46433        let a = _mm256_set1_ps(10.);
46434        let r = _mm256_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
46435        let e = _mm256_set1_ps(1.25);
46436        assert_eq_m256(r, e);
46437    }
46438
46439    #[simd_test(enable = "avx512f,avx512vl")]
46440    unsafe fn test_mm256_mask_getmant_ps() {
46441        let a = _mm256_set1_ps(10.);
46442        let r = _mm256_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
46443        assert_eq_m256(r, a);
46444        let r = _mm256_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a);
46445        let e = _mm256_set1_ps(1.25);
46446        assert_eq_m256(r, e);
46447    }
46448
46449    #[simd_test(enable = "avx512f,avx512vl")]
46450    unsafe fn test_mm256_maskz_getmant_ps() {
46451        let a = _mm256_set1_ps(10.);
46452        let r = _mm256_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
46453        assert_eq_m256(r, _mm256_setzero_ps());
46454        let r = _mm256_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a);
46455        let e = _mm256_set1_ps(1.25);
46456        assert_eq_m256(r, e);
46457    }
46458
46459    #[simd_test(enable = "avx512f,avx512vl")]
46460    unsafe fn test_mm_getmant_ps() {
46461        let a = _mm_set1_ps(10.);
46462        let r = _mm_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
46463        let e = _mm_set1_ps(1.25);
46464        assert_eq_m128(r, e);
46465    }
46466
46467    #[simd_test(enable = "avx512f,avx512vl")]
46468    unsafe fn test_mm_mask_getmant_ps() {
46469        let a = _mm_set1_ps(10.);
46470        let r = _mm_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
46471        assert_eq_m128(r, a);
46472        let r = _mm_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b00001111, a);
46473        let e = _mm_set1_ps(1.25);
46474        assert_eq_m128(r, e);
46475    }
46476
46477    #[simd_test(enable = "avx512f,avx512vl")]
46478    unsafe fn test_mm_maskz_getmant_ps() {
46479        let a = _mm_set1_ps(10.);
46480        let r = _mm_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
46481        assert_eq_m128(r, _mm_setzero_ps());
46482        let r = _mm_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b00001111, a);
46483        let e = _mm_set1_ps(1.25);
46484        assert_eq_m128(r, e);
46485    }
46486
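    // The `_round` variants take an explicit rounding/SAE control. The interesting
    // lane is the last one: 0.00000007 + (-1.0) is not exactly representable, so
    // round-to-nearest yields -0.99999994 while round-toward-zero truncates to
    // -0.9999999.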
46487    #[simd_test(enable = "avx512f")]
46488    unsafe fn test_mm512_add_round_ps() {
46489        let a = _mm512_setr_ps(
46490            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46491        );
46492        let b = _mm512_set1_ps(-1.);
46493        let r = _mm512_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
46494        #[rustfmt::skip]
46495        let e = _mm512_setr_ps(
46496            -1., 0.5, 1., 2.5,
46497            3., 4.5, 5., 6.5,
46498            7., 8.5, 9., 10.5,
46499            11., 12.5, 13., -0.99999994,
46500        );
46501        assert_eq_m512(r, e);
46502        let r = _mm512_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
46503        let e = _mm512_setr_ps(
46504            -1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999,
46505        );
46506        assert_eq_m512(r, e);
46507    }
46508
46509    #[simd_test(enable = "avx512f")]
46510    unsafe fn test_mm512_mask_add_round_ps() {
46511        let a = _mm512_setr_ps(
46512            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46513        );
46514        let b = _mm512_set1_ps(-1.);
46515        let r = _mm512_mask_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, 0, a, b);
46516        assert_eq_m512(r, a);
46517        let r = _mm512_mask_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46518            a,
46519            0b11111111_00000000,
46520            a,
46521            b,
46522        );
46523        #[rustfmt::skip]
46524        let e = _mm512_setr_ps(
46525            0., 1.5, 2., 3.5,
46526            4., 5.5, 6., 7.5,
46527            7., 8.5, 9., 10.5,
46528            11., 12.5, 13., -0.99999994,
46529        );
46530        assert_eq_m512(r, e);
46531    }
46532
46533    #[simd_test(enable = "avx512f")]
46534    unsafe fn test_mm512_maskz_add_round_ps() {
46535        let a = _mm512_setr_ps(
46536            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46537        );
46538        let b = _mm512_set1_ps(-1.);
46539        let r = _mm512_maskz_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
46540        assert_eq_m512(r, _mm512_setzero_ps());
46541        let r = _mm512_maskz_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46542            0b11111111_00000000,
46543            a,
46544            b,
46545        );
46546        #[rustfmt::skip]
46547        let e = _mm512_setr_ps(
46548            0., 0., 0., 0.,
46549            0., 0., 0., 0.,
46550            7., 8.5, 9., 10.5,
46551            11., 12.5, 13., -0.99999994,
46552        );
46553        assert_eq_m512(r, e);
46554    }
46555
46556    #[simd_test(enable = "avx512f")]
46557    unsafe fn test_mm512_sub_round_ps() {
46558        let a = _mm512_setr_ps(
46559            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46560        );
46561        let b = _mm512_set1_ps(1.);
46562        let r = _mm512_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
46563        #[rustfmt::skip]
46564        let e = _mm512_setr_ps(
46565            -1., 0.5, 1., 2.5,
46566            3., 4.5, 5., 6.5,
46567            7., 8.5, 9., 10.5,
46568            11., 12.5, 13., -0.99999994,
46569        );
46570        assert_eq_m512(r, e);
46571        let r = _mm512_sub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
46572        let e = _mm512_setr_ps(
46573            -1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999,
46574        );
46575        assert_eq_m512(r, e);
46576    }
46577
46578    #[simd_test(enable = "avx512f")]
46579    unsafe fn test_mm512_mask_sub_round_ps() {
46580        let a = _mm512_setr_ps(
46581            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46582        );
46583        let b = _mm512_set1_ps(1.);
46584        let r = _mm512_mask_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46585            a, 0, a, b,
46586        );
46587        assert_eq_m512(r, a);
46588        let r = _mm512_mask_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46589            a,
46590            0b11111111_00000000,
46591            a,
46592            b,
46593        );
46594        #[rustfmt::skip]
46595        let e = _mm512_setr_ps(
46596            0., 1.5, 2., 3.5,
46597            4., 5.5, 6., 7.5,
46598            7., 8.5, 9., 10.5,
46599            11., 12.5, 13., -0.99999994,
46600        );
46601        assert_eq_m512(r, e);
46602    }
46603
46604    #[simd_test(enable = "avx512f")]
46605    unsafe fn test_mm512_maskz_sub_round_ps() {
46606        let a = _mm512_setr_ps(
46607            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46608        );
46609        let b = _mm512_set1_ps(1.);
46610        let r =
46611            _mm512_maskz_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
46612        assert_eq_m512(r, _mm512_setzero_ps());
46613        let r = _mm512_maskz_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46614            0b11111111_00000000,
46615            a,
46616            b,
46617        );
46618        #[rustfmt::skip]
46619        let e = _mm512_setr_ps(
46620            0., 0., 0., 0.,
46621            0., 0., 0., 0.,
46622            7., 8.5, 9., 10.5,
46623            11., 12.5, 13., -0.99999994,
46624        );
46625        assert_eq_m512(r, e);
46626    }
46627
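    // 0.1 has no exact f32 representation, so several products differ by one ulp between
    // round-to-nearest and round-toward-zero (e.g. the 1.5 * 0.1 lane).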
46628    #[simd_test(enable = "avx512f")]
46629    unsafe fn test_mm512_mul_round_ps() {
46630        #[rustfmt::skip]
46631        let a = _mm512_setr_ps(
46632            0., 1.5, 2., 3.5,
46633            4., 5.5, 6., 7.5,
46634            8., 9.5, 10., 11.5,
46635            12., 13.5, 14., 0.00000000000000000000007,
46636        );
46637        let b = _mm512_set1_ps(0.1);
46638        let r = _mm512_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
46639        #[rustfmt::skip]
46640        let e = _mm512_setr_ps(
46641            0., 0.15, 0.2, 0.35,
46642            0.4, 0.55, 0.6, 0.75,
46643            0.8, 0.95, 1.0, 1.15,
46644            1.2, 1.35, 1.4, 0.000000000000000000000007000001,
46645        );
46646        assert_eq_m512(r, e);
46647        let r = _mm512_mul_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
46648        #[rustfmt::skip]
46649        let e = _mm512_setr_ps(
46650            0., 0.14999999, 0.2, 0.35,
46651            0.4, 0.54999995, 0.59999996, 0.75,
46652            0.8, 0.95, 1.0, 1.15,
46653            1.1999999, 1.3499999, 1.4, 0.000000000000000000000007,
46654        );
46655        assert_eq_m512(r, e);
46656    }
46657
46658    #[simd_test(enable = "avx512f")]
46659    unsafe fn test_mm512_mask_mul_round_ps() {
46660        #[rustfmt::skip]
46661        let a = _mm512_setr_ps(
46662            0., 1.5, 2., 3.5,
46663            4., 5.5, 6., 7.5,
46664            8., 9.5, 10., 11.5,
46665            12., 13.5, 14., 0.00000000000000000000007,
46666        );
46667        let b = _mm512_set1_ps(0.1);
46668        let r = _mm512_mask_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46669            a, 0, a, b,
46670        );
46671        assert_eq_m512(r, a);
46672        let r = _mm512_mask_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46673            a,
46674            0b11111111_00000000,
46675            a,
46676            b,
46677        );
46678        #[rustfmt::skip]
46679        let e = _mm512_setr_ps(
46680            0., 1.5, 2., 3.5,
46681            4., 5.5, 6., 7.5,
46682            0.8, 0.95, 1.0, 1.15,
46683            1.2, 1.35, 1.4, 0.000000000000000000000007000001,
46684        );
46685        assert_eq_m512(r, e);
46686    }
46687
46688    #[simd_test(enable = "avx512f")]
46689    unsafe fn test_mm512_maskz_mul_round_ps() {
46690        #[rustfmt::skip]
46691        let a = _mm512_setr_ps(
46692            0., 1.5, 2., 3.5,
46693            4., 5.5, 6., 7.5,
46694            8., 9.5, 10., 11.5,
46695            12., 13.5, 14., 0.00000000000000000000007,
46696        );
46697        let b = _mm512_set1_ps(0.1);
46698        let r =
46699            _mm512_maskz_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
46700        assert_eq_m512(r, _mm512_setzero_ps());
46701        let r = _mm512_maskz_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46702            0b11111111_00000000,
46703            a,
46704            b,
46705        );
46706        #[rustfmt::skip]
46707        let e = _mm512_setr_ps(
46708            0., 0., 0., 0.,
46709            0., 0., 0., 0.,
46710            0.8, 0.95, 1.0, 1.15,
46711            1.2, 1.35, 1.4, 0.000000000000000000000007000001,
46712        );
46713        assert_eq_m512(r, e);
46714    }
46715
46716    #[simd_test(enable = "avx512f")]
46717    unsafe fn test_mm512_div_round_ps() {
46718        let a = _mm512_set1_ps(1.);
46719        let b = _mm512_set1_ps(3.);
46720        let r = _mm512_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
46721        let e = _mm512_set1_ps(0.33333334);
46722        assert_eq_m512(r, e);
46723        let r = _mm512_div_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
46724        let e = _mm512_set1_ps(0.3333333);
46725        assert_eq_m512(r, e);
46726    }
46727
46728    #[simd_test(enable = "avx512f")]
46729    unsafe fn test_mm512_mask_div_round_ps() {
46730        let a = _mm512_set1_ps(1.);
46731        let b = _mm512_set1_ps(3.);
46732        let r = _mm512_mask_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46733            a, 0, a, b,
46734        );
46735        assert_eq_m512(r, a);
46736        let r = _mm512_mask_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46737            a,
46738            0b11111111_00000000,
46739            a,
46740            b,
46741        );
46742        let e = _mm512_setr_ps(
46743            1., 1., 1., 1., 1., 1., 1., 1., 0.33333334, 0.33333334, 0.33333334, 0.33333334,
46744            0.33333334, 0.33333334, 0.33333334, 0.33333334,
46745        );
46746        assert_eq_m512(r, e);
46747    }
46748
46749    #[simd_test(enable = "avx512f")]
46750    unsafe fn test_mm512_maskz_div_round_ps() {
46751        let a = _mm512_set1_ps(1.);
46752        let b = _mm512_set1_ps(3.);
46753        let r =
46754            _mm512_maskz_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
46755        assert_eq_m512(r, _mm512_setzero_ps());
46756        let r = _mm512_maskz_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46757            0b11111111_00000000,
46758            a,
46759            b,
46760        );
46761        let e = _mm512_setr_ps(
46762            0., 0., 0., 0., 0., 0., 0., 0., 0.33333334, 0.33333334, 0.33333334, 0.33333334,
46763            0.33333334, 0.33333334, 0.33333334, 0.33333334,
46764        );
46765        assert_eq_m512(r, e);
46766    }
46767
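    // sqrt(3) falls between two adjacent f32 values; rounding toward +infinity picks the
    // next representable value above the round-to-nearest result.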
46768    #[simd_test(enable = "avx512f")]
46769    unsafe fn test_mm512_sqrt_round_ps() {
46770        let a = _mm512_set1_ps(3.);
46771        let r = _mm512_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
46772        let e = _mm512_set1_ps(1.7320508);
46773        assert_eq_m512(r, e);
46774        let r = _mm512_sqrt_round_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
46775        let e = _mm512_set1_ps(1.7320509);
46776        assert_eq_m512(r, e);
46777    }
46778
46779    #[simd_test(enable = "avx512f")]
46780    unsafe fn test_mm512_mask_sqrt_round_ps() {
46781        let a = _mm512_set1_ps(3.);
46782        let r =
46783            _mm512_mask_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 0, a);
46784        assert_eq_m512(r, a);
46785        let r = _mm512_mask_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46786            a,
46787            0b11111111_00000000,
46788            a,
46789        );
46790        let e = _mm512_setr_ps(
46791            3., 3., 3., 3., 3., 3., 3., 3., 1.7320508, 1.7320508, 1.7320508, 1.7320508, 1.7320508,
46792            1.7320508, 1.7320508, 1.7320508,
46793        );
46794        assert_eq_m512(r, e);
46795    }
46796
46797    #[simd_test(enable = "avx512f")]
46798    unsafe fn test_mm512_maskz_sqrt_round_ps() {
46799        let a = _mm512_set1_ps(3.);
46800        let r =
46801            _mm512_maskz_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a);
46802        assert_eq_m512(r, _mm512_setzero_ps());
46803        let r = _mm512_maskz_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46804            0b11111111_00000000,
46805            a,
46806        );
46807        let e = _mm512_setr_ps(
46808            0., 0., 0., 0., 0., 0., 0., 0., 1.7320508, 1.7320508, 1.7320508, 1.7320508, 1.7320508,
46809            1.7320508, 1.7320508, 1.7320508,
46810        );
46811        assert_eq_m512(r, e);
46812    }
46813
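    // vfmadd computes a * b + c with a single rounding step; here a * b is tiny (7e-8),
    // so the sum lands between two representable values and the two rounding modes
    // differ by one ulp.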
46814    #[simd_test(enable = "avx512f")]
46815    unsafe fn test_mm512_fmadd_round_ps() {
46816        let a = _mm512_set1_ps(0.00000007);
46817        let b = _mm512_set1_ps(1.);
46818        let c = _mm512_set1_ps(-1.);
46819        let r = _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
46820        let e = _mm512_set1_ps(-0.99999994);
46821        assert_eq_m512(r, e);
46822        let r = _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
46823        let e = _mm512_set1_ps(-0.9999999);
46824        assert_eq_m512(r, e);
46825    }
46826
46827    #[simd_test(enable = "avx512f")]
46828    unsafe fn test_mm512_mask_fmadd_round_ps() {
46829        let a = _mm512_set1_ps(0.00000007);
46830        let b = _mm512_set1_ps(1.);
46831        let c = _mm512_set1_ps(-1.);
46832        let r = _mm512_mask_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46833            a, 0, b, c,
46834        );
46835        assert_eq_m512(r, a);
46836        let r = _mm512_mask_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46837            a,
46838            0b00000000_11111111,
46839            b,
46840            c,
46841        );
46842        #[rustfmt::skip]
46843        let e = _mm512_setr_ps(
46844            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46845            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46846            0.00000007, 0.00000007, 0.00000007, 0.00000007,
46847            0.00000007, 0.00000007, 0.00000007, 0.00000007,
46848        );
46849        assert_eq_m512(r, e);
46850    }
46851
46852    #[simd_test(enable = "avx512f")]
46853    unsafe fn test_mm512_maskz_fmadd_round_ps() {
46854        let a = _mm512_set1_ps(0.00000007);
46855        let b = _mm512_set1_ps(1.);
46856        let c = _mm512_set1_ps(-1.);
46857        let r = _mm512_maskz_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46858            0, a, b, c,
46859        );
46860        assert_eq_m512(r, _mm512_setzero_ps());
46861        #[rustfmt::skip]
46862        let r = _mm512_maskz_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46863            0b00000000_11111111,
46864            a,
46865            b,
46866            c,
46867        );
46868        #[rustfmt::skip]
46869        let e = _mm512_setr_ps(
46870            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46871            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46872            0., 0., 0., 0.,
46873            0., 0., 0., 0.,
46874        );
46875        assert_eq_m512(r, e);
46876    }
46877
46878    #[simd_test(enable = "avx512f")]
46879    unsafe fn test_mm512_mask3_fmadd_round_ps() {
46880        let a = _mm512_set1_ps(0.00000007);
46881        let b = _mm512_set1_ps(1.);
46882        let c = _mm512_set1_ps(-1.);
46883        let r = _mm512_mask3_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46884            a, b, c, 0,
46885        );
46886        assert_eq_m512(r, c);
46887        let r = _mm512_mask3_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46888            a,
46889            b,
46890            c,
46891            0b00000000_11111111,
46892        );
46893        #[rustfmt::skip]
46894        let e = _mm512_setr_ps(
46895            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46896            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46897            -1., -1., -1., -1.,
46898            -1., -1., -1., -1.,
46899        );
46900        assert_eq_m512(r, e);
46901    }
46902
46903    #[simd_test(enable = "avx512f")]
46904    unsafe fn test_mm512_fmsub_round_ps() {
46905        let a = _mm512_set1_ps(0.00000007);
46906        let b = _mm512_set1_ps(1.);
46907        let c = _mm512_set1_ps(1.);
46908        let r = _mm512_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
46909        let e = _mm512_set1_ps(-0.99999994);
46910        assert_eq_m512(r, e);
46911        let r = _mm512_fmsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
46912        let e = _mm512_set1_ps(-0.9999999);
46913        assert_eq_m512(r, e);
46914    }
46915
46916    #[simd_test(enable = "avx512f")]
46917    unsafe fn test_mm512_mask_fmsub_round_ps() {
46918        let a = _mm512_set1_ps(0.00000007);
46919        let b = _mm512_set1_ps(1.);
46920        let c = _mm512_set1_ps(1.);
46921        let r = _mm512_mask_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46922            a, 0, b, c,
46923        );
46924        assert_eq_m512(r, a);
46925        let r = _mm512_mask_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46926            a,
46927            0b00000000_11111111,
46928            b,
46929            c,
46930        );
46931        #[rustfmt::skip]
46932        let e = _mm512_setr_ps(
46933            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46934            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46935            0.00000007, 0.00000007, 0.00000007, 0.00000007,
46936            0.00000007, 0.00000007, 0.00000007, 0.00000007,
46937        );
46938        assert_eq_m512(r, e);
46939    }
46940
46941    #[simd_test(enable = "avx512f")]
46942    unsafe fn test_mm512_maskz_fmsub_round_ps() {
46943        let a = _mm512_set1_ps(0.00000007);
46944        let b = _mm512_set1_ps(1.);
46945        let c = _mm512_set1_ps(1.);
46946        let r = _mm512_maskz_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46947            0, a, b, c,
46948        );
46949        assert_eq_m512(r, _mm512_setzero_ps());
46950        let r = _mm512_maskz_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46951            0b00000000_11111111,
46952            a,
46953            b,
46954            c,
46955        );
46956        #[rustfmt::skip]
46957        let e = _mm512_setr_ps(
46958            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46959            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46960            0., 0., 0., 0.,
46961            0., 0., 0., 0.,
46962        );
46963        assert_eq_m512(r, e);
46964    }
46965
46966    #[simd_test(enable = "avx512f")]
46967    unsafe fn test_mm512_mask3_fmsub_round_ps() {
46968        let a = _mm512_set1_ps(0.00000007);
46969        let b = _mm512_set1_ps(1.);
46970        let c = _mm512_set1_ps(1.);
46971        let r = _mm512_mask3_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46972            a, b, c, 0,
46973        );
46974        assert_eq_m512(r, c);
46975        let r = _mm512_mask3_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46976            a,
46977            b,
46978            c,
46979            0b00000000_11111111,
46980        );
46981        #[rustfmt::skip]
46982        let e = _mm512_setr_ps(
46983            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46984            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46985            1., 1., 1., 1.,
46986            1., 1., 1., 1.,
46987        );
46988        assert_eq_m512(r, e);
46989    }
46990
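    // vfmaddsub alternates per lane: even-indexed lanes compute a * b - c and
    // odd-indexed lanes compute a * b + c, hence the interleaved 1.0000001 / -0.99999994
    // pattern in the expected vector.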
46991    #[simd_test(enable = "avx512f")]
46992    unsafe fn test_mm512_fmaddsub_round_ps() {
46993        let a = _mm512_set1_ps(0.00000007);
46994        let b = _mm512_set1_ps(1.);
46995        let c = _mm512_set1_ps(-1.);
46996        let r =
46997            _mm512_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
46998        #[rustfmt::skip]
46999        let e = _mm512_setr_ps(
47000            1.0000001, -0.99999994, 1.0000001, -0.99999994,
47001            1.0000001, -0.99999994, 1.0000001, -0.99999994,
47002            1.0000001, -0.99999994, 1.0000001, -0.99999994,
47003            1.0000001, -0.99999994, 1.0000001, -0.99999994,
47004        );
47005        assert_eq_m512(r, e);
47006        let r = _mm512_fmaddsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
47007        let e = _mm512_setr_ps(
47008            1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
47009            -0.9999999, 1., -0.9999999, 1., -0.9999999,
47010        );
47011        assert_eq_m512(r, e);
47012    }
47013
47014    #[simd_test(enable = "avx512f")]
47015    unsafe fn test_mm512_mask_fmaddsub_round_ps() {
47016        let a = _mm512_set1_ps(0.00000007);
47017        let b = _mm512_set1_ps(1.);
47018        let c = _mm512_set1_ps(-1.);
47019        let r = _mm512_mask_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47020            a, 0, b, c,
47021        );
47022        assert_eq_m512(r, a);
47023        let r = _mm512_mask_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47024            a,
47025            0b00000000_11111111,
47026            b,
47027            c,
47028        );
47029        #[rustfmt::skip]
47030        let e = _mm512_setr_ps(
47031            1.0000001, -0.99999994, 1.0000001, -0.99999994,
47032            1.0000001, -0.99999994, 1.0000001, -0.99999994,
47033            0.00000007, 0.00000007, 0.00000007, 0.00000007,
47034            0.00000007, 0.00000007, 0.00000007, 0.00000007,
47035        );
47036        assert_eq_m512(r, e);
47037    }
47038
47039    #[simd_test(enable = "avx512f")]
47040    unsafe fn test_mm512_maskz_fmaddsub_round_ps() {
47041        let a = _mm512_set1_ps(0.00000007);
47042        let b = _mm512_set1_ps(1.);
47043        let c = _mm512_set1_ps(-1.);
47044        let r = _mm512_maskz_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47045            0, a, b, c,
47046        );
47047        assert_eq_m512(r, _mm512_setzero_ps());
47048        let r = _mm512_maskz_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47049            0b00000000_11111111,
47050            a,
47051            b,
47052            c,
47053        );
47054        #[rustfmt::skip]
47055        let e = _mm512_setr_ps(
47056            1.0000001, -0.99999994, 1.0000001, -0.99999994,
47057            1.0000001, -0.99999994, 1.0000001, -0.99999994,
47058            0., 0., 0., 0.,
47059            0., 0., 0., 0.,
47060        );
47061        assert_eq_m512(r, e);
47062    }
47063
47064    #[simd_test(enable = "avx512f")]
47065    unsafe fn test_mm512_mask3_fmaddsub_round_ps() {
47066        let a = _mm512_set1_ps(0.00000007);
47067        let b = _mm512_set1_ps(1.);
47068        let c = _mm512_set1_ps(-1.);
47069        let r = _mm512_mask3_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47070            a, b, c, 0,
47071        );
47072        assert_eq_m512(r, c);
47073        let r = _mm512_mask3_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47074            a,
47075            b,
47076            c,
47077            0b00000000_11111111,
47078        );
47079        #[rustfmt::skip]
47080        let e = _mm512_setr_ps(
47081            1.0000001, -0.99999994, 1.0000001, -0.99999994,
47082            1.0000001, -0.99999994, 1.0000001, -0.99999994,
47083            -1., -1., -1., -1.,
47084            -1., -1., -1., -1.,
47085        );
47086        assert_eq_m512(r, e);
47087    }
47088
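    // vfmsubadd is the mirror image of vfmaddsub: even-indexed lanes add c and
    // odd-indexed lanes subtract it.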
47089    #[simd_test(enable = "avx512f")]
47090    unsafe fn test_mm512_fmsubadd_round_ps() {
47091        let a = _mm512_set1_ps(0.00000007);
47092        let b = _mm512_set1_ps(1.);
47093        let c = _mm512_set1_ps(-1.);
47094        let r =
47095            _mm512_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
47096        #[rustfmt::skip]
47097        let e = _mm512_setr_ps(
47098            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47099            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47100            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47101            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47102        );
47103        assert_eq_m512(r, e);
47104        let r = _mm512_fmsubadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
47105        let e = _mm512_setr_ps(
47106            -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
47107            -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
47108        );
47109        assert_eq_m512(r, e);
47110    }
47111
47112    #[simd_test(enable = "avx512f")]
47113    unsafe fn test_mm512_mask_fmsubadd_round_ps() {
47114        let a = _mm512_set1_ps(0.00000007);
47115        let b = _mm512_set1_ps(1.);
47116        let c = _mm512_set1_ps(-1.);
47117        let r = _mm512_mask_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47118            a, 0, b, c,
47119        );
47120        assert_eq_m512(r, a);
47121        let r = _mm512_mask_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47122            a,
47123            0b00000000_11111111,
47124            b,
47125            c,
47126        );
47127        #[rustfmt::skip]
47128        let e = _mm512_setr_ps(
47129            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47130            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47131            0.00000007, 0.00000007, 0.00000007, 0.00000007,
47132            0.00000007, 0.00000007, 0.00000007, 0.00000007,
47133        );
47134        assert_eq_m512(r, e);
47135    }
47136
47137    #[simd_test(enable = "avx512f")]
47138    unsafe fn test_mm512_maskz_fmsubadd_round_ps() {
47139        let a = _mm512_set1_ps(0.00000007);
47140        let b = _mm512_set1_ps(1.);
47141        let c = _mm512_set1_ps(-1.);
47142        let r = _mm512_maskz_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47143            0, a, b, c,
47144        );
47145        assert_eq_m512(r, _mm512_setzero_ps());
47146        let r = _mm512_maskz_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47147            0b00000000_11111111,
47148            a,
47149            b,
47150            c,
47151        );
47152        #[rustfmt::skip]
47153        let e = _mm512_setr_ps(
47154            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47155            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47156            0., 0., 0., 0.,
47157            0., 0., 0., 0.,
47158        );
47159        assert_eq_m512(r, e);
47160    }
47161
47162    #[simd_test(enable = "avx512f")]
47163    unsafe fn test_mm512_mask3_fmsubadd_round_ps() {
47164        let a = _mm512_set1_ps(0.00000007);
47165        let b = _mm512_set1_ps(1.);
47166        let c = _mm512_set1_ps(-1.);
47167        let r = _mm512_mask3_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47168            a, b, c, 0,
47169        );
47170        assert_eq_m512(r, c);
47171        let r = _mm512_mask3_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47172            a,
47173            b,
47174            c,
47175            0b00000000_11111111,
47176        );
47177        #[rustfmt::skip]
47178        let e = _mm512_setr_ps(
47179            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47180            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47181            -1., -1., -1., -1.,
47182            -1., -1., -1., -1.,
47183        );
47184        assert_eq_m512(r, e);
47185    }
47186
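    // vfnmadd negates the product: each lane computes -(a * b) + c.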
47187    #[simd_test(enable = "avx512f")]
47188    unsafe fn test_mm512_fnmadd_round_ps() {
47189        let a = _mm512_set1_ps(0.00000007);
47190        let b = _mm512_set1_ps(1.);
47191        let c = _mm512_set1_ps(1.);
47192        let r =
47193            _mm512_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
47194        let e = _mm512_set1_ps(0.99999994);
47195        assert_eq_m512(r, e);
47196        let r = _mm512_fnmadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
47197        let e = _mm512_set1_ps(0.9999999);
47198        assert_eq_m512(r, e);
47199    }
47200
47201    #[simd_test(enable = "avx512f")]
47202    unsafe fn test_mm512_mask_fnmadd_round_ps() {
47203        let a = _mm512_set1_ps(0.00000007);
47204        let b = _mm512_set1_ps(1.);
47205        let c = _mm512_set1_ps(1.);
47206        let r = _mm512_mask_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47207            a, 0, b, c,
47208        );
47209        assert_eq_m512(r, a);
47210        let r = _mm512_mask_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47211            a,
47212            0b00000000_11111111,
47213            b,
47214            c,
47215        );
47216        let e = _mm512_setr_ps(
47217            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47218            0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
47219            0.00000007, 0.00000007,
47220        );
47221        assert_eq_m512(r, e);
47222    }
47223
47224    #[simd_test(enable = "avx512f")]
47225    unsafe fn test_mm512_maskz_fnmadd_round_ps() {
47226        let a = _mm512_set1_ps(0.00000007);
47227        let b = _mm512_set1_ps(1.);
47228        let c = _mm512_set1_ps(1.);
47229        let r = _mm512_maskz_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47230            0, a, b, c,
47231        );
47232        assert_eq_m512(r, _mm512_setzero_ps());
47233        let r = _mm512_maskz_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47234            0b00000000_11111111,
47235            a,
47236            b,
47237            c,
47238        );
47239        let e = _mm512_setr_ps(
47240            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47241            0.99999994, 0., 0., 0., 0., 0., 0., 0., 0.,
47242        );
47243        assert_eq_m512(r, e);
47244    }
47245
47246    #[simd_test(enable = "avx512f")]
47247    unsafe fn test_mm512_mask3_fnmadd_round_ps() {
47248        let a = _mm512_set1_ps(0.00000007);
47249        let b = _mm512_set1_ps(1.);
47250        let c = _mm512_set1_ps(1.);
47251        let r = _mm512_mask3_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47252            a, b, c, 0,
47253        );
47254        assert_eq_m512(r, c);
47255        let r = _mm512_mask3_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47256            a,
47257            b,
47258            c,
47259            0b00000000_11111111,
47260        );
47261        let e = _mm512_setr_ps(
47262            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47263            0.99999994, 1., 1., 1., 1., 1., 1., 1., 1.,
47264        );
47265        assert_eq_m512(r, e);
47266    }
47267
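    // vfnmsub negates both terms: each lane computes -(a * b) - c.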
47268    #[simd_test(enable = "avx512f")]
47269    unsafe fn test_mm512_fnmsub_round_ps() {
47270        let a = _mm512_set1_ps(0.00000007);
47271        let b = _mm512_set1_ps(1.);
47272        let c = _mm512_set1_ps(-1.);
47273        let r =
47274            _mm512_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
47275        let e = _mm512_set1_ps(0.99999994);
47276        assert_eq_m512(r, e);
47277        let r = _mm512_fnmsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
47278        let e = _mm512_set1_ps(0.9999999);
47279        assert_eq_m512(r, e);
47280    }
47281
47282    #[simd_test(enable = "avx512f")]
47283    unsafe fn test_mm512_mask_fnmsub_round_ps() {
47284        let a = _mm512_set1_ps(0.00000007);
47285        let b = _mm512_set1_ps(1.);
47286        let c = _mm512_set1_ps(-1.);
47287        let r = _mm512_mask_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47288            a, 0, b, c,
47289        );
47290        assert_eq_m512(r, a);
47291        let r = _mm512_mask_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47292            a,
47293            0b00000000_11111111,
47294            b,
47295            c,
47296        );
47297        let e = _mm512_setr_ps(
47298            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47299            0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
47300            0.00000007, 0.00000007,
47301        );
47302        assert_eq_m512(r, e);
47303    }
47304
47305    #[simd_test(enable = "avx512f")]
47306    unsafe fn test_mm512_maskz_fnmsub_round_ps() {
47307        let a = _mm512_set1_ps(0.00000007);
47308        let b = _mm512_set1_ps(1.);
47309        let c = _mm512_set1_ps(-1.);
47310        let r = _mm512_maskz_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47311            0, a, b, c,
47312        );
47313        assert_eq_m512(r, _mm512_setzero_ps());
47314        let r = _mm512_maskz_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47315            0b00000000_11111111,
47316            a,
47317            b,
47318            c,
47319        );
47320        let e = _mm512_setr_ps(
47321            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47322            0.99999994, 0., 0., 0., 0., 0., 0., 0., 0.,
47323        );
47324        assert_eq_m512(r, e);
47325    }
47326
47327    #[simd_test(enable = "avx512f")]
47328    unsafe fn test_mm512_mask3_fnmsub_round_ps() {
47329        let a = _mm512_set1_ps(0.00000007);
47330        let b = _mm512_set1_ps(1.);
47331        let c = _mm512_set1_ps(-1.);
47332        let r = _mm512_mask3_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47333            a, b, c, 0,
47334        );
47335        assert_eq_m512(r, c);
47336        let r = _mm512_mask3_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47337            a,
47338            b,
47339            c,
47340            0b00000000_11111111,
47341        );
47342        let e = _mm512_setr_ps(
47343            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47344            0.99999994, -1., -1., -1., -1., -1., -1., -1., -1.,
47345        );
47346        assert_eq_m512(r, e);
47347    }
47348
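    // max/min never need to round, so the SAE parameter here only affects exception
    // suppression; _MM_FROUND_CUR_DIRECTION leaves the default behaviour in place.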
47349    #[simd_test(enable = "avx512f")]
47350    unsafe fn test_mm512_max_round_ps() {
47351        let a = _mm512_setr_ps(
47352            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47353        );
47354        let b = _mm512_setr_ps(
47355            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47356        );
47357        let r = _mm512_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, b);
47358        let e = _mm512_setr_ps(
47359            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
47360        );
47361        assert_eq_m512(r, e);
47362    }
47363
47364    #[simd_test(enable = "avx512f")]
47365    unsafe fn test_mm512_mask_max_round_ps() {
47366        let a = _mm512_setr_ps(
47367            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47368        );
47369        let b = _mm512_setr_ps(
47370            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47371        );
47372        let r = _mm512_mask_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
47373        assert_eq_m512(r, a);
47374        let r = _mm512_mask_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b00000000_11111111, a, b);
47375        let e = _mm512_setr_ps(
47376            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
47377        );
47378        assert_eq_m512(r, e);
47379    }
47380
47381    #[simd_test(enable = "avx512f")]
47382    unsafe fn test_mm512_maskz_max_round_ps() {
47383        let a = _mm512_setr_ps(
47384            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47385        );
47386        let b = _mm512_setr_ps(
47387            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47388        );
47389        let r = _mm512_maskz_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
47390        assert_eq_m512(r, _mm512_setzero_ps());
47391        let r = _mm512_maskz_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b00000000_11111111, a, b);
47392        let e = _mm512_setr_ps(
47393            15., 14., 13., 12., 11., 10., 9., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
47394        );
47395        assert_eq_m512(r, e);
47396    }
47397
47398    #[simd_test(enable = "avx512f")]
47399    unsafe fn test_mm512_min_round_ps() {
47400        let a = _mm512_setr_ps(
47401            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47402        );
47403        let b = _mm512_setr_ps(
47404            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47405        );
47406        let r = _mm512_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, b);
47407        let e = _mm512_setr_ps(
47408            0., 1., 2., 3., 4., 5., 6., 7., 7., 6., 5., 4., 3., 2., 1., 0.,
47409        );
47410        assert_eq_m512(r, e);
47411    }
47412
47413    #[simd_test(enable = "avx512f")]
47414    unsafe fn test_mm512_mask_min_round_ps() {
47415        let a = _mm512_setr_ps(
47416            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47417        );
47418        let b = _mm512_setr_ps(
47419            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47420        );
47421        let r = _mm512_mask_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
47422        assert_eq_m512(r, a);
47423        let r = _mm512_mask_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b00000000_11111111, a, b);
47424        let e = _mm512_setr_ps(
47425            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47426        );
47427        assert_eq_m512(r, e);
47428    }
47429
47430    #[simd_test(enable = "avx512f")]
47431    unsafe fn test_mm512_maskz_min_round_ps() {
47432        let a = _mm512_setr_ps(
47433            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47434        );
47435        let b = _mm512_setr_ps(
47436            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47437        );
47438        let r = _mm512_maskz_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
47439        assert_eq_m512(r, _mm512_setzero_ps());
47440        let r = _mm512_maskz_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b00000000_11111111, a, b);
47441        let e = _mm512_setr_ps(
47442            0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
47443        );
47444        assert_eq_m512(r, e);
47445    }
47446
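    // vgetexp returns floor(log2(|a|)) as a float, so getexp(3.0) is 1.0.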
47447    #[simd_test(enable = "avx512f")]
47448    unsafe fn test_mm512_getexp_round_ps() {
47449        let a = _mm512_set1_ps(3.);
47450        let r = _mm512_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a);
47451        let e = _mm512_set1_ps(1.);
47452        assert_eq_m512(r, e);
47453    }
47454
47455    #[simd_test(enable = "avx512f")]
47456    unsafe fn test_mm512_mask_getexp_round_ps() {
47457        let a = _mm512_set1_ps(3.);
47458        let r = _mm512_mask_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a);
47459        assert_eq_m512(r, a);
47460        let r = _mm512_mask_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111_00000000, a);
47461        let e = _mm512_setr_ps(
47462            3., 3., 3., 3., 3., 3., 3., 3., 1., 1., 1., 1., 1., 1., 1., 1.,
47463        );
47464        assert_eq_m512(r, e);
47465    }
47466
47467    #[simd_test(enable = "avx512f")]
47468    unsafe fn test_mm512_maskz_getexp_round_ps() {
47469        let a = _mm512_set1_ps(3.);
47470        let r = _mm512_maskz_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a);
47471        assert_eq_m512(r, _mm512_setzero_ps());
47472        let r = _mm512_maskz_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b11111111_00000000, a);
47473        let e = _mm512_setr_ps(
47474            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
47475        );
47476        assert_eq_m512(r, e);
47477    }
47478
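    // The first const generic of roundscale encodes the number of fraction bits to keep
    // (upper nibble) and the rounding selection (lower nibble); 0 means round to a whole
    // number, so 1.1 becomes 1.0.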
47479    #[simd_test(enable = "avx512f")]
47480    unsafe fn test_mm512_roundscale_round_ps() {
47481        let a = _mm512_set1_ps(1.1);
47482        let r = _mm512_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(a);
47483        let e = _mm512_set1_ps(1.0);
47484        assert_eq_m512(r, e);
47485    }
47486
47487    #[simd_test(enable = "avx512f")]
47488    unsafe fn test_mm512_mask_roundscale_round_ps() {
47489        let a = _mm512_set1_ps(1.1);
47490        let r = _mm512_mask_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a);
47491        let e = _mm512_set1_ps(1.1);
47492        assert_eq_m512(r, e);
47493        let r = _mm512_mask_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(
47494            a,
47495            0b11111111_11111111,
47496            a,
47497        );
47498        let e = _mm512_set1_ps(1.0);
47499        assert_eq_m512(r, e);
47500    }
47501
47502    #[simd_test(enable = "avx512f")]
47503    unsafe fn test_mm512_maskz_roundscale_round_ps() {
47504        let a = _mm512_set1_ps(1.1);
47505        let r = _mm512_maskz_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(0, a);
47506        assert_eq_m512(r, _mm512_setzero_ps());
47507        let r =
47508            _mm512_maskz_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111_11111111, a);
47509        let e = _mm512_set1_ps(1.0);
47510        assert_eq_m512(r, e);
47511    }
47512
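    // vscalef computes a * 2^floor(b), so 1.0 scaled by 3.0 gives 8.0.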
47513    #[simd_test(enable = "avx512f")]
47514    unsafe fn test_mm512_scalef_round_ps() {
47515        let a = _mm512_set1_ps(1.);
47516        let b = _mm512_set1_ps(3.);
47517        let r = _mm512_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
47518        let e = _mm512_set1_ps(8.);
47519        assert_eq_m512(r, e);
47520    }
47521
47522    #[simd_test(enable = "avx512f")]
47523    unsafe fn test_mm512_mask_scalef_round_ps() {
47524        let a = _mm512_set1_ps(1.);
47525        let b = _mm512_set1_ps(3.);
47526        let r = _mm512_mask_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47527            a, 0, a, b,
47528        );
47529        assert_eq_m512(r, a);
47530        let r = _mm512_mask_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47531            a,
47532            0b11111111_00000000,
47533            a,
47534            b,
47535        );
47536        let e = _mm512_set_ps(
47537            8., 8., 8., 8., 8., 8., 8., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
47538        );
47539        assert_eq_m512(r, e);
47540    }
47541
47542    #[simd_test(enable = "avx512f")]
47543    unsafe fn test_mm512_maskz_scalef_round_ps() {
47544        let a = _mm512_set1_ps(1.);
47545        let b = _mm512_set1_ps(3.);
47546        let r = _mm512_maskz_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47547            0, a, b,
47548        );
47549        assert_eq_m512(r, _mm512_setzero_ps());
47550        let r = _mm512_maskz_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47551            0b11111111_00000000,
47552            a,
47553            b,
47554        );
47555        let e = _mm512_set_ps(
47556            8., 8., 8., 8., 8., 8., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
47557        );
47558        assert_eq_m512(r, e);
47559    }
47560
47561    #[simd_test(enable = "avx512f")]
47562    unsafe fn test_mm512_fixupimm_round_ps() {
47563        let a = _mm512_set1_ps(f32::NAN);
47564        let b = _mm512_set1_ps(f32::MAX);
47565        let c = _mm512_set1_epi32(i32::MAX);
47566        let r = _mm512_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
47567        let e = _mm512_set1_ps(0.0);
47568        assert_eq_m512(r, e);
47569    }
47570
47571    #[simd_test(enable = "avx512f")]
47572    unsafe fn test_mm512_mask_fixupimm_round_ps() {
47573        #[rustfmt::skip]
47574        let a = _mm512_set_ps(
47575            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
47576            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
47577            1., 1., 1., 1.,
47578            1., 1., 1., 1.,
47579        );
47580        let b = _mm512_set1_ps(f32::MAX);
47581        let c = _mm512_set1_epi32(i32::MAX);
47582        let r = _mm512_mask_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(
47583            a,
47584            0b11111111_00000000,
47585            b,
47586            c,
47587        );
47588        let e = _mm512_set_ps(
47589            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
47590        );
47591        assert_eq_m512(r, e);
47592    }
47593
47594    #[simd_test(enable = "avx512f")]
47595    unsafe fn test_mm512_maskz_fixupimm_round_ps() {
47596        #[rustfmt::skip]
47597        let a = _mm512_set_ps(
47598            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
47599            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
47600            1., 1., 1., 1.,
47601            1., 1., 1., 1.,
47602        );
47603        let b = _mm512_set1_ps(f32::MAX);
47604        let c = _mm512_set1_epi32(i32::MAX);
47605        let r = _mm512_maskz_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(
47606            0b11111111_00000000,
47607            a,
47608            b,
47609            c,
47610        );
47611        let e = _mm512_set_ps(
47612            0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
47613        );
47614        assert_eq_m512(r, e);
47615    }
47616
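    // With _MM_MANT_NORM_1_2 the mantissa is normalised into [1, 2): 10.0 = 1.25 * 2^3,
    // so every selected lane becomes 1.25.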
47617    #[simd_test(enable = "avx512f")]
47618    unsafe fn test_mm512_getmant_round_ps() {
47619        let a = _mm512_set1_ps(10.);
47620        let r = _mm512_getmant_round_ps::<
47621            _MM_MANT_NORM_1_2,
47622            _MM_MANT_SIGN_SRC,
47623            _MM_FROUND_CUR_DIRECTION,
47624        >(a);
47625        let e = _mm512_set1_ps(1.25);
47626        assert_eq_m512(r, e);
47627    }
47628
47629    #[simd_test(enable = "avx512f")]
47630    unsafe fn test_mm512_mask_getmant_round_ps() {
47631        let a = _mm512_set1_ps(10.);
47632        let r = _mm512_mask_getmant_round_ps::<
47633            _MM_MANT_NORM_1_2,
47634            _MM_MANT_SIGN_SRC,
47635            _MM_FROUND_CUR_DIRECTION,
47636        >(a, 0, a);
47637        assert_eq_m512(r, a);
47638        let r = _mm512_mask_getmant_round_ps::<
47639            _MM_MANT_NORM_1_2,
47640            _MM_MANT_SIGN_SRC,
47641            _MM_FROUND_CUR_DIRECTION,
47642        >(a, 0b11111111_00000000, a);
47643        let e = _mm512_setr_ps(
47644            10., 10., 10., 10., 10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
47645        );
47646        assert_eq_m512(r, e);
47647    }
47648
47649    #[simd_test(enable = "avx512f")]
47650    unsafe fn test_mm512_maskz_getmant_round_ps() {
47651        let a = _mm512_set1_ps(10.);
47652        let r = _mm512_maskz_getmant_round_ps::<
47653            _MM_MANT_NORM_1_2,
47654            _MM_MANT_SIGN_SRC,
47655            _MM_FROUND_CUR_DIRECTION,
47656        >(0, a);
47657        assert_eq_m512(r, _mm512_setzero_ps());
47658        let r = _mm512_maskz_getmant_round_ps::<
47659            _MM_MANT_NORM_1_2,
47660            _MM_MANT_SIGN_SRC,
47661            _MM_FROUND_CUR_DIRECTION,
47662        >(0b11111111_00000000, a);
47663        let e = _mm512_setr_ps(
47664            0., 0., 0., 0., 0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
47665        );
47666        assert_eq_m512(r, e);
47667    }
47668
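    // Without an explicit rounding override, vcvtps2dq uses the current MXCSR mode
    // (round-to-nearest-even by default), so halfway cases such as -3.5 and 9.5 round to
    // the even integer.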
47669    #[simd_test(enable = "avx512f")]
47670    unsafe fn test_mm512_cvtps_epi32() {
47671        let a = _mm512_setr_ps(
47672            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47673        );
47674        let r = _mm512_cvtps_epi32(a);
47675        let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
47676        assert_eq_m512i(r, e);
47677    }
47678
47679    #[simd_test(enable = "avx512f")]
47680    unsafe fn test_mm512_mask_cvtps_epi32() {
47681        let a = _mm512_setr_ps(
47682            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47683        );
47684        let src = _mm512_set1_epi32(0);
47685        let r = _mm512_mask_cvtps_epi32(src, 0, a);
47686        assert_eq_m512i(r, src);
47687        let r = _mm512_mask_cvtps_epi32(src, 0b00000000_11111111, a);
47688        let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
47689        assert_eq_m512i(r, e);
47690    }
47691
47692    #[simd_test(enable = "avx512f")]
47693    unsafe fn test_mm512_maskz_cvtps_epi32() {
47694        let a = _mm512_setr_ps(
47695            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47696        );
47697        let r = _mm512_maskz_cvtps_epi32(0, a);
47698        assert_eq_m512i(r, _mm512_setzero_si512());
47699        let r = _mm512_maskz_cvtps_epi32(0b00000000_11111111, a);
47700        let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
47701        assert_eq_m512i(r, e);
47702    }
47703
47704    #[simd_test(enable = "avx512f,avx512vl")]
47705    unsafe fn test_mm256_mask_cvtps_epi32() {
47706        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47707        let src = _mm256_set1_epi32(0);
47708        let r = _mm256_mask_cvtps_epi32(src, 0, a);
47709        assert_eq_m256i(r, src);
47710        let r = _mm256_mask_cvtps_epi32(src, 0b11111111, a);
47711        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47712        assert_eq_m256i(r, e);
47713    }
47714
47715    #[simd_test(enable = "avx512f,avx512vl")]
47716    unsafe fn test_mm256_maskz_cvtps_epi32() {
47717        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47718        let r = _mm256_maskz_cvtps_epi32(0, a);
47719        assert_eq_m256i(r, _mm256_setzero_si256());
47720        let r = _mm256_maskz_cvtps_epi32(0b11111111, a);
47721        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47722        assert_eq_m256i(r, e);
47723    }
47724
47725    #[simd_test(enable = "avx512f,avx512vl")]
47726    unsafe fn test_mm_mask_cvtps_epi32() {
47727        let a = _mm_set_ps(12., 13.5, 14., 15.5);
47728        let src = _mm_set1_epi32(0);
47729        let r = _mm_mask_cvtps_epi32(src, 0, a);
47730        assert_eq_m128i(r, src);
47731        let r = _mm_mask_cvtps_epi32(src, 0b00001111, a);
47732        let e = _mm_set_epi32(12, 14, 14, 16);
47733        assert_eq_m128i(r, e);
47734    }
47735
47736    #[simd_test(enable = "avx512f,avx512vl")]
47737    unsafe fn test_mm_maskz_cvtps_epi32() {
47738        let a = _mm_set_ps(12., 13.5, 14., 15.5);
47739        let r = _mm_maskz_cvtps_epi32(0, a);
47740        assert_eq_m128i(r, _mm_setzero_si128());
47741        let r = _mm_maskz_cvtps_epi32(0b00001111, a);
47742        let e = _mm_set_epi32(12, 14, 14, 16);
47743        assert_eq_m128i(r, e);
47744    }
47745
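    // Negative inputs are out of range for the unsigned conversion and produce
    // 0xFFFFFFFF, which reads back as -1 through the signed _mm512_setr_epi32 lanes.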
47746    #[simd_test(enable = "avx512f")]
47747    unsafe fn test_mm512_cvtps_epu32() {
47748        let a = _mm512_setr_ps(
47749            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47750        );
47751        let r = _mm512_cvtps_epu32(a);
47752        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16);
47753        assert_eq_m512i(r, e);
47754    }
47755
47756    #[simd_test(enable = "avx512f")]
47757    unsafe fn test_mm512_mask_cvtps_epu32() {
47758        let a = _mm512_setr_ps(
47759            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47760        );
47761        let src = _mm512_set1_epi32(0);
47762        let r = _mm512_mask_cvtps_epu32(src, 0, a);
47763        assert_eq_m512i(r, src);
47764        let r = _mm512_mask_cvtps_epu32(src, 0b00000000_11111111, a);
47765        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
47766        assert_eq_m512i(r, e);
47767    }
47768
47769    #[simd_test(enable = "avx512f")]
47770    unsafe fn test_mm512_maskz_cvtps_epu32() {
47771        let a = _mm512_setr_ps(
47772            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47773        );
47774        let r = _mm512_maskz_cvtps_epu32(0, a);
47775        assert_eq_m512i(r, _mm512_setzero_si512());
47776        let r = _mm512_maskz_cvtps_epu32(0b00000000_11111111, a);
47777        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
47778        assert_eq_m512i(r, e);
47779    }
47780
47781    #[simd_test(enable = "avx512f,avx512vl")]
47782    unsafe fn test_mm256_cvtps_epu32() {
47783        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47784        let r = _mm256_cvtps_epu32(a);
47785        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47786        assert_eq_m256i(r, e);
47787    }
47788
47789    #[simd_test(enable = "avx512f,avx512vl")]
47790    unsafe fn test_mm256_mask_cvtps_epu32() {
47791        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47792        let src = _mm256_set1_epi32(0);
47793        let r = _mm256_mask_cvtps_epu32(src, 0, a);
47794        assert_eq_m256i(r, src);
47795        let r = _mm256_mask_cvtps_epu32(src, 0b11111111, a);
47796        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47797        assert_eq_m256i(r, e);
47798    }
47799
47800    #[simd_test(enable = "avx512f,avx512vl")]
47801    unsafe fn test_mm256_maskz_cvtps_epu32() {
47802        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47803        let r = _mm256_maskz_cvtps_epu32(0, a);
47804        assert_eq_m256i(r, _mm256_setzero_si256());
47805        let r = _mm256_maskz_cvtps_epu32(0b11111111, a);
47806        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47807        assert_eq_m256i(r, e);
47808    }
47809
47810    #[simd_test(enable = "avx512f,avx512vl")]
47811    unsafe fn test_mm_cvtps_epu32() {
47812        let a = _mm_set_ps(12., 13.5, 14., 15.5);
47813        let r = _mm_cvtps_epu32(a);
47814        let e = _mm_set_epi32(12, 14, 14, 16);
47815        assert_eq_m128i(r, e);
47816    }
47817
47818    #[simd_test(enable = "avx512f,avx512vl")]
47819    unsafe fn test_mm_mask_cvtps_epu32() {
47820        let a = _mm_set_ps(12., 13.5, 14., 15.5);
47821        let src = _mm_set1_epi32(0);
47822        let r = _mm_mask_cvtps_epu32(src, 0, a);
47823        assert_eq_m128i(r, src);
47824        let r = _mm_mask_cvtps_epu32(src, 0b00001111, a);
47825        let e = _mm_set_epi32(12, 14, 14, 16);
47826        assert_eq_m128i(r, e);
47827    }
47828
47829    #[simd_test(enable = "avx512f,avx512vl")]
47830    unsafe fn test_mm_maskz_cvtps_epu32() {
47831        let a = _mm_set_ps(12., 13.5, 14., 15.5);
47832        let r = _mm_maskz_cvtps_epu32(0, a);
47833        assert_eq_m128i(r, _mm_setzero_si128());
47834        let r = _mm_maskz_cvtps_epu32(0b00001111, a);
47835        let e = _mm_set_epi32(12, 14, 14, 16);
47836        assert_eq_m128i(r, e);
47837    }
47838
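    // vpmovsxbd sign-extends each byte; the zero-extending counterpart (cvtepu8) is
    // exercised further below, and the two agree here because every input byte is
    // non-negative.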
47839    #[simd_test(enable = "avx512f")]
47840    unsafe fn test_mm512_cvtepi8_epi32() {
47841        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47842        let r = _mm512_cvtepi8_epi32(a);
47843        let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47844        assert_eq_m512i(r, e);
47845    }
47846
47847    #[simd_test(enable = "avx512f")]
47848    unsafe fn test_mm512_mask_cvtepi8_epi32() {
47849        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47850        let src = _mm512_set1_epi32(-1);
47851        let r = _mm512_mask_cvtepi8_epi32(src, 0, a);
47852        assert_eq_m512i(r, src);
47853        let r = _mm512_mask_cvtepi8_epi32(src, 0b00000000_11111111, a);
47854        let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
47855        assert_eq_m512i(r, e);
47856    }
47857
47858    #[simd_test(enable = "avx512f")]
47859    unsafe fn test_mm512_maskz_cvtepi8_epi32() {
47860        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47861        let r = _mm512_maskz_cvtepi8_epi32(0, a);
47862        assert_eq_m512i(r, _mm512_setzero_si512());
47863        let r = _mm512_maskz_cvtepi8_epi32(0b00000000_11111111, a);
47864        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
47865        assert_eq_m512i(r, e);
47866    }
47867
47868    #[simd_test(enable = "avx512f,avx512vl")]
47869    unsafe fn test_mm256_mask_cvtepi8_epi32() {
47870        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47871        let src = _mm256_set1_epi32(-1);
47872        let r = _mm256_mask_cvtepi8_epi32(src, 0, a);
47873        assert_eq_m256i(r, src);
47874        let r = _mm256_mask_cvtepi8_epi32(src, 0b11111111, a);
47875        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
47876        assert_eq_m256i(r, e);
47877    }
47878
47879    #[simd_test(enable = "avx512f,avx512vl")]
47880    unsafe fn test_mm256_maskz_cvtepi8_epi32() {
47881        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47882        let r = _mm256_maskz_cvtepi8_epi32(0, a);
47883        assert_eq_m256i(r, _mm256_setzero_si256());
47884        let r = _mm256_maskz_cvtepi8_epi32(0b11111111, a);
47885        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
47886        assert_eq_m256i(r, e);
47887    }
47888
47889    #[simd_test(enable = "avx512f,avx512vl")]
47890    unsafe fn test_mm_mask_cvtepi8_epi32() {
47891        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47892        let src = _mm_set1_epi32(-1);
47893        let r = _mm_mask_cvtepi8_epi32(src, 0, a);
47894        assert_eq_m128i(r, src);
47895        let r = _mm_mask_cvtepi8_epi32(src, 0b00001111, a);
47896        let e = _mm_set_epi32(12, 13, 14, 15);
47897        assert_eq_m128i(r, e);
47898    }
47899
47900    #[simd_test(enable = "avx512f,avx512vl")]
47901    unsafe fn test_mm_maskz_cvtepi8_epi32() {
47902        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47903        let r = _mm_maskz_cvtepi8_epi32(0, a);
47904        assert_eq_m128i(r, _mm_setzero_si128());
47905        let r = _mm_maskz_cvtepi8_epi32(0b00001111, a);
47906        let e = _mm_set_epi32(12, 13, 14, 15);
47907        assert_eq_m128i(r, e);
47908    }
47909
47910    #[simd_test(enable = "avx512f")]
47911    unsafe fn test_mm512_cvtepu8_epi32() {
47912        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47913        let r = _mm512_cvtepu8_epi32(a);
47914        let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47915        assert_eq_m512i(r, e);
47916    }
47917
47918    #[simd_test(enable = "avx512f")]
47919    unsafe fn test_mm512_mask_cvtepu8_epi32() {
47920        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47921        let src = _mm512_set1_epi32(-1);
47922        let r = _mm512_mask_cvtepu8_epi32(src, 0, a);
47923        assert_eq_m512i(r, src);
47924        let r = _mm512_mask_cvtepu8_epi32(src, 0b00000000_11111111, a);
47925        let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
47926        assert_eq_m512i(r, e);
47927    }
47928
47929    #[simd_test(enable = "avx512f")]
47930    unsafe fn test_mm512_maskz_cvtepu8_epi32() {
47931        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47932        let r = _mm512_maskz_cvtepu8_epi32(0, a);
47933        assert_eq_m512i(r, _mm512_setzero_si512());
47934        let r = _mm512_maskz_cvtepu8_epi32(0b00000000_11111111, a);
47935        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
47936        assert_eq_m512i(r, e);
47937    }
47938
47939    #[simd_test(enable = "avx512f,avx512vl")]
47940    unsafe fn test_mm256_mask_cvtepu8_epi32() {
47941        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47942        let src = _mm256_set1_epi32(-1);
47943        let r = _mm256_mask_cvtepu8_epi32(src, 0, a);
47944        assert_eq_m256i(r, src);
47945        let r = _mm256_mask_cvtepu8_epi32(src, 0b11111111, a);
47946        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
47947        assert_eq_m256i(r, e);
47948    }
47949
47950    #[simd_test(enable = "avx512f,avx512vl")]
47951    unsafe fn test_mm256_maskz_cvtepu8_epi32() {
47952        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47953        let r = _mm256_maskz_cvtepu8_epi32(0, a);
47954        assert_eq_m256i(r, _mm256_setzero_si256());
47955        let r = _mm256_maskz_cvtepu8_epi32(0b11111111, a);
47956        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
47957        assert_eq_m256i(r, e);
47958    }
47959
47960    #[simd_test(enable = "avx512f,avx512vl")]
47961    unsafe fn test_mm_mask_cvtepu8_epi32() {
47962        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47963        let src = _mm_set1_epi32(-1);
47964        let r = _mm_mask_cvtepu8_epi32(src, 0, a);
47965        assert_eq_m128i(r, src);
47966        let r = _mm_mask_cvtepu8_epi32(src, 0b00001111, a);
47967        let e = _mm_set_epi32(12, 13, 14, 15);
47968        assert_eq_m128i(r, e);
47969    }
47970
47971    #[simd_test(enable = "avx512f,avx512vl")]
47972    unsafe fn test_mm_maskz_cvtepu8_epi32() {
47973        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47974        let r = _mm_maskz_cvtepu8_epi32(0, a);
47975        assert_eq_m128i(r, _mm_setzero_si128());
47976        let r = _mm_maskz_cvtepu8_epi32(0b00001111, a);
47977        let e = _mm_set_epi32(12, 13, 14, 15);
47978        assert_eq_m128i(r, e);
47979    }
47980
47981    #[simd_test(enable = "avx512f")]
47982    unsafe fn test_mm512_cvtepi16_epi32() {
47983        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47984        let r = _mm512_cvtepi16_epi32(a);
47985        let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47986        assert_eq_m512i(r, e);
47987    }
47988
47989    #[simd_test(enable = "avx512f")]
47990    unsafe fn test_mm512_mask_cvtepi16_epi32() {
47991        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47992        let src = _mm512_set1_epi32(-1);
47993        let r = _mm512_mask_cvtepi16_epi32(src, 0, a);
47994        assert_eq_m512i(r, src);
47995        let r = _mm512_mask_cvtepi16_epi32(src, 0b00000000_11111111, a);
47996        let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
47997        assert_eq_m512i(r, e);
47998    }
47999
48000    #[simd_test(enable = "avx512f")]
48001    unsafe fn test_mm512_maskz_cvtepi16_epi32() {
48002        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48003        let r = _mm512_maskz_cvtepi16_epi32(0, a);
48004        assert_eq_m512i(r, _mm512_setzero_si512());
48005        let r = _mm512_maskz_cvtepi16_epi32(0b00000000_11111111, a);
48006        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
48007        assert_eq_m512i(r, e);
48008    }
48009
48010    #[simd_test(enable = "avx512f,avx512vl")]
48011    unsafe fn test_mm256_mask_cvtepi16_epi32() {
48012        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48013        let src = _mm256_set1_epi32(-1);
48014        let r = _mm256_mask_cvtepi16_epi32(src, 0, a);
48015        assert_eq_m256i(r, src);
48016        let r = _mm256_mask_cvtepi16_epi32(src, 0b11111111, a);
48017        let e = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48018        assert_eq_m256i(r, e);
48019    }
48020
48021    #[simd_test(enable = "avx512f,avx512vl")]
48022    unsafe fn test_mm256_maskz_cvtepi16_epi32() {
48023        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48024        let r = _mm256_maskz_cvtepi16_epi32(0, a);
48025        assert_eq_m256i(r, _mm256_setzero_si256());
48026        let r = _mm256_maskz_cvtepi16_epi32(0b11111111, a);
48027        let e = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48028        assert_eq_m256i(r, e);
48029    }
48030
48031    #[simd_test(enable = "avx512f,avx512vl")]
48032    unsafe fn test_mm_mask_cvtepi16_epi32() {
48033        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48034        let src = _mm_set1_epi32(-1);
48035        let r = _mm_mask_cvtepi16_epi32(src, 0, a);
48036        assert_eq_m128i(r, src);
48037        let r = _mm_mask_cvtepi16_epi32(src, 0b00001111, a);
48038        let e = _mm_set_epi32(4, 5, 6, 7);
48039        assert_eq_m128i(r, e);
48040    }
48041
48042    #[simd_test(enable = "avx512f,avx512vl")]
48043    unsafe fn test_mm_maskz_cvtepi16_epi32() {
48044        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48045        let r = _mm_maskz_cvtepi16_epi32(0, a);
48046        assert_eq_m128i(r, _mm_setzero_si128());
48047        let r = _mm_maskz_cvtepi16_epi32(0b00001111, a);
48048        let e = _mm_set_epi32(4, 5, 6, 7);
48049        assert_eq_m128i(r, e);
48050    }
48051
48052    #[simd_test(enable = "avx512f")]
48053    unsafe fn test_mm512_cvtepu16_epi32() {
48054        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48055        let r = _mm512_cvtepu16_epi32(a);
48056        let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48057        assert_eq_m512i(r, e);
48058    }
48059
48060    #[simd_test(enable = "avx512f")]
48061    unsafe fn test_mm512_mask_cvtepu16_epi32() {
48062        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48063        let src = _mm512_set1_epi32(-1);
48064        let r = _mm512_mask_cvtepu16_epi32(src, 0, a);
48065        assert_eq_m512i(r, src);
48066        let r = _mm512_mask_cvtepu16_epi32(src, 0b00000000_11111111, a);
48067        let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
48068        assert_eq_m512i(r, e);
48069    }
48070
48071    #[simd_test(enable = "avx512f")]
48072    unsafe fn test_mm512_maskz_cvtepu16_epi32() {
48073        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48074        let r = _mm512_maskz_cvtepu16_epi32(0, a);
48075        assert_eq_m512i(r, _mm512_setzero_si512());
48076        let r = _mm512_maskz_cvtepu16_epi32(0b00000000_11111111, a);
48077        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
48078        assert_eq_m512i(r, e);
48079    }
48080
48081    #[simd_test(enable = "avx512f,avx512vl")]
48082    unsafe fn test_mm256_mask_cvtepu16_epi32() {
48083        let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
48084        let src = _mm256_set1_epi32(-1);
48085        let r = _mm256_mask_cvtepu16_epi32(src, 0, a);
48086        assert_eq_m256i(r, src);
48087        let r = _mm256_mask_cvtepu16_epi32(src, 0b11111111, a);
48088        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
48089        assert_eq_m256i(r, e);
48090    }
48091
48092    #[simd_test(enable = "avx512f,avx512vl")]
48093    unsafe fn test_mm256_maskz_cvtepu16_epi32() {
48094        let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
48095        let r = _mm256_maskz_cvtepu16_epi32(0, a);
48096        assert_eq_m256i(r, _mm256_setzero_si256());
48097        let r = _mm256_maskz_cvtepu16_epi32(0b11111111, a);
48098        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
48099        assert_eq_m256i(r, e);
48100    }
48101
48102    #[simd_test(enable = "avx512f,avx512vl")]
48103    unsafe fn test_mm_mask_cvtepu16_epi32() {
48104        let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
48105        let src = _mm_set1_epi32(-1);
48106        let r = _mm_mask_cvtepu16_epi32(src, 0, a);
48107        assert_eq_m128i(r, src);
48108        let r = _mm_mask_cvtepu16_epi32(src, 0b00001111, a);
48109        let e = _mm_set_epi32(12, 13, 14, 15);
48110        assert_eq_m128i(r, e);
48111    }
48112
48113    #[simd_test(enable = "avx512f,avx512vl")]
48114    unsafe fn test_mm_maskz_cvtepu16_epi32() {
48115        let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
48116        let r = _mm_maskz_cvtepu16_epi32(0, a);
48117        assert_eq_m128i(r, _mm_setzero_si128());
48118        let r = _mm_maskz_cvtepu16_epi32(0b00001111, a);
48119        let e = _mm_set_epi32(12, 13, 14, 15);
48120        assert_eq_m128i(r, e);
48121    }
48122
48123    #[simd_test(enable = "avx512f")]
48124    unsafe fn test_mm512_cvtepi32_ps() {
48125        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48126        let r = _mm512_cvtepi32_ps(a);
48127        let e = _mm512_set_ps(
48128            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
48129        );
48130        assert_eq_m512(r, e);
48131    }
48132
48133    #[simd_test(enable = "avx512f")]
48134    unsafe fn test_mm512_mask_cvtepi32_ps() {
48135        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48136        let src = _mm512_set1_ps(-1.);
48137        let r = _mm512_mask_cvtepi32_ps(src, 0, a);
48138        assert_eq_m512(r, src);
48139        let r = _mm512_mask_cvtepi32_ps(src, 0b00000000_11111111, a);
48140        let e = _mm512_set_ps(
48141            -1., -1., -1., -1., -1., -1., -1., -1., 8., 9., 10., 11., 12., 13., 14., 15.,
48142        );
48143        assert_eq_m512(r, e);
48144    }
48145
48146    #[simd_test(enable = "avx512f")]
48147    unsafe fn test_mm512_maskz_cvtepi32_ps() {
48148        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48149        let r = _mm512_maskz_cvtepi32_ps(0, a);
48150        assert_eq_m512(r, _mm512_setzero_ps());
48151        let r = _mm512_maskz_cvtepi32_ps(0b00000000_11111111, a);
48152        let e = _mm512_set_ps(
48153            0., 0., 0., 0., 0., 0., 0., 0., 8., 9., 10., 11., 12., 13., 14., 15.,
48154        );
48155        assert_eq_m512(r, e);
48156    }
48157
48158    #[simd_test(enable = "avx512f,avx512vl")]
48159    unsafe fn test_mm256_mask_cvtepi32_ps() {
48160        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48161        let src = _mm256_set1_ps(-1.);
48162        let r = _mm256_mask_cvtepi32_ps(src, 0, a);
48163        assert_eq_m256(r, src);
48164        let r = _mm256_mask_cvtepi32_ps(src, 0b11111111, a);
48165        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
48166        assert_eq_m256(r, e);
48167    }
48168
48169    #[simd_test(enable = "avx512f,avx512vl")]
48170    unsafe fn test_mm256_maskz_cvtepi32_ps() {
48171        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48172        let r = _mm256_maskz_cvtepi32_ps(0, a);
48173        assert_eq_m256(r, _mm256_setzero_ps());
48174        let r = _mm256_maskz_cvtepi32_ps(0b11111111, a);
48175        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
48176        assert_eq_m256(r, e);
48177    }
48178
48179    #[simd_test(enable = "avx512f,avx512vl")]
48180    unsafe fn test_mm_mask_cvtepi32_ps() {
48181        let a = _mm_set_epi32(1, 2, 3, 4);
48182        let src = _mm_set1_ps(-1.);
48183        let r = _mm_mask_cvtepi32_ps(src, 0, a);
48184        assert_eq_m128(r, src);
48185        let r = _mm_mask_cvtepi32_ps(src, 0b00001111, a);
48186        let e = _mm_set_ps(1., 2., 3., 4.);
48187        assert_eq_m128(r, e);
48188    }
48189
48190    #[simd_test(enable = "avx512f,avx512vl")]
48191    unsafe fn test_mm_maskz_cvtepi32_ps() {
48192        let a = _mm_set_epi32(1, 2, 3, 4);
48193        let r = _mm_maskz_cvtepi32_ps(0, a);
48194        assert_eq_m128(r, _mm_setzero_ps());
48195        let r = _mm_maskz_cvtepi32_ps(0b00001111, a);
48196        let e = _mm_set_ps(1., 2., 3., 4.);
48197        assert_eq_m128(r, e);
48198    }
48199
48200    #[simd_test(enable = "avx512f")]
48201    unsafe fn test_mm512_cvtepu32_ps() {
48202        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48203        let r = _mm512_cvtepu32_ps(a);
48204        let e = _mm512_set_ps(
48205            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
48206        );
48207        assert_eq_m512(r, e);
48208    }
48209
48210    #[simd_test(enable = "avx512f")]
48211    unsafe fn test_mm512_mask_cvtepu32_ps() {
48212        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48213        let src = _mm512_set1_ps(-1.);
48214        let r = _mm512_mask_cvtepu32_ps(src, 0, a);
48215        assert_eq_m512(r, src);
48216        let r = _mm512_mask_cvtepu32_ps(src, 0b00000000_11111111, a);
48217        let e = _mm512_set_ps(
48218            -1., -1., -1., -1., -1., -1., -1., -1., 8., 9., 10., 11., 12., 13., 14., 15.,
48219        );
48220        assert_eq_m512(r, e);
48221    }
48222
48223    #[simd_test(enable = "avx512f")]
48224    unsafe fn test_mm512_maskz_cvtepu32_ps() {
48225        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48226        let r = _mm512_maskz_cvtepu32_ps(0, a);
48227        assert_eq_m512(r, _mm512_setzero_ps());
48228        let r = _mm512_maskz_cvtepu32_ps(0b00000000_11111111, a);
48229        let e = _mm512_set_ps(
48230            0., 0., 0., 0., 0., 0., 0., 0., 8., 9., 10., 11., 12., 13., 14., 15.,
48231        );
48232        assert_eq_m512(r, e);
48233    }
48234
48235    #[simd_test(enable = "avx512f")]
48236    unsafe fn test_mm512_cvtepi32_epi16() {
48237        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48238        let r = _mm512_cvtepi32_epi16(a);
48239        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48240        assert_eq_m256i(r, e);
48241    }
48242
48243    #[simd_test(enable = "avx512f")]
48244    unsafe fn test_mm512_mask_cvtepi32_epi16() {
48245        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48246        let src = _mm256_set1_epi16(-1);
48247        let r = _mm512_mask_cvtepi32_epi16(src, 0, a);
48248        assert_eq_m256i(r, src);
48249        let r = _mm512_mask_cvtepi32_epi16(src, 0b00000000_11111111, a);
48250        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
48251        assert_eq_m256i(r, e);
48252    }
48253
48254    #[simd_test(enable = "avx512f")]
48255    unsafe fn test_mm512_maskz_cvtepi32_epi16() {
48256        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48257        let r = _mm512_maskz_cvtepi32_epi16(0, a);
48258        assert_eq_m256i(r, _mm256_setzero_si256());
48259        let r = _mm512_maskz_cvtepi32_epi16(0b00000000_11111111, a);
48260        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
48261        assert_eq_m256i(r, e);
48262    }
48263
48264    #[simd_test(enable = "avx512f,avx512vl")]
48265    unsafe fn test_mm256_cvtepi32_epi16() {
48266        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48267        let r = _mm256_cvtepi32_epi16(a);
48268        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48269        assert_eq_m128i(r, e);
48270    }
48271
48272    #[simd_test(enable = "avx512f,avx512vl")]
48273    unsafe fn test_mm256_mask_cvtepi32_epi16() {
48274        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48275        let src = _mm_set1_epi16(-1);
48276        let r = _mm256_mask_cvtepi32_epi16(src, 0, a);
48277        assert_eq_m128i(r, src);
48278        let r = _mm256_mask_cvtepi32_epi16(src, 0b11111111, a);
48279        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48280        assert_eq_m128i(r, e);
48281    }
48282
48283    #[simd_test(enable = "avx512f,avx512vl")]
48284    unsafe fn test_mm256_maskz_cvtepi32_epi16() {
48285        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48286        let r = _mm256_maskz_cvtepi32_epi16(0, a);
48287        assert_eq_m128i(r, _mm_setzero_si128());
48288        let r = _mm256_maskz_cvtepi32_epi16(0b11111111, a);
48289        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48290        assert_eq_m128i(r, e);
48291    }
48292
48293    #[simd_test(enable = "avx512f,avx512vl")]
48294    unsafe fn test_mm_cvtepi32_epi16() {
48295        let a = _mm_set_epi32(4, 5, 6, 7);
48296        let r = _mm_cvtepi32_epi16(a);
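        // The four truncated words land in the low 64 bits; the upper half of the
        // destination is zeroed.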
48297        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48298        assert_eq_m128i(r, e);
48299    }
48300
48301    #[simd_test(enable = "avx512f,avx512vl")]
48302    unsafe fn test_mm_mask_cvtepi32_epi16() {
48303        let a = _mm_set_epi32(4, 5, 6, 7);
48304        let src = _mm_set1_epi16(0);
48305        let r = _mm_mask_cvtepi32_epi16(src, 0, a);
48306        assert_eq_m128i(r, src);
48307        let r = _mm_mask_cvtepi32_epi16(src, 0b00001111, a);
48308        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48309        assert_eq_m128i(r, e);
48310    }
48311
48312    #[simd_test(enable = "avx512f,avx512vl")]
48313    unsafe fn test_mm_maskz_cvtepi32_epi16() {
48314        let a = _mm_set_epi32(4, 5, 6, 7);
48315        let r = _mm_maskz_cvtepi32_epi16(0, a);
48316        assert_eq_m128i(r, _mm_setzero_si128());
48317        let r = _mm_maskz_cvtepi32_epi16(0b00001111, a);
48318        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48319        assert_eq_m128i(r, e);
48320    }
48321
48322    #[simd_test(enable = "avx512f")]
48323    unsafe fn test_mm512_cvtepi32_epi8() {
48324        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48325        let r = _mm512_cvtepi32_epi8(a);
48326        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48327        assert_eq_m128i(r, e);
48328    }
48329
48330    #[simd_test(enable = "avx512f")]
48331    unsafe fn test_mm512_mask_cvtepi32_epi8() {
48332        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48333        let src = _mm_set1_epi8(-1);
48334        let r = _mm512_mask_cvtepi32_epi8(src, 0, a);
48335        assert_eq_m128i(r, src);
48336        let r = _mm512_mask_cvtepi32_epi8(src, 0b00000000_11111111, a);
48337        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
48338        assert_eq_m128i(r, e);
48339    }
48340
48341    #[simd_test(enable = "avx512f")]
48342    unsafe fn test_mm512_maskz_cvtepi32_epi8() {
48343        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48344        let r = _mm512_maskz_cvtepi32_epi8(0, a);
48345        assert_eq_m128i(r, _mm_setzero_si128());
48346        let r = _mm512_maskz_cvtepi32_epi8(0b00000000_11111111, a);
48347        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
48348        assert_eq_m128i(r, e);
48349    }
48350
48351    #[simd_test(enable = "avx512f,avx512vl")]
48352    unsafe fn test_mm256_cvtepi32_epi8() {
48353        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48354        let r = _mm256_cvtepi32_epi8(a);
48355        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
48356        assert_eq_m128i(r, e);
48357    }
48358
48359    #[simd_test(enable = "avx512f,avx512vl")]
48360    unsafe fn test_mm256_mask_cvtepi32_epi8() {
48361        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48362        let src = _mm_set1_epi8(0);
48363        let r = _mm256_mask_cvtepi32_epi8(src, 0, a);
48364        assert_eq_m128i(r, src);
48365        let r = _mm256_mask_cvtepi32_epi8(src, 0b11111111, a);
48366        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
48367        assert_eq_m128i(r, e);
48368    }
48369
48370    #[simd_test(enable = "avx512f,avx512vl")]
48371    unsafe fn test_mm256_maskz_cvtepi32_epi8() {
48372        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48373        let r = _mm256_maskz_cvtepi32_epi8(0, a);
48374        assert_eq_m128i(r, _mm_setzero_si128());
48375        let r = _mm256_maskz_cvtepi32_epi8(0b11111111, a);
48376        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
48377        assert_eq_m128i(r, e);
48378    }
48379
48380    #[simd_test(enable = "avx512f,avx512vl")]
48381    unsafe fn test_mm_cvtepi32_epi8() {
48382        let a = _mm_set_epi32(4, 5, 6, 7);
48383        let r = _mm_cvtepi32_epi8(a);
48384        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
48385        assert_eq_m128i(r, e);
48386    }
48387
48388    #[simd_test(enable = "avx512f,avx512vl")]
48389    unsafe fn test_mm_mask_cvtepi32_epi8() {
48390        let a = _mm_set_epi32(4, 5, 6, 7);
48391        let src = _mm_set1_epi8(0);
48392        let r = _mm_mask_cvtepi32_epi8(src, 0, a);
48393        assert_eq_m128i(r, src);
48394        let r = _mm_mask_cvtepi32_epi8(src, 0b00001111, a);
48395        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
48396        assert_eq_m128i(r, e);
48397    }
48398
48399    #[simd_test(enable = "avx512f,avx512vl")]
48400    unsafe fn test_mm_maskz_cvtepi32_epi8() {
48401        let a = _mm_set_epi32(4, 5, 6, 7);
48402        let r = _mm_maskz_cvtepi32_epi8(0, a);
48403        assert_eq_m128i(r, _mm_setzero_si128());
48404        let r = _mm_maskz_cvtepi32_epi8(0b00001111, a);
48405        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
48406        assert_eq_m128i(r, e);
48407    }
48408
48409    #[simd_test(enable = "avx512f")]
48410    unsafe fn test_mm512_cvtsepi32_epi16() {
48411        #[rustfmt::skip]
48412        let a = _mm512_set_epi32(
48413            0, 1, 2, 3,
48414            4, 5, 6, 7,
48415            8, 9, 10, 11,
48416            12, 13, i32::MIN, i32::MAX,
48417        );
48418        let r = _mm512_cvtsepi32_epi16(a);
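        // Signed saturation clamps i32::MIN and i32::MAX to i16::MIN and i16::MAX.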
48419        #[rustfmt::skip]
48420        let e = _mm256_set_epi16(
48421            0, 1, 2, 3,
48422            4, 5, 6, 7,
48423            8, 9, 10, 11,
48424            12, 13, i16::MIN, i16::MAX,
48425        );
48426        assert_eq_m256i(r, e);
48427    }
48428
48429    #[simd_test(enable = "avx512f")]
48430    unsafe fn test_mm512_mask_cvtsepi32_epi16() {
48431        #[rustfmt::skip]
48432        let a = _mm512_set_epi32(
48433            0, 1, 2, 3,
48434            4, 5, 6, 7,
48435            8, 9, 10, 11,
48436            12, 13, i32::MIN, i32::MAX,
48437        );
48438        let src = _mm256_set1_epi16(-1);
48439        let r = _mm512_mask_cvtsepi32_epi16(src, 0, a);
48440        assert_eq_m256i(r, src);
48441        let r = _mm512_mask_cvtsepi32_epi16(src, 0b00000000_11111111, a);
48442        #[rustfmt::skip]
48443        let e = _mm256_set_epi16(
48444            -1, -1, -1, -1,
48445            -1, -1, -1, -1,
48446            8, 9, 10, 11,
48447            12, 13, i16::MIN, i16::MAX,
48448        );
48449        assert_eq_m256i(r, e);
48450    }
48451
48452    #[simd_test(enable = "avx512f")]
48453    unsafe fn test_mm512_maskz_cvtsepi32_epi16() {
48454        #[rustfmt::skip]
48455        let a = _mm512_set_epi32(
48456            0, 1, 2, 3,
48457            4, 5, 6, 7,
48458            8, 9, 10, 11,
48459            12, 13, i32::MIN, i32::MAX,
48460        );
48461        let r = _mm512_maskz_cvtsepi32_epi16(0, a);
48462        assert_eq_m256i(r, _mm256_setzero_si256());
48463        let r = _mm512_maskz_cvtsepi32_epi16(0b00000000_11111111, a);
48464        #[rustfmt::skip]
48465        let e = _mm256_set_epi16(
48466            0, 0, 0, 0,
48467            0, 0, 0, 0,
48468            8, 9, 10, 11,
48469            12, 13, i16::MIN, i16::MAX,
48470        );
48471        assert_eq_m256i(r, e);
48472    }
48473
48474    #[simd_test(enable = "avx512f,avx512vl")]
48475    unsafe fn test_mm256_cvtsepi32_epi16() {
48476        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48477        let r = _mm256_cvtsepi32_epi16(a);
48478        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48479        assert_eq_m128i(r, e);
48480    }
48481
48482    #[simd_test(enable = "avx512f,avx512vl")]
48483    unsafe fn test_mm256_mask_cvtsepi32_epi16() {
48484        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48485        let src = _mm_set1_epi16(-1);
48486        let r = _mm256_mask_cvtsepi32_epi16(src, 0, a);
48487        assert_eq_m128i(r, src);
48488        let r = _mm256_mask_cvtsepi32_epi16(src, 0b11111111, a);
48489        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48490        assert_eq_m128i(r, e);
48491    }
48492
48493    #[simd_test(enable = "avx512f,avx512vl")]
48494    unsafe fn test_mm256_maskz_cvtsepi32_epi16() {
48495        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48496        let r = _mm256_maskz_cvtsepi32_epi16(0, a);
48497        assert_eq_m128i(r, _mm_setzero_si128());
48498        let r = _mm256_maskz_cvtsepi32_epi16(0b11111111, a);
48499        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48500        assert_eq_m128i(r, e);
48501    }
48502
48503    #[simd_test(enable = "avx512f,avx512vl")]
48504    unsafe fn test_mm_cvtsepi32_epi16() {
48505        let a = _mm_set_epi32(4, 5, 6, 7);
48506        let r = _mm_cvtsepi32_epi16(a);
48507        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48508        assert_eq_m128i(r, e);
48509    }
48510
48511    #[simd_test(enable = "avx512f,avx512vl")]
48512    unsafe fn test_mm_mask_cvtsepi32_epi16() {
48513        let a = _mm_set_epi32(4, 5, 6, 7);
48514        let src = _mm_set1_epi16(0);
48515        let r = _mm_mask_cvtsepi32_epi16(src, 0, a);
48516        assert_eq_m128i(r, src);
48517        let r = _mm_mask_cvtsepi32_epi16(src, 0b11111111, a);
48518        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48519        assert_eq_m128i(r, e);
48520    }
48521
48522    #[simd_test(enable = "avx512f,avx512vl")]
48523    unsafe fn test_mm_maskz_cvtsepi32_epi16() {
48524        let a = _mm_set_epi32(4, 5, 6, 7);
48525        let r = _mm_maskz_cvtsepi32_epi16(0, a);
48526        assert_eq_m128i(r, _mm_setzero_si128());
48527        let r = _mm_maskz_cvtsepi32_epi16(0b11111111, a);
48528        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48529        assert_eq_m128i(r, e);
48530    }
48531
48532    #[simd_test(enable = "avx512f")]
48533    unsafe fn test_mm512_cvtsepi32_epi8() {
48534        #[rustfmt::skip]
48535        let a = _mm512_set_epi32(
48536            0, 1, 2, 3,
48537            4, 5, 6, 7,
48538            8, 9, 10, 11,
48539            12, 13, i32::MIN, i32::MAX,
48540        );
48541        let r = _mm512_cvtsepi32_epi8(a);
48542        #[rustfmt::skip]
48543        let e = _mm_set_epi8(
48544            0, 1, 2, 3,
48545            4, 5, 6, 7,
48546            8, 9, 10, 11,
48547            12, 13, i8::MIN, i8::MAX,
48548        );
48549        assert_eq_m128i(r, e);
48550    }
48551
48552    #[simd_test(enable = "avx512f")]
48553    unsafe fn test_mm512_mask_cvtsepi32_epi8() {
48554        #[rustfmt::skip]
48555        let a = _mm512_set_epi32(
48556            0, 1, 2, 3,
48557            4, 5, 6, 7,
48558            8, 9, 10, 11,
48559            12, 13, i32::MIN, i32::MAX,
48560        );
48561        let src = _mm_set1_epi8(-1);
48562        let r = _mm512_mask_cvtsepi32_epi8(src, 0, a);
48563        assert_eq_m128i(r, src);
48564        let r = _mm512_mask_cvtsepi32_epi8(src, 0b00000000_11111111, a);
48565        #[rustfmt::skip]
48566        let e = _mm_set_epi8(
48567            -1, -1, -1, -1,
48568            -1, -1, -1, -1,
48569            8, 9, 10, 11,
48570            12, 13, i8::MIN, i8::MAX,
48571        );
48572        assert_eq_m128i(r, e);
48573    }
48574
48575    #[simd_test(enable = "avx512f")]
48576    unsafe fn test_mm512_maskz_cvtsepi32_epi8() {
48577        #[rustfmt::skip]
48578        let a = _mm512_set_epi32(
48579            0, 1, 2, 3,
48580            4, 5, 6, 7,
48581            8, 9, 10, 11,
48582            12, 13, i32::MIN, i32::MAX,
48583        );
48584        let r = _mm512_maskz_cvtsepi32_epi8(0, a);
48585        assert_eq_m128i(r, _mm_setzero_si128());
48586        let r = _mm512_maskz_cvtsepi32_epi8(0b00000000_11111111, a);
48587        #[rustfmt::skip]
48588        let e = _mm_set_epi8(
48589            0, 0, 0, 0,
48590            0, 0, 0, 0,
48591            8, 9, 10, 11,
48592            12, 13, i8::MIN, i8::MAX,
48593        );
48594        assert_eq_m128i(r, e);
48595    }
48596
48597    #[simd_test(enable = "avx512f,avx512vl")]
48598    unsafe fn test_mm256_cvtsepi32_epi8() {
48599        let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
48600        let r = _mm256_cvtsepi32_epi8(a);
48601        #[rustfmt::skip]
48602        let e = _mm_set_epi8(
48603            0, 0, 0, 0,
48604            0, 0, 0, 0,
48605            9, 10, 11, 12,
48606            13, 14, 15, 16,
48607        );
48608        assert_eq_m128i(r, e);
48609    }
48610
48611    #[simd_test(enable = "avx512f,avx512vl")]
48612    unsafe fn test_mm256_mask_cvtsepi32_epi8() {
48613        let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
48614        let src = _mm_set1_epi8(0);
48615        let r = _mm256_mask_cvtsepi32_epi8(src, 0, a);
48616        assert_eq_m128i(r, src);
48617        let r = _mm256_mask_cvtsepi32_epi8(src, 0b11111111, a);
48618        #[rustfmt::skip]
48619        let e = _mm_set_epi8(
48620            0, 0, 0, 0,
48621            0, 0, 0, 0,
48622            9, 10, 11, 12,
48623            13, 14, 15, 16,
48624        );
48625        assert_eq_m128i(r, e);
48626    }
48627
48628    #[simd_test(enable = "avx512f,avx512vl")]
48629    unsafe fn test_mm256_maskz_cvtsepi32_epi8() {
48630        let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
48631        let r = _mm256_maskz_cvtsepi32_epi8(0, a);
48632        assert_eq_m128i(r, _mm_setzero_si128());
48633        let r = _mm256_maskz_cvtsepi32_epi8(0b11111111, a);
48634        #[rustfmt::skip]
48635        let e = _mm_set_epi8(
48636            0, 0, 0, 0,
48637            0, 0, 0, 0,
48638            9, 10, 11, 12,
48639            13, 14, 15, 16,
48640        );
48641        assert_eq_m128i(r, e);
48642    }
48643
48644    #[simd_test(enable = "avx512f,avx512vl")]
48645    unsafe fn test_mm_cvtsepi32_epi8() {
48646        let a = _mm_set_epi32(13, 14, 15, 16);
48647        let r = _mm_cvtsepi32_epi8(a);
48648        #[rustfmt::skip]
48649        let e = _mm_set_epi8(
48650            0, 0, 0, 0,
48651            0, 0, 0, 0,
48652            0, 0, 0, 0,
48653            13, 14, 15, 16,
48654        );
48655        assert_eq_m128i(r, e);
48656    }
48657
48658    #[simd_test(enable = "avx512f,avx512vl")]
48659    unsafe fn test_mm_mask_cvtsepi32_epi8() {
48660        let a = _mm_set_epi32(13, 14, 15, 16);
48661        let src = _mm_set1_epi8(0);
48662        let r = _mm_mask_cvtsepi32_epi8(src, 0, a);
48663        assert_eq_m128i(r, src);
48664        let r = _mm_mask_cvtsepi32_epi8(src, 0b00001111, a);
48665        #[rustfmt::skip]
48666        let e = _mm_set_epi8(
48667            0, 0, 0, 0,
48668            0, 0, 0, 0,
48669            0, 0, 0, 0,
48670            13, 14, 15, 16,
48671        );
48672        assert_eq_m128i(r, e);
48673    }
48674
48675    #[simd_test(enable = "avx512f,avx512vl")]
48676    unsafe fn test_mm_maskz_cvtsepi32_epi8() {
48677        let a = _mm_set_epi32(13, 14, 15, 16);
48678        let r = _mm_maskz_cvtsepi32_epi8(0, a);
48679        assert_eq_m128i(r, _mm_setzero_si128());
48680        let r = _mm_maskz_cvtsepi32_epi8(0b00001111, a);
48681        #[rustfmt::skip]
48682        let e = _mm_set_epi8(
48683            0, 0, 0, 0,
48684            0, 0, 0, 0,
48685            0, 0, 0, 0,
48686            13, 14, 15, 16,
48687        );
48688        assert_eq_m128i(r, e);
48689    }
48690
48691    #[simd_test(enable = "avx512f")]
48692    unsafe fn test_mm512_cvtusepi32_epi16() {
48693        #[rustfmt::skip]
48694        let a = _mm512_set_epi32(
48695            0, 1, 2, 3,
48696            4, 5, 6, 7,
48697            8, 9, 10, 11,
48698            12, 13, i32::MIN, i32::MIN,
48699        );
48700        let r = _mm512_cvtusepi32_epi16(a);
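        // Unsigned saturation treats i32::MIN as 0x8000_0000, which exceeds u16::MAX and
        // therefore saturates to 0xFFFF (-1 when read back as i16).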
48701        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1);
48702        assert_eq_m256i(r, e);
48703    }
48704
48705    #[simd_test(enable = "avx512f")]
48706    unsafe fn test_mm512_mask_cvtusepi32_epi16() {
48707        #[rustfmt::skip]
48708        let a = _mm512_set_epi32(
48709            0, 1, 2, 3,
48710            4, 5, 6, 7,
48711            8, 9, 10, 11,
48712            12, 13, i32::MIN, i32::MIN,
48713        );
48714        let src = _mm256_set1_epi16(-1);
48715        let r = _mm512_mask_cvtusepi32_epi16(src, 0, a);
48716        assert_eq_m256i(r, src);
48717        let r = _mm512_mask_cvtusepi32_epi16(src, 0b00000000_11111111, a);
48718        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, -1, -1);
48719        assert_eq_m256i(r, e);
48720    }
48721
48722    #[simd_test(enable = "avx512f")]
48723    unsafe fn test_mm512_maskz_cvtusepi32_epi16() {
48724        #[rustfmt::skip]
48725        let a = _mm512_set_epi32(
48726            0, 1, 2, 3,
48727            4, 5, 6, 7,
48728            8, 9, 10, 11,
48729            12, 13, i32::MIN, i32::MIN,
48730        );
48731        let r = _mm512_maskz_cvtusepi32_epi16(0, a);
48732        assert_eq_m256i(r, _mm256_setzero_si256());
48733        let r = _mm512_maskz_cvtusepi32_epi16(0b00000000_11111111, a);
48734        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, -1, -1);
48735        assert_eq_m256i(r, e);
48736    }
48737
48738    #[simd_test(enable = "avx512f,avx512vl")]
48739    unsafe fn test_mm256_cvtusepi32_epi16() {
48740        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48741        let r = _mm256_cvtusepi32_epi16(a);
48742        let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
48743        assert_eq_m128i(r, e);
48744    }
48745
48746    #[simd_test(enable = "avx512f,avx512vl")]
48747    unsafe fn test_mm256_mask_cvtusepi32_epi16() {
48748        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48749        let src = _mm_set1_epi16(0);
48750        let r = _mm256_mask_cvtusepi32_epi16(src, 0, a);
48751        assert_eq_m128i(r, src);
48752        let r = _mm256_mask_cvtusepi32_epi16(src, 0b11111111, a);
48753        let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
48754        assert_eq_m128i(r, e);
48755    }
48756
48757    #[simd_test(enable = "avx512f,avx512vl")]
48758    unsafe fn test_mm256_maskz_cvtusepi32_epi16() {
48759        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48760        let r = _mm256_maskz_cvtusepi32_epi16(0, a);
48761        assert_eq_m128i(r, _mm_setzero_si128());
48762        let r = _mm256_maskz_cvtusepi32_epi16(0b11111111, a);
48763        let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
48764        assert_eq_m128i(r, e);
48765    }
48766
48767    #[simd_test(enable = "avx512f,avx512vl")]
48768    unsafe fn test_mm_cvtusepi32_epi16() {
48769        let a = _mm_set_epi32(5, 6, 7, 8);
48770        let r = _mm_cvtusepi32_epi16(a);
48771        let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
48772        assert_eq_m128i(r, e);
48773    }
48774
48775    #[simd_test(enable = "avx512f,avx512vl")]
48776    unsafe fn test_mm_mask_cvtusepi32_epi16() {
48777        let a = _mm_set_epi32(5, 6, 7, 8);
48778        let src = _mm_set1_epi16(0);
48779        let r = _mm_mask_cvtusepi32_epi16(src, 0, a);
48780        assert_eq_m128i(r, src);
48781        let r = _mm_mask_cvtusepi32_epi16(src, 0b00001111, a);
48782        let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
48783        assert_eq_m128i(r, e);
48784    }
48785
48786    #[simd_test(enable = "avx512f,avx512vl")]
48787    unsafe fn test_mm_maskz_cvtusepi32_epi16() {
48788        let a = _mm_set_epi32(5, 6, 7, 8);
48789        let r = _mm_maskz_cvtusepi32_epi16(0, a);
48790        assert_eq_m128i(r, _mm_setzero_si128());
48791        let r = _mm_maskz_cvtusepi32_epi16(0b00001111, a);
48792        let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
48793        assert_eq_m128i(r, e);
48794    }
48795
48796    #[simd_test(enable = "avx512f")]
48797    unsafe fn test_mm512_cvtusepi32_epi8() {
48798        #[rustfmt::skip]
48799        let a = _mm512_set_epi32(
48800            0, 1, 2, 3,
48801            4, 5, 6, 7,
48802            8, 9, 10, 11,
48803            12, 13, i32::MIN, i32::MIN,
48804        );
48805        let r = _mm512_cvtusepi32_epi8(a);
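        // As above, 0x8000_0000 saturates to u8::MAX (0xFF), i.e. -1 as i8.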
48806        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1);
48807        assert_eq_m128i(r, e);
48808    }
48809
48810    #[simd_test(enable = "avx512f")]
48811    unsafe fn test_mm512_mask_cvtusepi32_epi8() {
48812        #[rustfmt::skip]
48813        let a = _mm512_set_epi32(
48814            0, 1, 2, 3,
48815            4, 5, 6, 7,
48816            8, 9, 10, 11,
48817            12, 13, i32::MIN, i32::MIN,
48818        );
48819        let src = _mm_set1_epi8(-1);
48820        let r = _mm512_mask_cvtusepi32_epi8(src, 0, a);
48821        assert_eq_m128i(r, src);
48822        let r = _mm512_mask_cvtusepi32_epi8(src, 0b00000000_11111111, a);
48823        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, -1, -1);
48824        assert_eq_m128i(r, e);
48825    }
48826
48827    #[simd_test(enable = "avx512f")]
48828    unsafe fn test_mm512_maskz_cvtusepi32_epi8() {
48829        #[rustfmt::skip]
48830        let a = _mm512_set_epi32(
48831            0, 1, 2, 3,
48832            4, 5, 6, 7,
48833            8, 9, 10, 11,
48834            12, 13, i32::MIN, i32::MIN,
48835        );
48836        let r = _mm512_maskz_cvtusepi32_epi8(0, a);
48837        assert_eq_m128i(r, _mm_setzero_si128());
48838        let r = _mm512_maskz_cvtusepi32_epi8(0b00000000_11111111, a);
48839        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, -1, -1);
48840        assert_eq_m128i(r, e);
48841    }
48842
48843    #[simd_test(enable = "avx512f,avx512vl")]
48844    unsafe fn test_mm256_cvtusepi32_epi8() {
48845        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
48846        let r = _mm256_cvtusepi32_epi8(a);
48847        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
48848        assert_eq_m128i(r, e);
48849    }
48850
48851    #[simd_test(enable = "avx512f,avx512vl")]
48852    unsafe fn test_mm256_mask_cvtusepi32_epi8() {
48853        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
48854        let src = _mm_set1_epi8(0);
48855        let r = _mm256_mask_cvtusepi32_epi8(src, 0, a);
48856        assert_eq_m128i(r, src);
48857        let r = _mm256_mask_cvtusepi32_epi8(src, 0b11111111, a);
48858        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
48859        assert_eq_m128i(r, e);
48860    }
48861
48862    #[simd_test(enable = "avx512f,avx512vl")]
48863    unsafe fn test_mm256_maskz_cvtusepi32_epi8() {
48864        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
48865        let r = _mm256_maskz_cvtusepi32_epi8(0, a);
48866        assert_eq_m128i(r, _mm_setzero_si128());
48867        let r = _mm256_maskz_cvtusepi32_epi8(0b11111111, a);
48868        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
48869        assert_eq_m128i(r, e);
48870    }
48871
48872    #[simd_test(enable = "avx512f,avx512vl")]
48873    unsafe fn test_mm_cvtusepi32_epi8() {
48874        let a = _mm_set_epi32(5, 6, 7, i32::MAX);
48875        let r = _mm_cvtusepi32_epi8(a);
48876        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
48877        assert_eq_m128i(r, e);
48878    }
48879
48880    #[simd_test(enable = "avx512f,avx512vl")]
48881    unsafe fn test_mm_mask_cvtusepi32_epi8() {
48882        let a = _mm_set_epi32(5, 6, 7, i32::MAX);
48883        let src = _mm_set1_epi8(0);
48884        let r = _mm_mask_cvtusepi32_epi8(src, 0, a);
48885        assert_eq_m128i(r, src);
48886        let r = _mm_mask_cvtusepi32_epi8(src, 0b00001111, a);
48887        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
48888        assert_eq_m128i(r, e);
48889    }
48890
48891    #[simd_test(enable = "avx512f,avx512vl")]
48892    unsafe fn test_mm_maskz_cvtusepi32_epi8() {
48893        let a = _mm_set_epi32(5, 6, 7, i32::MAX);
48894        let r = _mm_maskz_cvtusepi32_epi8(0, a);
48895        assert_eq_m128i(r, _mm_setzero_si128());
48896        let r = _mm_maskz_cvtusepi32_epi8(0b00001111, a);
48897        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
48898        assert_eq_m128i(r, e);
48899    }
48900
48901    #[simd_test(enable = "avx512f")]
48902    unsafe fn test_mm512_cvt_roundps_epi32() {
48903        let a = _mm512_setr_ps(
48904            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
48905        );
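        // _MM_FROUND_TO_NEAREST_INT rounds halfway cases to even (-1.5 -> -2, 9.5 -> 10),
        // while _MM_FROUND_TO_NEG_INF rounds toward negative infinity (9.5 -> 9).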
48906        let r = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
48907        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
48908        assert_eq_m512i(r, e);
48909        let r = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
48910        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 9, 10, 11, 12, 13, 14, 15);
48911        assert_eq_m512i(r, e);
48912    }
48913
48914    #[simd_test(enable = "avx512f")]
48915    unsafe fn test_mm512_mask_cvt_roundps_epi32() {
48916        let a = _mm512_setr_ps(
48917            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
48918        );
48919        let src = _mm512_set1_epi32(0);
48920        let r = _mm512_mask_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48921            src, 0, a,
48922        );
48923        assert_eq_m512i(r, src);
48924        let r = _mm512_mask_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48925            src,
48926            0b00000000_11111111,
48927            a,
48928        );
48929        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
48930        assert_eq_m512i(r, e);
48931    }
48932
48933    #[simd_test(enable = "avx512f")]
48934    unsafe fn test_mm512_maskz_cvt_roundps_epi32() {
48935        let a = _mm512_setr_ps(
48936            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
48937        );
48938        let r = _mm512_maskz_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48939            0, a,
48940        );
48941        assert_eq_m512i(r, _mm512_setzero_si512());
48942        let r = _mm512_maskz_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48943            0b00000000_11111111,
48944            a,
48945        );
48946        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
48947        assert_eq_m512i(r, e);
48948    }
48949
48950    #[simd_test(enable = "avx512f")]
48951    unsafe fn test_mm512_cvt_roundps_epu32() {
48952        let a = _mm512_setr_ps(
48953            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
48954        );
48955        let r = _mm512_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
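        // Negative inputs are out of range for the unsigned conversion and come back as
        // 0xFFFF_FFFF (-1 as i32) regardless of the rounding mode.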
48956        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16);
48957        assert_eq_m512i(r, e);
48958        let r = _mm512_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
48959        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
48960        assert_eq_m512i(r, e);
48961    }
48962
48963    #[simd_test(enable = "avx512f")]
48964    unsafe fn test_mm512_mask_cvt_roundps_epu32() {
48965        let a = _mm512_setr_ps(
48966            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
48967        );
48968        let src = _mm512_set1_epi32(0);
48969        let r = _mm512_mask_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48970            src, 0, a,
48971        );
48972        assert_eq_m512i(r, src);
48973        let r = _mm512_mask_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48974            src,
48975            0b00000000_11111111,
48976            a,
48977        );
48978        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
48979        assert_eq_m512i(r, e);
48980    }
48981
48982    #[simd_test(enable = "avx512f")]
48983    unsafe fn test_mm512_maskz_cvt_roundps_epu32() {
48984        let a = _mm512_setr_ps(
48985            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
48986        );
48987        let r = _mm512_maskz_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48988            0, a,
48989        );
48990        assert_eq_m512i(r, _mm512_setzero_si512());
48991        let r = _mm512_maskz_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48992            0b00000000_11111111,
48993            a,
48994        );
48995        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
48996        assert_eq_m512i(r, e);
48997    }
48998
48999    #[simd_test(enable = "avx512f")]
49000    unsafe fn test_mm512_cvt_roundepi32_ps() {
49001        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49002        let r = _mm512_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
49003        let e = _mm512_setr_ps(
49004            0., -2., 2., -4., 4., -6., 6., -8., 8., 10., 10., 12., 12., 14., 14., 16.,
49005        );
49006        assert_eq_m512(r, e);
49007    }
49008
49009    #[simd_test(enable = "avx512f")]
49010    unsafe fn test_mm512_mask_cvt_roundepi32_ps() {
49011        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49012        let src = _mm512_set1_ps(0.);
49013        let r = _mm512_mask_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49014            src, 0, a,
49015        );
49016        assert_eq_m512(r, src);
49017        let r = _mm512_mask_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49018            src,
49019            0b00000000_11111111,
49020            a,
49021        );
49022        let e = _mm512_setr_ps(
49023            0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
49024        );
49025        assert_eq_m512(r, e);
49026    }
49027
49028    #[simd_test(enable = "avx512f")]
49029    unsafe fn test_mm512_maskz_cvt_roundepi32_ps() {
49030        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49031        let r = _mm512_maskz_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49032            0, a,
49033        );
49034        assert_eq_m512(r, _mm512_setzero_ps());
49035        let r = _mm512_maskz_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49036            0b00000000_11111111,
49037            a,
49038        );
49039        let e = _mm512_setr_ps(
49040            0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
49041        );
49042        assert_eq_m512(r, e);
49043    }
49044
49045    #[simd_test(enable = "avx512f")]
49046    unsafe fn test_mm512_cvt_roundepu32_ps() {
49047        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49048        let r = _mm512_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
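        // -2 reinterpreted as u32 is 4_294_967_294, whose nearest f32 is 4_294_967_296.0;
        // the literal 4294967300. denotes that same f32 value.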
49049        #[rustfmt::skip]
49050        let e = _mm512_setr_ps(
49051            0., 4294967300., 2., 4294967300.,
49052            4., 4294967300., 6., 4294967300.,
49053            8., 10., 10., 12.,
49054            12., 14., 14., 16.,
49055        );
49056        assert_eq_m512(r, e);
49057    }
49058
49059    #[simd_test(enable = "avx512f")]
49060    unsafe fn test_mm512_mask_cvt_roundepu32_ps() {
49061        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49062        let src = _mm512_set1_ps(0.);
49063        let r = _mm512_mask_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49064            src, 0, a,
49065        );
49066        assert_eq_m512(r, src);
49067        let r = _mm512_mask_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49068            src,
49069            0b00000000_11111111,
49070            a,
49071        );
49072        #[rustfmt::skip]
49073        let e = _mm512_setr_ps(
49074            0., 4294967300., 2., 4294967300.,
49075            4., 4294967300., 6., 4294967300.,
49076            0., 0., 0., 0.,
49077            0., 0., 0., 0.,
49078        );
49079        assert_eq_m512(r, e);
49080    }
49081
49082    #[simd_test(enable = "avx512f")]
49083    unsafe fn test_mm512_maskz_cvt_roundepu32_ps() {
49084        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49085        let r = _mm512_maskz_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49086            0, a,
49087        );
49088        assert_eq_m512(r, _mm512_setzero_ps());
49089        let r = _mm512_maskz_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49090            0b00000000_11111111,
49091            a,
49092        );
49093        #[rustfmt::skip]
49094        let e = _mm512_setr_ps(
49095            0., 4294967300., 2., 4294967300.,
49096            4., 4294967300., 6., 4294967300.,
49097            0., 0., 0., 0.,
49098            0., 0., 0., 0.,
49099        );
49100        assert_eq_m512(r, e);
49101    }
49102
49103    #[simd_test(enable = "avx512f")]
49104    unsafe fn test_mm512_cvt_roundps_ph() {
49105        let a = _mm512_set1_ps(1.);
49106        let r = _mm512_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(a);
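        // 0x3C00 is 1.0 in IEEE 754 binary16, so each 64-bit lane of the expected result is
        // 0x3C00_3C00_3C00_3C00 == 4323521613979991040 (four packed half floats).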
49107        let e = _mm256_setr_epi64x(
49108            4323521613979991040,
49109            4323521613979991040,
49110            4323521613979991040,
49111            4323521613979991040,
49112        );
49113        assert_eq_m256i(r, e);
49114    }
49115
49116    #[simd_test(enable = "avx512f")]
49117    unsafe fn test_mm512_mask_cvt_roundps_ph() {
49118        let a = _mm512_set1_ps(1.);
49119        let src = _mm256_set1_epi16(0);
49120        let r = _mm512_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49121        assert_eq_m256i(r, src);
49122        let r = _mm512_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49123        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
49124        assert_eq_m256i(r, e);
49125    }
49126
49127    #[simd_test(enable = "avx512f")]
49128    unsafe fn test_mm512_maskz_cvt_roundps_ph() {
49129        let a = _mm512_set1_ps(1.);
49130        let r = _mm512_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
49131        assert_eq_m256i(r, _mm256_setzero_si256());
49132        let r = _mm512_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49133        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
49134        assert_eq_m256i(r, e);
49135    }
49136
49137    #[simd_test(enable = "avx512f,avx512vl")]
49138    unsafe fn test_mm256_mask_cvt_roundps_ph() {
49139        let a = _mm256_set1_ps(1.);
49140        let src = _mm_set1_epi16(0);
49141        let r = _mm256_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49142        assert_eq_m128i(r, src);
49143        let r = _mm256_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b11111111, a);
49144        let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49145        assert_eq_m128i(r, e);
49146    }
49147
49148    #[simd_test(enable = "avx512f,avx512vl")]
49149    unsafe fn test_mm256_maskz_cvt_roundps_ph() {
49150        let a = _mm256_set1_ps(1.);
49151        let r = _mm256_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
49152        assert_eq_m128i(r, _mm_setzero_si128());
49153        let r = _mm256_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b11111111, a);
49154        let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49155        assert_eq_m128i(r, e);
49156    }
49157
49158    #[simd_test(enable = "avx512f,avx512vl")]
49159    unsafe fn test_mm_mask_cvt_roundps_ph() {
49160        let a = _mm_set1_ps(1.);
49161        let src = _mm_set1_epi16(0);
49162        let r = _mm_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49163        assert_eq_m128i(r, src);
49164        let r = _mm_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b00001111, a);
49165        let e = _mm_setr_epi64x(4323521613979991040, 0);
49166        assert_eq_m128i(r, e);
49167    }
49168
49169    #[simd_test(enable = "avx512f,avx512vl")]
49170    unsafe fn test_mm_maskz_cvt_roundps_ph() {
49171        let a = _mm_set1_ps(1.);
49172        let r = _mm_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
49173        assert_eq_m128i(r, _mm_setzero_si128());
49174        let r = _mm_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b00001111, a);
49175        let e = _mm_setr_epi64x(4323521613979991040, 0);
49176        assert_eq_m128i(r, e);
49177    }
49178
49179    #[simd_test(enable = "avx512f")]
49180    unsafe fn test_mm512_cvtps_ph() {
49181        let a = _mm512_set1_ps(1.);
49182        let r = _mm512_cvtps_ph::<_MM_FROUND_NO_EXC>(a);
49183        let e = _mm256_setr_epi64x(
49184            4323521613979991040,
49185            4323521613979991040,
49186            4323521613979991040,
49187            4323521613979991040,
49188        );
49189        assert_eq_m256i(r, e);
49190    }
49191
49192    #[simd_test(enable = "avx512f")]
49193    unsafe fn test_mm512_mask_cvtps_ph() {
49194        let a = _mm512_set1_ps(1.);
49195        let src = _mm256_set1_epi16(0);
49196        let r = _mm512_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49197        assert_eq_m256i(r, src);
49198        let r = _mm512_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49199        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
49200        assert_eq_m256i(r, e);
49201    }
49202
49203    #[simd_test(enable = "avx512f")]
49204    unsafe fn test_mm512_maskz_cvtps_ph() {
49205        let a = _mm512_set1_ps(1.);
49206        let r = _mm512_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
49207        assert_eq_m256i(r, _mm256_setzero_si256());
49208        let r = _mm512_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49209        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
49210        assert_eq_m256i(r, e);
49211    }
49212
49213    #[simd_test(enable = "avx512f,avx512vl")]
49214    unsafe fn test_mm256_mask_cvtps_ph() {
49215        let a = _mm256_set1_ps(1.);
49216        let src = _mm_set1_epi16(0);
49217        let r = _mm256_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49218        assert_eq_m128i(r, src);
49219        let r = _mm256_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b11111111, a);
49220        let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49221        assert_eq_m128i(r, e);
49222    }
49223
49224    #[simd_test(enable = "avx512f,avx512vl")]
49225    unsafe fn test_mm256_maskz_cvtps_ph() {
49226        let a = _mm256_set1_ps(1.);
49227        let r = _mm256_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
49228        assert_eq_m128i(r, _mm_setzero_si128());
49229        let r = _mm256_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b11111111, a);
49230        let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49231        assert_eq_m128i(r, e);
49232    }
49233
49234    #[simd_test(enable = "avx512f,avx512vl")]
49235    unsafe fn test_mm_mask_cvtps_ph() {
49236        let a = _mm_set1_ps(1.);
49237        let src = _mm_set1_epi16(0);
49238        let r = _mm_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49239        assert_eq_m128i(r, src);
49240        let r = _mm_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b00001111, a);
49241        let e = _mm_setr_epi64x(4323521613979991040, 0);
49242        assert_eq_m128i(r, e);
49243    }
49244
49245    #[simd_test(enable = "avx512f,avx512vl")]
49246    unsafe fn test_mm_maskz_cvtps_ph() {
49247        let a = _mm_set1_ps(1.);
49248        let r = _mm_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
49249        assert_eq_m128i(r, _mm_setzero_si128());
49250        let r = _mm_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b00001111, a);
49251        let e = _mm_setr_epi64x(4323521613979991040, 0);
49252        assert_eq_m128i(r, e);
49253    }
49254
49255    #[simd_test(enable = "avx512f")]
49256    unsafe fn test_mm512_cvt_roundph_ps() {
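             // Each 64-bit lane of the input packs four half-precision (f16) 1.0 values: 0x3C00_3C00_3C00_3C00 == 4323521613979991040.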
49257        let a = _mm256_setr_epi64x(
49258            4323521613979991040,
49259            4323521613979991040,
49260            4323521613979991040,
49261            4323521613979991040,
49262        );
49263        let r = _mm512_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(a);
49264        let e = _mm512_set1_ps(1.);
49265        assert_eq_m512(r, e);
49266    }
49267
49268    #[simd_test(enable = "avx512f")]
49269    unsafe fn test_mm512_mask_cvt_roundph_ps() {
49270        let a = _mm256_setr_epi64x(
49271            4323521613979991040,
49272            4323521613979991040,
49273            4323521613979991040,
49274            4323521613979991040,
49275        );
49276        let src = _mm512_set1_ps(0.);
49277        let r = _mm512_mask_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0, a);
49278        assert_eq_m512(r, src);
49279        let r = _mm512_mask_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49280        let e = _mm512_setr_ps(
49281            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
49282        );
49283        assert_eq_m512(r, e);
49284    }
49285
49286    #[simd_test(enable = "avx512f")]
49287    unsafe fn test_mm512_maskz_cvt_roundph_ps() {
49288        let a = _mm256_setr_epi64x(
49289            4323521613979991040,
49290            4323521613979991040,
49291            4323521613979991040,
49292            4323521613979991040,
49293        );
49294        let r = _mm512_maskz_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(0, a);
49295        assert_eq_m512(r, _mm512_setzero_ps());
49296        let r = _mm512_maskz_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49297        let e = _mm512_setr_ps(
49298            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
49299        );
49300        assert_eq_m512(r, e);
49301    }
49302
49303    #[simd_test(enable = "avx512f")]
49304    unsafe fn test_mm512_cvtph_ps() {
49305        let a = _mm256_setr_epi64x(
49306            4323521613979991040,
49307            4323521613979991040,
49308            4323521613979991040,
49309            4323521613979991040,
49310        );
49311        let r = _mm512_cvtph_ps(a);
49312        let e = _mm512_set1_ps(1.);
49313        assert_eq_m512(r, e);
49314    }
49315
49316    #[simd_test(enable = "avx512f")]
49317    unsafe fn test_mm512_mask_cvtph_ps() {
49318        let a = _mm256_setr_epi64x(
49319            4323521613979991040,
49320            4323521613979991040,
49321            4323521613979991040,
49322            4323521613979991040,
49323        );
49324        let src = _mm512_set1_ps(0.);
49325        let r = _mm512_mask_cvtph_ps(src, 0, a);
49326        assert_eq_m512(r, src);
49327        let r = _mm512_mask_cvtph_ps(src, 0b00000000_11111111, a);
49328        let e = _mm512_setr_ps(
49329            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
49330        );
49331        assert_eq_m512(r, e);
49332    }
49333
49334    #[simd_test(enable = "avx512f")]
49335    unsafe fn test_mm512_maskz_cvtph_ps() {
49336        let a = _mm256_setr_epi64x(
49337            4323521613979991040,
49338            4323521613979991040,
49339            4323521613979991040,
49340            4323521613979991040,
49341        );
49342        let r = _mm512_maskz_cvtph_ps(0, a);
49343        assert_eq_m512(r, _mm512_setzero_ps());
49344        let r = _mm512_maskz_cvtph_ps(0b00000000_11111111, a);
49345        let e = _mm512_setr_ps(
49346            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
49347        );
49348        assert_eq_m512(r, e);
49349    }
49350
49351    #[simd_test(enable = "avx512f,avx512vl")]
49352    unsafe fn test_mm256_mask_cvtph_ps() {
49353        let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49354        let src = _mm256_set1_ps(0.);
49355        let r = _mm256_mask_cvtph_ps(src, 0, a);
49356        assert_eq_m256(r, src);
49357        let r = _mm256_mask_cvtph_ps(src, 0b11111111, a);
49358        let e = _mm256_setr_ps(1., 1., 1., 1., 1., 1., 1., 1.);
49359        assert_eq_m256(r, e);
49360    }
49361
49362    #[simd_test(enable = "avx512f,avx512vl")]
49363    unsafe fn test_mm256_maskz_cvtph_ps() {
49364        let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49365        let r = _mm256_maskz_cvtph_ps(0, a);
49366        assert_eq_m256(r, _mm256_setzero_ps());
49367        let r = _mm256_maskz_cvtph_ps(0b11111111, a);
49368        let e = _mm256_setr_ps(1., 1., 1., 1., 1., 1., 1., 1.);
49369        assert_eq_m256(r, e);
49370    }
49371
49372    #[simd_test(enable = "avx512f,avx512vl")]
49373    unsafe fn test_mm_mask_cvtph_ps() {
49374        let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49375        let src = _mm_set1_ps(0.);
49376        let r = _mm_mask_cvtph_ps(src, 0, a);
49377        assert_eq_m128(r, src);
49378        let r = _mm_mask_cvtph_ps(src, 0b00001111, a);
49379        let e = _mm_setr_ps(1., 1., 1., 1.);
49380        assert_eq_m128(r, e);
49381    }
49382
49383    #[simd_test(enable = "avx512f,avx512vl")]
49384    unsafe fn test_mm_maskz_cvtph_ps() {
49385        let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49386        let r = _mm_maskz_cvtph_ps(0, a);
49387        assert_eq_m128(r, _mm_setzero_ps());
49388        let r = _mm_maskz_cvtph_ps(0b00001111, a);
49389        let e = _mm_setr_ps(1., 1., 1., 1.);
49390        assert_eq_m128(r, e);
49391    }
49392
49393    #[simd_test(enable = "avx512f")]
49394    unsafe fn test_mm512_cvtt_roundps_epi32() {
49395        let a = _mm512_setr_ps(
49396            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49397        );
49398        let r = _mm512_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(a);
49399        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15);
49400        assert_eq_m512i(r, e);
49401    }
49402
49403    #[simd_test(enable = "avx512f")]
49404    unsafe fn test_mm512_mask_cvtt_roundps_epi32() {
49405        let a = _mm512_setr_ps(
49406            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49407        );
49408        let src = _mm512_set1_epi32(0);
49409        let r = _mm512_mask_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(src, 0, a);
49410        assert_eq_m512i(r, src);
49411        let r = _mm512_mask_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49412        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
49413        assert_eq_m512i(r, e);
49414    }
49415
49416    #[simd_test(enable = "avx512f")]
49417    unsafe fn test_mm512_maskz_cvtt_roundps_epi32() {
49418        let a = _mm512_setr_ps(
49419            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49420        );
49421        let r = _mm512_maskz_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(0, a);
49422        assert_eq_m512i(r, _mm512_setzero_si512());
49423        let r = _mm512_maskz_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49424        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
49425        assert_eq_m512i(r, e);
49426    }
49427
49428    #[simd_test(enable = "avx512f")]
49429    unsafe fn test_mm512_cvtt_roundps_epu32() {
49430        let a = _mm512_setr_ps(
49431            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49432        );
49433        let r = _mm512_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(a);
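             // The negative inputs truncate to values outside the u32 range, so the conversion yields 0xFFFFFFFF, shown as -1 in the i32 expectation.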
49434        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
49435        assert_eq_m512i(r, e);
49436    }
49437
49438    #[simd_test(enable = "avx512f")]
49439    unsafe fn test_mm512_mask_cvtt_roundps_epu32() {
49440        let a = _mm512_setr_ps(
49441            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49442        );
49443        let src = _mm512_set1_epi32(0);
49444        let r = _mm512_mask_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(src, 0, a);
49445        assert_eq_m512i(r, src);
49446        let r = _mm512_mask_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49447        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
49448        assert_eq_m512i(r, e);
49449    }
49450
49451    #[simd_test(enable = "avx512f")]
49452    unsafe fn test_mm512_maskz_cvtt_roundps_epu32() {
49453        let a = _mm512_setr_ps(
49454            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49455        );
49456        let r = _mm512_maskz_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(0, a);
49457        assert_eq_m512i(r, _mm512_setzero_si512());
49458        let r = _mm512_maskz_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49459        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
49460        assert_eq_m512i(r, e);
49461    }
49462
49463    #[simd_test(enable = "avx512f")]
49464    unsafe fn test_mm512_cvttps_epi32() {
49465        let a = _mm512_setr_ps(
49466            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49467        );
49468        let r = _mm512_cvttps_epi32(a);
49469        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15);
49470        assert_eq_m512i(r, e);
49471    }
49472
49473    #[simd_test(enable = "avx512f")]
49474    unsafe fn test_mm512_mask_cvttps_epi32() {
49475        let a = _mm512_setr_ps(
49476            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49477        );
49478        let src = _mm512_set1_epi32(0);
49479        let r = _mm512_mask_cvttps_epi32(src, 0, a);
49480        assert_eq_m512i(r, src);
49481        let r = _mm512_mask_cvttps_epi32(src, 0b00000000_11111111, a);
49482        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
49483        assert_eq_m512i(r, e);
49484    }
49485
49486    #[simd_test(enable = "avx512f")]
49487    unsafe fn test_mm512_maskz_cvttps_epi32() {
49488        let a = _mm512_setr_ps(
49489            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49490        );
49491        let r = _mm512_maskz_cvttps_epi32(0, a);
49492        assert_eq_m512i(r, _mm512_setzero_si512());
49493        let r = _mm512_maskz_cvttps_epi32(0b00000000_11111111, a);
49494        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
49495        assert_eq_m512i(r, e);
49496    }
49497
49498    #[simd_test(enable = "avx512f,avx512vl")]
49499    unsafe fn test_mm256_mask_cvttps_epi32() {
49500        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49501        let src = _mm256_set1_epi32(0);
49502        let r = _mm256_mask_cvttps_epi32(src, 0, a);
49503        assert_eq_m256i(r, src);
49504        let r = _mm256_mask_cvttps_epi32(src, 0b11111111, a);
49505        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49506        assert_eq_m256i(r, e);
49507    }
49508
49509    #[simd_test(enable = "avx512f,avx512vl")]
49510    unsafe fn test_mm256_maskz_cvttps_epi32() {
49511        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49512        let r = _mm256_maskz_cvttps_epi32(0, a);
49513        assert_eq_m256i(r, _mm256_setzero_si256());
49514        let r = _mm256_maskz_cvttps_epi32(0b11111111, a);
49515        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49516        assert_eq_m256i(r, e);
49517    }
49518
49519    #[simd_test(enable = "avx512f,avx512vl")]
49520    unsafe fn test_mm_mask_cvttps_epi32() {
49521        let a = _mm_set_ps(12., 13.5, 14., 15.5);
49522        let src = _mm_set1_epi32(0);
49523        let r = _mm_mask_cvttps_epi32(src, 0, a);
49524        assert_eq_m128i(r, src);
49525        let r = _mm_mask_cvttps_epi32(src, 0b00001111, a);
49526        let e = _mm_set_epi32(12, 13, 14, 15);
49527        assert_eq_m128i(r, e);
49528    }
49529
49530    #[simd_test(enable = "avx512f,avx512vl")]
49531    unsafe fn test_mm_maskz_cvttps_epi32() {
49532        let a = _mm_set_ps(12., 13.5, 14., 15.5);
49533        let r = _mm_maskz_cvttps_epi32(0, a);
49534        assert_eq_m128i(r, _mm_setzero_si128());
49535        let r = _mm_maskz_cvttps_epi32(0b00001111, a);
49536        let e = _mm_set_epi32(12, 13, 14, 15);
49537        assert_eq_m128i(r, e);
49538    }
49539
49540    #[simd_test(enable = "avx512f")]
49541    unsafe fn test_mm512_cvttps_epu32() {
49542        let a = _mm512_setr_ps(
49543            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49544        );
49545        let r = _mm512_cvttps_epu32(a);
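             // Out-of-range (negative) inputs convert to 0xFFFFFFFF, hence the -1 entries in the expected vector.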
49546        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
49547        assert_eq_m512i(r, e);
49548    }
49549
49550    #[simd_test(enable = "avx512f")]
49551    unsafe fn test_mm512_mask_cvttps_epu32() {
49552        let a = _mm512_setr_ps(
49553            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49554        );
49555        let src = _mm512_set1_epi32(0);
49556        let r = _mm512_mask_cvttps_epu32(src, 0, a);
49557        assert_eq_m512i(r, src);
49558        let r = _mm512_mask_cvttps_epu32(src, 0b00000000_11111111, a);
49559        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
49560        assert_eq_m512i(r, e);
49561    }
49562
49563    #[simd_test(enable = "avx512f")]
49564    unsafe fn test_mm512_maskz_cvttps_epu32() {
49565        let a = _mm512_setr_ps(
49566            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49567        );
49568        let r = _mm512_maskz_cvttps_epu32(0, a);
49569        assert_eq_m512i(r, _mm512_setzero_si512());
49570        let r = _mm512_maskz_cvttps_epu32(0b00000000_11111111, a);
49571        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
49572        assert_eq_m512i(r, e);
49573    }
49574
49575    #[simd_test(enable = "avx512f,avx512vl")]
49576    unsafe fn test_mm256_cvttps_epu32() {
49577        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49578        let r = _mm256_cvttps_epu32(a);
49579        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49580        assert_eq_m256i(r, e);
49581    }
49582
49583    #[simd_test(enable = "avx512f,avx512vl")]
49584    unsafe fn test_mm256_mask_cvttps_epu32() {
49585        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49586        let src = _mm256_set1_epi32(0);
49587        let r = _mm256_mask_cvttps_epu32(src, 0, a);
49588        assert_eq_m256i(r, src);
49589        let r = _mm256_mask_cvttps_epu32(src, 0b11111111, a);
49590        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49591        assert_eq_m256i(r, e);
49592    }
49593
49594    #[simd_test(enable = "avx512f,avx512vl")]
49595    unsafe fn test_mm256_maskz_cvttps_epu32() {
49596        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49597        let r = _mm256_maskz_cvttps_epu32(0, a);
49598        assert_eq_m256i(r, _mm256_setzero_si256());
49599        let r = _mm256_maskz_cvttps_epu32(0b11111111, a);
49600        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49601        assert_eq_m256i(r, e);
49602    }
49603
49604    #[simd_test(enable = "avx512f,avx512vl")]
49605    unsafe fn test_mm_cvttps_epu32() {
49606        let a = _mm_set_ps(12., 13.5, 14., 15.5);
49607        let r = _mm_cvttps_epu32(a);
49608        let e = _mm_set_epi32(12, 13, 14, 15);
49609        assert_eq_m128i(r, e);
49610    }
49611
49612    #[simd_test(enable = "avx512f,avx512vl")]
49613    unsafe fn test_mm_mask_cvttps_epu32() {
49614        let a = _mm_set_ps(12., 13.5, 14., 15.5);
49615        let src = _mm_set1_epi32(0);
49616        let r = _mm_mask_cvttps_epu32(src, 0, a);
49617        assert_eq_m128i(r, src);
49618        let r = _mm_mask_cvttps_epu32(src, 0b00001111, a);
49619        let e = _mm_set_epi32(12, 13, 14, 15);
49620        assert_eq_m128i(r, e);
49621    }
49622
49623    #[simd_test(enable = "avx512f,avx512vl")]
49624    unsafe fn test_mm_maskz_cvttps_epu32() {
49625        let a = _mm_set_ps(12., 13.5, 14., 15.5);
49626        let r = _mm_maskz_cvttps_epu32(0, a);
49627        assert_eq_m128i(r, _mm_setzero_si128());
49628        let r = _mm_maskz_cvttps_epu32(0b00001111, a);
49629        let e = _mm_set_epi32(12, 13, 14, 15);
49630        assert_eq_m128i(r, e);
49631    }
49632
49633    #[simd_test(enable = "avx512f")]
49634    unsafe fn test_mm512_i32gather_ps() {
49635        let arr: [f32; 256] = core::array::from_fn(|i| i as f32);
49636        // A scale of 4 addresses 32-bit (dword) elements
49637        #[rustfmt::skip]
49638        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49639                                      120, 128, 136, 144, 152, 160, 168, 176);
49640        let r = _mm512_i32gather_ps::<4>(index, arr.as_ptr() as *const u8);
49641        #[rustfmt::skip]
49642        assert_eq_m512(r, _mm512_setr_ps(0., 16., 32., 48., 64., 80., 96., 112.,
49643                                         120., 128., 136., 144., 152., 160., 168., 176.));
49644    }
49645
49646    #[simd_test(enable = "avx512f")]
49647    unsafe fn test_mm512_mask_i32gather_ps() {
49648        let arr: [f32; 256] = core::array::from_fn(|i| i as f32);
49649        let src = _mm512_set1_ps(2.);
49650        let mask = 0b10101010_10101010;
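             // Only the odd lanes are gathered; even lanes keep the value 2. from src.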
49651        #[rustfmt::skip]
49652        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49653                                      120, 128, 136, 144, 152, 160, 168, 176);
49654        // A scale of 4 addresses 32-bit (dword) elements
49655        let r = _mm512_mask_i32gather_ps::<4>(src, mask, index, arr.as_ptr() as *const u8);
49656        #[rustfmt::skip]
49657        assert_eq_m512(r, _mm512_setr_ps(2., 16., 2., 48., 2., 80., 2., 112.,
49658                                         2., 128., 2., 144., 2., 160., 2., 176.));
49659    }
49660
49661    #[simd_test(enable = "avx512f")]
49662    unsafe fn test_mm512_i32gather_epi32() {
49663        let arr: [i32; 256] = core::array::from_fn(|i| i as i32);
49664        // A scale of 4 addresses 32-bit (dword) elements
49665        #[rustfmt::skip]
49666        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49667                                      120, 128, 136, 144, 152, 160, 168, 176);
49668        let r = _mm512_i32gather_epi32::<4>(index, arr.as_ptr() as *const u8);
49669        #[rustfmt::skip]
49670        assert_eq_m512i(r, _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49671                                             120, 128, 136, 144, 152, 160, 168, 176));
49672    }
49673
49674    #[simd_test(enable = "avx512f")]
49675    unsafe fn test_mm512_mask_i32gather_epi32() {
49676        let arr: [i32; 256] = core::array::from_fn(|i| i as i32);
49677        let src = _mm512_set1_epi32(2);
49678        let mask = 0b10101010_10101010;
49679        let index = _mm512_setr_epi32(
49680            0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240,
49681        );
49682        // A scale of 4 addresses 32-bit (dword) elements
49683        let r = _mm512_mask_i32gather_epi32::<4>(src, mask, index, arr.as_ptr() as *const u8);
49684        assert_eq_m512i(
49685            r,
49686            _mm512_setr_epi32(2, 16, 2, 48, 2, 80, 2, 112, 2, 144, 2, 176, 2, 208, 2, 240),
49687        );
49688    }
49689
49690    #[simd_test(enable = "avx512f")]
49691    unsafe fn test_mm512_i32scatter_ps() {
49692        let mut arr = [0f32; 256];
49693        #[rustfmt::skip]
49694        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49695                                      128, 144, 160, 176, 192, 208, 224, 240);
49696        let src = _mm512_setr_ps(
49697            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
49698        );
49699        // A scale of 4 addresses 32-bit (dword) elements
49700        _mm512_i32scatter_ps::<4>(arr.as_mut_ptr() as *mut u8, index, src);
49701        let mut expected = [0f32; 256];
49702        for i in 0..16 {
49703            expected[i * 16] = (i + 1) as f32;
49704        }
49705        assert_eq!(&arr[..], &expected[..],);
49706    }
49707
49708    #[simd_test(enable = "avx512f")]
49709    unsafe fn test_mm512_mask_i32scatter_ps() {
49710        let mut arr = [0f32; 256];
49711        let mask = 0b10101010_10101010;
49712        #[rustfmt::skip]
49713        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49714                                      128, 144, 160, 176, 192, 208, 224, 240);
49715        let src = _mm512_setr_ps(
49716            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
49717        );
49718        // A scale of 4 addresses 32-bit (dword) elements
49719        _mm512_mask_i32scatter_ps::<4>(arr.as_mut_ptr() as *mut u8, mask, index, src);
49720        let mut expected = [0f32; 256];
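             // With mask 0b10101010_10101010 only the odd lanes are stored: indices 16, 48, ..., 240 receive 2., 4., ..., 16.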
49721        for i in 0..8 {
49722            expected[i * 32 + 16] = 2. * (i + 1) as f32;
49723        }
49724        assert_eq!(&arr[..], &expected[..],);
49725    }
49726
49727    #[simd_test(enable = "avx512f")]
49728    unsafe fn test_mm512_i32scatter_epi32() {
49729        let mut arr = [0i32; 256];
49730        #[rustfmt::skip]
49732        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49733                                      128, 144, 160, 176, 192, 208, 224, 240);
49734        let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
49735        // A scale of 4 addresses 32-bit (dword) elements
49736        _mm512_i32scatter_epi32::<4>(arr.as_mut_ptr() as *mut u8, index, src);
49737        let mut expected = [0i32; 256];
49738        for i in 0..16 {
49739            expected[i * 16] = (i + 1) as i32;
49740        }
49741        assert_eq!(&arr[..], &expected[..],);
49742    }
49743
49744    #[simd_test(enable = "avx512f")]
49745    unsafe fn test_mm512_mask_i32scatter_epi32() {
49746        let mut arr = [0i32; 256];
49747        let mask = 0b10101010_10101010;
49748        #[rustfmt::skip]
49749        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49750                                      128, 144, 160, 176, 192, 208, 224, 240);
49751        let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
49752        // A scale of 4 addresses 32-bit (dword) elements
49753        _mm512_mask_i32scatter_epi32::<4>(arr.as_mut_ptr() as *mut u8, mask, index, src);
49754        let mut expected = [0i32; 256];
49755        for i in 0..8 {
49756            expected[i * 32 + 16] = 2 * (i + 1) as i32;
49757        }
49758        assert_eq!(&arr[..], &expected[..],);
49759    }
49760
49761    #[simd_test(enable = "avx512f")]
49762    unsafe fn test_mm512_cmplt_ps_mask() {
49763        #[rustfmt::skip]
49764        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49765                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49766        let b = _mm512_set1_ps(-1.);
49767        let m = _mm512_cmplt_ps_mask(a, b);
49768        assert_eq!(m, 0b00000101_00000101);
49769    }
49770
49771    #[simd_test(enable = "avx512f")]
49772    unsafe fn test_mm512_mask_cmplt_ps_mask() {
49773        #[rustfmt::skip]
49774        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49775                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49776        let b = _mm512_set1_ps(-1.);
49777        let mask = 0b01100110_01100110;
49778        let r = _mm512_mask_cmplt_ps_mask(mask, a, b);
49779        assert_eq!(r, 0b00000100_00000100);
49780    }
49781
49782    #[simd_test(enable = "avx512f")]
49783    unsafe fn test_mm512_cmpnlt_ps_mask() {
49784        #[rustfmt::skip]
49785        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49786                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49787        let b = _mm512_set1_ps(-1.);
49788        assert_eq!(_mm512_cmpnlt_ps_mask(a, b), !_mm512_cmplt_ps_mask(a, b));
49789    }
49790
49791    #[simd_test(enable = "avx512f")]
49792    unsafe fn test_mm512_mask_cmpnlt_ps_mask() {
49793        #[rustfmt::skip]
49794        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49795                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49796        let b = _mm512_set1_ps(-1.);
49797        let mask = 0b01111010_01111010;
49798        assert_eq!(_mm512_mask_cmpnlt_ps_mask(mask, a, b), 0b01111010_01111010);
49799    }
49800
49801    #[simd_test(enable = "avx512f")]
49802    unsafe fn test_mm512_cmpnle_ps_mask() {
49803        #[rustfmt::skip]
49804        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49805                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49806        let b = _mm512_set1_ps(-1.);
49807        let m = _mm512_cmpnle_ps_mask(b, a);
49808        assert_eq!(m, 0b00001101_00001101);
49809    }
49810
49811    #[simd_test(enable = "avx512f")]
49812    unsafe fn test_mm512_mask_cmpnle_ps_mask() {
49813        #[rustfmt::skip]
49814        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49815                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49816        let b = _mm512_set1_ps(-1.);
49817        let mask = 0b01100110_01100110;
49818        let r = _mm512_mask_cmpnle_ps_mask(mask, b, a);
49819        assert_eq!(r, 0b00000100_00000100);
49820    }
49821
49822    #[simd_test(enable = "avx512f")]
49823    unsafe fn test_mm512_cmple_ps_mask() {
49824        #[rustfmt::skip]
49825        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49826                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49827        let b = _mm512_set1_ps(-1.);
49828        assert_eq!(_mm512_cmple_ps_mask(a, b), 0b00100101_00100101);
49829    }
49830
49831    #[simd_test(enable = "avx512f")]
49832    unsafe fn test_mm512_mask_cmple_ps_mask() {
49833        #[rustfmt::skip]
49834        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49835                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49836        let b = _mm512_set1_ps(-1.);
49837        let mask = 0b01111010_01111010;
49838        assert_eq!(_mm512_mask_cmple_ps_mask(mask, a, b), 0b00100000_00100000);
49839    }
49840
49841    #[simd_test(enable = "avx512f")]
49842    unsafe fn test_mm512_cmpeq_ps_mask() {
49843        #[rustfmt::skip]
49844        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
49845                              0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
49846        #[rustfmt::skip]
49847        let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
49848                              0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
49849        let m = _mm512_cmpeq_ps_mask(b, a);
49850        assert_eq!(m, 0b11001101_11001101);
49851    }
49852
49853    #[simd_test(enable = "avx512f")]
49854    unsafe fn test_mm512_mask_cmpeq_ps_mask() {
49855        #[rustfmt::skip]
49856        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
49857                              0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
49858        #[rustfmt::skip]
49859        let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
49860                              0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
49861        let mask = 0b01111010_01111010;
49862        let r = _mm512_mask_cmpeq_ps_mask(mask, b, a);
49863        assert_eq!(r, 0b01001000_01001000);
49864    }
49865
49866    #[simd_test(enable = "avx512f")]
49867    unsafe fn test_mm512_cmpneq_ps_mask() {
49868        #[rustfmt::skip]
49869        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
49870                              0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
49871        #[rustfmt::skip]
49872        let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
49873                              0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
49874        let m = _mm512_cmpneq_ps_mask(b, a);
49875        assert_eq!(m, 0b00110010_00110010);
49876    }
49877
49878    #[simd_test(enable = "avx512f")]
49879    unsafe fn test_mm512_mask_cmpneq_ps_mask() {
49880        #[rustfmt::skip]
49881        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
49882                              0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
49883        #[rustfmt::skip]
49884        let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
49885                              0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
49886        let mask = 0b01111010_01111010;
49887        let r = _mm512_mask_cmpneq_ps_mask(mask, b, a);
49888        assert_eq!(r, 0b00110010_00110010)
49889    }
49890
49891    #[simd_test(enable = "avx512f")]
49892    unsafe fn test_mm512_cmp_ps_mask() {
49893        #[rustfmt::skip]
49894        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
49895                              0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49896        let b = _mm512_set1_ps(-1.);
49897        let m = _mm512_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
49898        assert_eq!(m, 0b00000101_00000101);
49899    }
49900
49901    #[simd_test(enable = "avx512f")]
49902    unsafe fn test_mm512_mask_cmp_ps_mask() {
49903        #[rustfmt::skip]
49904        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
49905                              0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49906        let b = _mm512_set1_ps(-1.);
49907        let mask = 0b01100110_01100110;
49908        let r = _mm512_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
49909        assert_eq!(r, 0b00000100_00000100);
49910    }
49911
49912    #[simd_test(enable = "avx512f,avx512vl")]
49913    unsafe fn test_mm256_cmp_ps_mask() {
49914        let a = _mm256_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49915        let b = _mm256_set1_ps(-1.);
49916        let m = _mm256_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
49917        assert_eq!(m, 0b00000101);
49918    }
49919
49920    #[simd_test(enable = "avx512f,avx512vl")]
49921    unsafe fn test_mm256_mask_cmp_ps_mask() {
49922        let a = _mm256_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49923        let b = _mm256_set1_ps(-1.);
49924        let mask = 0b01100110;
49925        let r = _mm256_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
49926        assert_eq!(r, 0b00000100);
49927    }
49928
49929    #[simd_test(enable = "avx512f,avx512vl")]
49930    unsafe fn test_mm_cmp_ps_mask() {
49931        let a = _mm_set_ps(0., 1., -1., 13.);
49932        let b = _mm_set1_ps(1.);
49933        let m = _mm_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
49934        assert_eq!(m, 0b00001010);
49935    }
49936
49937    #[simd_test(enable = "avx512f,avx512vl")]
49938    unsafe fn test_mm_mask_cmp_ps_mask() {
49939        let a = _mm_set_ps(0., 1., -1., 13.);
49940        let b = _mm_set1_ps(1.);
49941        let mask = 0b11111111;
49942        let r = _mm_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
49943        assert_eq!(r, 0b00001010);
49944    }
49945
49946    #[simd_test(enable = "avx512f")]
49947    unsafe fn test_mm512_cmp_round_ps_mask() {
49948        #[rustfmt::skip]
49949        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
49950                              0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49951        let b = _mm512_set1_ps(-1.);
49952        let m = _mm512_cmp_round_ps_mask::<_CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION>(a, b);
49953        assert_eq!(m, 0b00000101_00000101);
49954    }
49955
49956    #[simd_test(enable = "avx512f")]
49957    unsafe fn test_mm512_mask_cmp_round_ps_mask() {
49958        #[rustfmt::skip]
49959        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
49960                              0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49961        let b = _mm512_set1_ps(-1.);
49962        let mask = 0b01100110_01100110;
49963        let r = _mm512_mask_cmp_round_ps_mask::<_CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION>(mask, a, b);
49964        assert_eq!(r, 0b00000100_00000100);
49965    }
49966
49967    #[simd_test(enable = "avx512f")]
49968    unsafe fn test_mm512_cmpord_ps_mask() {
49969        #[rustfmt::skip]
49970        let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
49971                              f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
49972        #[rustfmt::skip]
49973        let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
49974                              f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
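             // A lane is ordered only when neither operand is NaN.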
49975        let m = _mm512_cmpord_ps_mask(a, b);
49976        assert_eq!(m, 0b00000101_00000101);
49977    }
49978
49979    #[simd_test(enable = "avx512f")]
49980    unsafe fn test_mm512_mask_cmpord_ps_mask() {
49981        #[rustfmt::skip]
49982        let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
49983                              f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
49984        #[rustfmt::skip]
49985        let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
49986                              f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
49987        let mask = 0b11000011_11000011;
49988        let m = _mm512_mask_cmpord_ps_mask(mask, a, b);
49989        assert_eq!(m, 0b00000001_00000001);
49990    }
49991
49992    #[simd_test(enable = "avx512f")]
49993    unsafe fn test_mm512_cmpunord_ps_mask() {
49994        #[rustfmt::skip]
49995        let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
49996                              f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
49997        #[rustfmt::skip]
49998        let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
49999                              f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
50000        let m = _mm512_cmpunord_ps_mask(a, b);
50002        assert_eq!(m, 0b11111010_11111010);
50003    }
50004
50005    #[simd_test(enable = "avx512f")]
50006    unsafe fn test_mm512_mask_cmpunord_ps_mask() {
50007        #[rustfmt::skip]
50008        let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
50009                              f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
50010        #[rustfmt::skip]
50011        let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
50012                              f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
50013        let mask = 0b00001111_00001111;
50014        let m = _mm512_mask_cmpunord_ps_mask(mask, a, b);
50015        assert_eq!(m, 0b00001010_00001010);
50016    }
50017
50018    #[simd_test(enable = "avx512f")]
50019    unsafe fn test_mm_cmp_ss_mask() {
50020        let a = _mm_setr_ps(2., 1., 1., 1.);
50021        let b = _mm_setr_ps(1., 2., 2., 2.);
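             // Scalar compare: only element 0 participates, so the mask is either 0 or 1.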
50022        let m = _mm_cmp_ss_mask::<_CMP_GE_OS>(a, b);
50023        assert_eq!(m, 1);
50024    }
50025
50026    #[simd_test(enable = "avx512f")]
50027    unsafe fn test_mm_mask_cmp_ss_mask() {
50028        let a = _mm_setr_ps(2., 1., 1., 1.);
50029        let b = _mm_setr_ps(1., 2., 2., 2.);
50030        let m = _mm_mask_cmp_ss_mask::<_CMP_GE_OS>(0b10, a, b);
50031        assert_eq!(m, 0);
50032        let m = _mm_mask_cmp_ss_mask::<_CMP_GE_OS>(0b1, a, b);
50033        assert_eq!(m, 1);
50034    }
50035
50036    #[simd_test(enable = "avx512f")]
50037    unsafe fn test_mm_cmp_round_ss_mask() {
50038        let a = _mm_setr_ps(2., 1., 1., 1.);
50039        let b = _mm_setr_ps(1., 2., 2., 2.);
50040        let m = _mm_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(a, b);
50041        assert_eq!(m, 1);
50042    }
50043
50044    #[simd_test(enable = "avx512f")]
50045    unsafe fn test_mm_mask_cmp_round_ss_mask() {
50046        let a = _mm_setr_ps(2., 1., 1., 1.);
50047        let b = _mm_setr_ps(1., 2., 2., 2.);
50048        let m = _mm_mask_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b10, a, b);
50049        assert_eq!(m, 0);
50050        let m = _mm_mask_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b1, a, b);
50051        assert_eq!(m, 1);
50052    }
50053
50054    #[simd_test(enable = "avx512f")]
50055    unsafe fn test_mm_cmp_sd_mask() {
50056        let a = _mm_setr_pd(2., 1.);
50057        let b = _mm_setr_pd(1., 2.);
50058        let m = _mm_cmp_sd_mask::<_CMP_GE_OS>(a, b);
50059        assert_eq!(m, 1);
50060    }
50061
50062    #[simd_test(enable = "avx512f")]
50063    unsafe fn test_mm_mask_cmp_sd_mask() {
50064        let a = _mm_setr_pd(2., 1.);
50065        let b = _mm_setr_pd(1., 2.);
50066        let m = _mm_mask_cmp_sd_mask::<_CMP_GE_OS>(0b10, a, b);
50067        assert_eq!(m, 0);
50068        let m = _mm_mask_cmp_sd_mask::<_CMP_GE_OS>(0b1, a, b);
50069        assert_eq!(m, 1);
50070    }
50071
50072    #[simd_test(enable = "avx512f")]
50073    unsafe fn test_mm_cmp_round_sd_mask() {
50074        let a = _mm_setr_pd(2., 1.);
50075        let b = _mm_setr_pd(1., 2.);
50076        let m = _mm_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(a, b);
50077        assert_eq!(m, 1);
50078    }
50079
50080    #[simd_test(enable = "avx512f")]
50081    unsafe fn test_mm_mask_cmp_round_sd_mask() {
50082        let a = _mm_setr_pd(2., 1.);
50083        let b = _mm_setr_pd(1., 2.);
50084        let m = _mm_mask_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b10, a, b);
50085        assert_eq!(m, 0);
50086        let m = _mm_mask_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b1, a, b);
50087        assert_eq!(m, 1);
50088    }
50089
50090    #[simd_test(enable = "avx512f")]
50091    unsafe fn test_mm512_cmplt_epu32_mask() {
50092        #[rustfmt::skip]
50093        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50094                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50095        let b = _mm512_set1_epi32(-1);
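             // Viewed as unsigned, b is u32::MAX, so every lane of a except the -1 and u32::MAX entries compares less-than.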
50096        let m = _mm512_cmplt_epu32_mask(a, b);
50097        assert_eq!(m, 0b11001111_11001111);
50098    }
50099
50100    #[simd_test(enable = "avx512f")]
50101    unsafe fn test_mm512_mask_cmplt_epu32_mask() {
50102        #[rustfmt::skip]
50103        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50104                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50105        let b = _mm512_set1_epi32(-1);
50106        let mask = 0b01111010_01111010;
50107        let r = _mm512_mask_cmplt_epu32_mask(mask, a, b);
50108        assert_eq!(r, 0b01001010_01001010);
50109    }
50110
50111    #[simd_test(enable = "avx512f,avx512vl")]
50112    unsafe fn test_mm256_cmplt_epu32_mask() {
50113        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 101, 100, 99);
50114        let b = _mm256_set1_epi32(1);
50115        let r = _mm256_cmplt_epu32_mask(a, b);
50116        assert_eq!(r, 0b10000000);
50117    }
50118
50119    #[simd_test(enable = "avx512f,avx512vl")]
50120    unsafe fn test_mm256_mask_cmplt_epu32_mask() {
50121        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 101, 100, 99);
50122        let b = _mm256_set1_epi32(1);
50123        let mask = 0b11111111;
50124        let r = _mm256_mask_cmplt_epu32_mask(mask, a, b);
50125        assert_eq!(r, 0b10000000);
50126    }
50127
50128    #[simd_test(enable = "avx512f,avx512vl")]
50129    unsafe fn test_mm_cmplt_epu32_mask() {
50130        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50131        let b = _mm_set1_epi32(1);
50132        let r = _mm_cmplt_epu32_mask(a, b);
50133        assert_eq!(r, 0b00001000);
50134    }
50135
50136    #[simd_test(enable = "avx512f,avx512vl")]
50137    unsafe fn test_mm_mask_cmplt_epu32_mask() {
50138        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50139        let b = _mm_set1_epi32(1);
50140        let mask = 0b11111111;
50141        let r = _mm_mask_cmplt_epu32_mask(mask, a, b);
50142        assert_eq!(r, 0b00001000);
50143    }
50144
50145    #[simd_test(enable = "avx512f")]
50146    unsafe fn test_mm512_cmpgt_epu32_mask() {
50147        #[rustfmt::skip]
50148        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50149                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50150        let b = _mm512_set1_epi32(-1);
50151        let m = _mm512_cmpgt_epu32_mask(b, a);
50152        assert_eq!(m, 0b11001111_11001111);
50153    }
50154
50155    #[simd_test(enable = "avx512f")]
50156    unsafe fn test_mm512_mask_cmpgt_epu32_mask() {
50157        #[rustfmt::skip]
50158        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50159                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50160        let b = _mm512_set1_epi32(-1);
50161        let mask = 0b01111010_01111010;
50162        let r = _mm512_mask_cmpgt_epu32_mask(mask, b, a);
50163        assert_eq!(r, 0b01001010_01001010);
50164    }
50165
50166    #[simd_test(enable = "avx512f,avx512vl")]
50167    unsafe fn test_mm256_cmpgt_epu32_mask() {
50168        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 99, 100, 101);
50169        let b = _mm256_set1_epi32(1);
50170        let r = _mm256_cmpgt_epu32_mask(a, b);
50171        assert_eq!(r, 0b00111111);
50172    }
50173
50174    #[simd_test(enable = "avx512f,avx512vl")]
50175    unsafe fn test_mm256_mask_cmpgt_epu32_mask() {
50176        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 99, 100, 101);
50177        let b = _mm256_set1_epi32(1);
50178        let mask = 0b11111111;
50179        let r = _mm256_mask_cmpgt_epu32_mask(mask, a, b);
50180        assert_eq!(r, 0b00111111);
50181    }
50182
50183    #[simd_test(enable = "avx512f,avx512vl")]
50184    unsafe fn test_mm_cmpgt_epu32_mask() {
50185        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50186        let b = _mm_set1_epi32(1);
50187        let r = _mm_cmpgt_epu32_mask(a, b);
50188        assert_eq!(r, 0b00000011);
50189    }
50190
50191    #[simd_test(enable = "avx512f,avx512vl")]
50192    unsafe fn test_mm_mask_cmpgt_epu32_mask() {
50193        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50194        let b = _mm_set1_epi32(1);
50195        let mask = 0b11111111;
50196        let r = _mm_mask_cmpgt_epu32_mask(mask, a, b);
50197        assert_eq!(r, 0b00000011);
50198    }
50199
50200    #[simd_test(enable = "avx512f")]
50201    unsafe fn test_mm512_cmple_epu32_mask() {
50202        #[rustfmt::skip]
50203        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50204                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50205        let b = _mm512_set1_epi32(-1);
50206        assert_eq!(
50207            _mm512_cmple_epu32_mask(a, b),
50208            !_mm512_cmpgt_epu32_mask(a, b)
50209        )
50210    }
50211
50212    #[simd_test(enable = "avx512f")]
50213    unsafe fn test_mm512_mask_cmple_epu32_mask() {
50214        #[rustfmt::skip]
50215        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50216                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50217        let b = _mm512_set1_epi32(-1);
50218        let mask = 0b01111010_01111010;
50219        assert_eq!(
50220            _mm512_mask_cmple_epu32_mask(mask, a, b),
50221            0b01111010_01111010
50222        );
50223    }
50224
50225    #[simd_test(enable = "avx512f,avx512vl")]
50226    unsafe fn test_mm256_cmple_epu32_mask() {
50227        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 200, 100, 101);
50228        let b = _mm256_set1_epi32(1);
50229        let r = _mm256_cmple_epu32_mask(a, b);
50230        assert_eq!(r, 0b11000000)
50231    }
50232
50233    #[simd_test(enable = "avx512f,avx512vl")]
50234    unsafe fn test_mm256_mask_cmple_epu32_mask() {
50235        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 200, 100, 101);
50236        let b = _mm256_set1_epi32(1);
50237        let mask = 0b11111111;
50238        let r = _mm256_mask_cmple_epu32_mask(mask, a, b);
50239        assert_eq!(r, 0b11000000)
50240    }
50241
50242    #[simd_test(enable = "avx512f,avx512vl")]
50243    unsafe fn test_mm_cmple_epu32_mask() {
50244        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50245        let b = _mm_set1_epi32(1);
50246        let r = _mm_cmple_epu32_mask(a, b);
50247        assert_eq!(r, 0b00001100)
50248    }
50249
50250    #[simd_test(enable = "avx512f,avx512vl")]
50251    unsafe fn test_mm_mask_cmple_epu32_mask() {
50252        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50253        let b = _mm_set1_epi32(1);
50254        let mask = 0b11111111;
50255        let r = _mm_mask_cmple_epu32_mask(mask, a, b);
50256        assert_eq!(r, 0b00001100)
50257    }
50258
50259    #[simd_test(enable = "avx512f")]
50260    unsafe fn test_mm512_cmpge_epu32_mask() {
50261        #[rustfmt::skip]
50262        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50263                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50264        let b = _mm512_set1_epi32(-1);
50265        assert_eq!(
50266            _mm512_cmpge_epu32_mask(a, b),
50267            !_mm512_cmplt_epu32_mask(a, b)
50268        )
50269    }
50270
50271    #[simd_test(enable = "avx512f")]
50272    unsafe fn test_mm512_mask_cmpge_epu32_mask() {
50273        #[rustfmt::skip]
50274        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50275                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50276        let b = _mm512_set1_epi32(-1);
50277        let mask = 0b01111010_01111010;
50278        assert_eq!(_mm512_mask_cmpge_epu32_mask(mask, a, b), 0b00110000_00110000);
50279    }
50280
50281    #[simd_test(enable = "avx512f,avx512vl")]
50282    unsafe fn test_mm256_cmpge_epu32_mask() {
50283        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 300, 100, 200);
50284        let b = _mm256_set1_epi32(1);
50285        let r = _mm256_cmpge_epu32_mask(a, b);
50286        assert_eq!(r, 0b01111111)
50287    }
50288
50289    #[simd_test(enable = "avx512f,avx512vl")]
50290    unsafe fn test_mm256_mask_cmpge_epu32_mask() {
50291        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 300, 100, 200);
50292        let b = _mm256_set1_epi32(1);
50293        let mask = 0b11111111;
50294        let r = _mm256_mask_cmpge_epu32_mask(mask, a, b);
50295        assert_eq!(r, 0b01111111)
50296    }
50297
50298    #[simd_test(enable = "avx512f,avx512vl")]
50299    unsafe fn test_mm_cmpge_epu32_mask() {
50300        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50301        let b = _mm_set1_epi32(1);
50302        let r = _mm_cmpge_epu32_mask(a, b);
50303        assert_eq!(r, 0b00000111)
50304    }
50305
50306    #[simd_test(enable = "avx512f,avx512vl")]
50307    unsafe fn test_mm_mask_cmpge_epu32_mask() {
50308        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50309        let b = _mm_set1_epi32(1);
50310        let mask = 0b11111111;
50311        let r = _mm_mask_cmpge_epu32_mask(mask, a, b);
50312        assert_eq!(r, 0b00000111)
50313    }
50314
50315    #[simd_test(enable = "avx512f")]
50316    unsafe fn test_mm512_cmpeq_epu32_mask() {
50317        #[rustfmt::skip]
50318        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50319                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50320        #[rustfmt::skip]
50321        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50322                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50323        let m = _mm512_cmpeq_epu32_mask(b, a);
50324        assert_eq!(m, 0b11001111_11001111);
50325    }
50326
50327    #[simd_test(enable = "avx512f")]
50328    unsafe fn test_mm512_mask_cmpeq_epu32_mask() {
50329        #[rustfmt::skip]
50330        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50331                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50332        #[rustfmt::skip]
50333        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50334                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50335        let mask = 0b01111010_01111010;
50336        let r = _mm512_mask_cmpeq_epu32_mask(mask, b, a);
50337        assert_eq!(r, 0b01001010_01001010);
50338    }
50339
50340    #[simd_test(enable = "avx512f,avx512vl")]
50341    unsafe fn test_mm256_cmpeq_epu32_mask() {
50342        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50343        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50344        let m = _mm256_cmpeq_epu32_mask(b, a);
50345        assert_eq!(m, 0b11001111);
50346    }
50347
50348    #[simd_test(enable = "avx512f,avx512vl")]
50349    unsafe fn test_mm256_mask_cmpeq_epu32_mask() {
50350        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50351        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50352        let mask = 0b01111010;
50353        let r = _mm256_mask_cmpeq_epu32_mask(mask, b, a);
50354        assert_eq!(r, 0b01001010);
50355    }
50356
50357    #[simd_test(enable = "avx512f,avx512vl")]
50358    unsafe fn test_mm_cmpeq_epu32_mask() {
50359        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50360        let b = _mm_set_epi32(0, 1, 13, 42);
50361        let m = _mm_cmpeq_epu32_mask(b, a);
50362        assert_eq!(m, 0b00001100);
50363    }
50364
50365    #[simd_test(enable = "avx512f,avx512vl")]
50366    unsafe fn test_mm_mask_cmpeq_epu32_mask() {
50367        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50368        let b = _mm_set_epi32(0, 1, 13, 42);
50369        let mask = 0b11111111;
50370        let r = _mm_mask_cmpeq_epu32_mask(mask, b, a);
50371        assert_eq!(r, 0b00001100);
50372    }
50373
50374    #[simd_test(enable = "avx512f")]
50375    unsafe fn test_mm512_cmpneq_epu32_mask() {
50376        #[rustfmt::skip]
50377        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50378                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50379        #[rustfmt::skip]
50380        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50381                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50382        let m = _mm512_cmpneq_epu32_mask(b, a);
50383        assert_eq!(m, !_mm512_cmpeq_epu32_mask(b, a));
50384    }
50385
50386    #[simd_test(enable = "avx512f")]
50387    unsafe fn test_mm512_mask_cmpneq_epu32_mask() {
50388        #[rustfmt::skip]
50389        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100,
50390                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
50391        #[rustfmt::skip]
50392        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50393                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50394        let mask = 0b01111010_01111010;
50395        let r = _mm512_mask_cmpneq_epu32_mask(mask, b, a);
50396        assert_eq!(r, 0b00110010_00110010);
50397    }
50398
50399    #[simd_test(enable = "avx512f,avx512vl")]
50400    unsafe fn test_mm256_cmpneq_epu32_mask() {
50401        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
50402        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, -100, 100);
50403        let r = _mm256_cmpneq_epu32_mask(b, a);
50404        assert_eq!(r, 0b00110000);
50405    }
50406
50407    #[simd_test(enable = "avx512f,avx512vl")]
50408    unsafe fn test_mm256_mask_cmpneq_epu32_mask() {
50409        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
50410        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, -100, 100);
50411        let mask = 0b11111111;
50412        let r = _mm256_mask_cmpneq_epu32_mask(mask, b, a);
50413        assert_eq!(r, 0b00110000);
50414    }
50415
50416    #[simd_test(enable = "avx512f,avx512vl")]
50417    unsafe fn test_mm_cmpneq_epu32_mask() {
50418        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50419        let b = _mm_set_epi32(0, 1, 13, 42);
50420        let r = _mm_cmpneq_epu32_mask(b, a);
50421        assert_eq!(r, 0b00000011);
50422    }
50423
50424    #[simd_test(enable = "avx512f,avx512vl")]
50425    unsafe fn test_mm_mask_cmpneq_epu32_mask() {
50426        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50427        let b = _mm_set_epi32(0, 1, 13, 42);
50428        let mask = 0b11111111;
50429        let r = _mm_mask_cmpneq_epu32_mask(mask, b, a);
50430        assert_eq!(r, 0b00000011);
50431    }
50432
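    // `_MM_CMPINT_LT` with the `epu32` intrinsics compares lanes as unsigned 32-bit values,
    // so `-1`, `u32::MAX as i32` and `i32::MIN` are reinterpreted as large unsigned numbers;
    // against `b = -1` (all bits set), every lane of `a` that is not itself all-ones
    // compares less-than.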
50433    #[simd_test(enable = "avx512f")]
50434    unsafe fn test_mm512_cmp_epu32_mask() {
50435        #[rustfmt::skip]
50436        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50437                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50438        let b = _mm512_set1_epi32(-1);
50439        let m = _mm512_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
50440        assert_eq!(m, 0b11001111_11001111);
50441    }
50442
50443    #[simd_test(enable = "avx512f")]
50444    unsafe fn test_mm512_mask_cmp_epu32_mask() {
50445        #[rustfmt::skip]
50446        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50447                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50448        let b = _mm512_set1_epi32(-1);
50449        let mask = 0b01111010_01111010;
50450        let r = _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
50451        assert_eq!(r, 0b01001010_01001010);
50452    }
50453
50454    #[simd_test(enable = "avx512f,avx512vl")]
50455    unsafe fn test_mm256_cmp_epu32_mask() {
50456        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50457        let b = _mm256_set1_epi32(-1);
50458        let m = _mm256_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
50459        assert_eq!(m, 0b11001111);
50460    }
50461
50462    #[simd_test(enable = "avx512f,avx512vl")]
50463    unsafe fn test_mm256_mask_cmp_epu32_mask() {
50464        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50465        let b = _mm256_set1_epi32(-1);
50466        let mask = 0b11111111;
50467        let r = _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
50468        assert_eq!(r, 0b11001111);
50469    }
50470
50471    #[simd_test(enable = "avx512f,avx512vl")]
50472    unsafe fn test_mm_cmp_epu32_mask() {
50473        let a = _mm_set_epi32(0, 1, -1, i32::MAX);
50474        let b = _mm_set1_epi32(1);
50475        let m = _mm_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
50476        assert_eq!(m, 0b00001000);
50477    }
50478
50479    #[simd_test(enable = "avx512f,avx512vl")]
50480    unsafe fn test_mm_mask_cmp_epu32_mask() {
50481        let a = _mm_set_epi32(0, 1, -1, i32::MAX);
50482        let b = _mm_set1_epi32(1);
50483        let mask = 0b11111111;
50484        let r = _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
50485        assert_eq!(r, 0b00001000);
50486    }
50487
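    // The `epi32` comparisons below are signed, unlike the `epu32` variants above: against
    // `b = -1`, only genuinely negative values such as `-100` and `i32::MIN` compare
    // less-than.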
50488    #[simd_test(enable = "avx512f")]
50489    unsafe fn test_mm512_cmplt_epi32_mask() {
50490        #[rustfmt::skip]
50491        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50492                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50493        let b = _mm512_set1_epi32(-1);
50494        let m = _mm512_cmplt_epi32_mask(a, b);
50495        assert_eq!(m, 0b00000101_00000101);
50496    }
50497
50498    #[simd_test(enable = "avx512f")]
50499    unsafe fn test_mm512_mask_cmplt_epi32_mask() {
50500        #[rustfmt::skip]
50501        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50502                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50503        let b = _mm512_set1_epi32(-1);
50504        let mask = 0b01100110_01100110;
50505        let r = _mm512_mask_cmplt_epi32_mask(mask, a, b);
50506        assert_eq!(r, 0b00000100_00000100);
50507    }
50508
50509    #[simd_test(enable = "avx512f,avx512vl")]
50510    unsafe fn test_mm256_cmplt_epi32_mask() {
50511        let a = _mm256_set_epi32(0, 1, -1, 101, i32::MAX, i32::MIN, 100, -100);
50512        let b = _mm256_set1_epi32(-1);
50513        let r = _mm256_cmplt_epi32_mask(a, b);
50514        assert_eq!(r, 0b00000101);
50515    }
50516
50517    #[simd_test(enable = "avx512f,avx512vl")]
50518    unsafe fn test_mm256_mask_cmplt_epi32_mask() {
50519        let a = _mm256_set_epi32(0, 1, -1, 101, i32::MAX, i32::MIN, 100, -100);
50520        let b = _mm256_set1_epi32(-1);
50521        let mask = 0b11111111;
50522        let r = _mm256_mask_cmplt_epi32_mask(mask, a, b);
50523        assert_eq!(r, 0b00000101);
50524    }
50525
50526    #[simd_test(enable = "avx512f,avx512vl")]
50527    unsafe fn test_mm_cmplt_epi32_mask() {
50528        let a = _mm_set_epi32(i32::MAX, i32::MIN, 100, -100);
50529        let b = _mm_set1_epi32(-1);
50530        let r = _mm_cmplt_epi32_mask(a, b);
50531        assert_eq!(r, 0b00000101);
50532    }
50533
50534    #[simd_test(enable = "avx512f,avx512vl")]
50535    unsafe fn test_mm_mask_cmplt_epi32_mask() {
50536        let a = _mm_set_epi32(i32::MAX, i32::MIN, 100, -100);
50537        let b = _mm_set1_epi32(-1);
50538        let mask = 0b11111111;
50539        let r = _mm_mask_cmplt_epi32_mask(mask, a, b);
50540        assert_eq!(r, 0b00000101);
50541    }
50542
50543    #[simd_test(enable = "avx512f")]
50544    unsafe fn test_mm512_cmpgt_epi32_mask() {
50545        #[rustfmt::skip]
50546        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50547                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50548        let b = _mm512_set1_epi32(-1);
50549        let m = _mm512_cmpgt_epi32_mask(b, a);
50550        assert_eq!(m, 0b00000101_00000101);
50551    }
50552
50553    #[simd_test(enable = "avx512f")]
50554    unsafe fn test_mm512_mask_cmpgt_epi32_mask() {
50555        #[rustfmt::skip]
50556        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50557                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50558        let b = _mm512_set1_epi32(-1);
50559        let mask = 0b01100110_01100110;
50560        let r = _mm512_mask_cmpgt_epi32_mask(mask, b, a);
50561        assert_eq!(r, 0b00000100_00000100);
50562    }
50563
50564    #[simd_test(enable = "avx512f,avx512vl")]
50565    unsafe fn test_mm256_cmpgt_epi32_mask() {
50566        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50567        let b = _mm256_set1_epi32(-1);
50568        let r = _mm256_cmpgt_epi32_mask(a, b);
50569        assert_eq!(r, 0b11011010);
50570    }
50571
50572    #[simd_test(enable = "avx512f,avx512vl")]
50573    unsafe fn test_mm256_mask_cmpgt_epi32_mask() {
50574        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50575        let b = _mm256_set1_epi32(-1);
50576        let mask = 0b11111111;
50577        let r = _mm256_mask_cmpgt_epi32_mask(mask, a, b);
50578        assert_eq!(r, 0b11011010);
50579    }
50580
50581    #[simd_test(enable = "avx512f,avx512vl")]
50582    unsafe fn test_mm_cmpgt_epi32_mask() {
50583        let a = _mm_set_epi32(0, 1, -1, 13);
50584        let b = _mm_set1_epi32(-1);
50585        let r = _mm_cmpgt_epi32_mask(a, b);
50586        assert_eq!(r, 0b00001101);
50587    }
50588
50589    #[simd_test(enable = "avx512f,avx512vl")]
50590    unsafe fn test_mm_mask_cmpgt_epi32_mask() {
50591        let a = _mm_set_epi32(0, 1, -1, 13);
50592        let b = _mm_set1_epi32(-1);
50593        let mask = 0b11111111;
50594        let r = _mm_mask_cmpgt_epi32_mask(mask, a, b);
50595        assert_eq!(r, 0b00001101);
50596    }
50597
50598    #[simd_test(enable = "avx512f")]
50599    unsafe fn test_mm512_cmple_epi32_mask() {
50600        #[rustfmt::skip]
50601        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50602                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50603        let b = _mm512_set1_epi32(-1);
50604        assert_eq!(
50605            _mm512_cmple_epi32_mask(a, b),
50606            !_mm512_cmpgt_epi32_mask(a, b)
50607        )
50608    }
50609
50610    #[simd_test(enable = "avx512f")]
50611    unsafe fn test_mm512_mask_cmple_epi32_mask() {
50612        #[rustfmt::skip]
50613        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50614                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50615        let b = _mm512_set1_epi32(-1);
50616        let mask = 0b01111010_01111010;
50617        assert_eq!(_mm512_mask_cmple_epi32_mask(mask, a, b), 0b00110000_00110000);
50618    }
50619
50620    #[simd_test(enable = "avx512f,avx512vl")]
50621    unsafe fn test_mm256_cmple_epi32_mask() {
50622        let a = _mm256_set_epi32(0, 1, -1, 200, i32::MAX, i32::MIN, 100, -100);
50623        let b = _mm256_set1_epi32(-1);
50624        let r = _mm256_cmple_epi32_mask(a, b);
50625        assert_eq!(r, 0b00100101)
50626    }
50627
50628    #[simd_test(enable = "avx512f,avx512vl")]
50629    unsafe fn test_mm256_mask_cmple_epi32_mask() {
50630        let a = _mm256_set_epi32(0, 1, -1, 200, i32::MAX, i32::MIN, 100, -100);
50631        let b = _mm256_set1_epi32(-1);
50632        let mask = 0b11111111;
50633        let r = _mm256_mask_cmple_epi32_mask(mask, a, b);
50634        assert_eq!(r, 0b00100101)
50635    }
50636
50637    #[simd_test(enable = "avx512f,avx512vl")]
50638    unsafe fn test_mm_cmple_epi32_mask() {
50639        let a = _mm_set_epi32(0, 1, -1, 200);
50640        let b = _mm_set1_epi32(-1);
50641        let r = _mm_cmple_epi32_mask(a, b);
50642        assert_eq!(r, 0b00000010)
50643    }
50644
50645    #[simd_test(enable = "avx512f,avx512vl")]
50646    unsafe fn test_mm_mask_cmple_epi32_mask() {
50647        let a = _mm_set_epi32(0, 1, -1, 200);
50648        let b = _mm_set1_epi32(-1);
50649        let mask = 0b11111111;
50650        let r = _mm_mask_cmple_epi32_mask(mask, a, b);
50651        assert_eq!(r, 0b00000010)
50652    }
50653
50654    #[simd_test(enable = "avx512f")]
50655    unsafe fn test_mm512_cmpge_epi32_mask() {
50656        #[rustfmt::skip]
50657        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50658                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50659        let b = _mm512_set1_epi32(-1);
50660        assert_eq!(
50661            _mm512_cmpge_epi32_mask(a, b),
50662            !_mm512_cmplt_epi32_mask(a, b)
50663        )
50664    }
50665
50666    #[simd_test(enable = "avx512f")]
50667    unsafe fn test_mm512_mask_cmpge_epi32_mask() {
50668        #[rustfmt::skip]
50669        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50670                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50671        let b = _mm512_set1_epi32(-1);
50672        let mask = 0b01111010_01111010;
50673        assert_eq!(
50674            _mm512_mask_cmpge_epi32_mask(mask, a, b),
50675            0b01111010_01111010
50676        );
50677    }
50678
50679    #[simd_test(enable = "avx512f,avx512vl")]
50680    unsafe fn test_mm256_cmpge_epi32_mask() {
50681        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50682        let b = _mm256_set1_epi32(-1);
50683        let r = _mm256_cmpge_epi32_mask(a, b);
50684        assert_eq!(r, 0b11111010)
50685    }
50686
50687    #[simd_test(enable = "avx512f,avx512vl")]
50688    unsafe fn test_mm256_mask_cmpge_epi32_mask() {
50689        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50690        let b = _mm256_set1_epi32(-1);
50691        let mask = 0b11111111;
50692        let r = _mm256_mask_cmpge_epi32_mask(mask, a, b);
50693        assert_eq!(r, 0b11111010)
50694    }
50695
50696    #[simd_test(enable = "avx512f,avx512vl")]
50697    unsafe fn test_mm_cmpge_epi32_mask() {
50698        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50699        let b = _mm_set1_epi32(-1);
50700        let r = _mm_cmpge_epi32_mask(a, b);
50701        assert_eq!(r, 0b00001111)
50702    }
50703
50704    #[simd_test(enable = "avx512f,avx512vl")]
50705    unsafe fn test_mm_mask_cmpge_epi32_mask() {
50706        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50707        let b = _mm_set1_epi32(-1);
50708        let mask = 0b11111111;
50709        let r = _mm_mask_cmpge_epi32_mask(mask, a, b);
50710        assert_eq!(r, 0b00001111)
50711    }
50712
50713    #[simd_test(enable = "avx512f")]
50714    unsafe fn test_mm512_cmpeq_epi32_mask() {
50715        #[rustfmt::skip]
50716        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50717                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50718        #[rustfmt::skip]
50719        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50720                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50721        let m = _mm512_cmpeq_epi32_mask(b, a);
50722        assert_eq!(m, 0b11001111_11001111);
50723    }
50724
50725    #[simd_test(enable = "avx512f")]
50726    unsafe fn test_mm512_mask_cmpeq_epi32_mask() {
50727        #[rustfmt::skip]
50728        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50729                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50730        #[rustfmt::skip]
50731        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50732                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50733        let mask = 0b01111010_01111010;
50734        let r = _mm512_mask_cmpeq_epi32_mask(mask, b, a);
50735        assert_eq!(r, 0b01001010_01001010);
50736    }
50737
50738    #[simd_test(enable = "avx512f,avx512vl")]
50739    unsafe fn test_mm256_cmpeq_epi32_mask() {
50740        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50741        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50742        let m = _mm256_cmpeq_epi32_mask(b, a);
50743        assert_eq!(m, 0b11001111);
50744    }
50745
50746    #[simd_test(enable = "avx512f,avx512vl")]
50747    unsafe fn test_mm256_mask_cmpeq_epi32_mask() {
50748        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50749        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50750        let mask = 0b01111010;
50751        let r = _mm256_mask_cmpeq_epi32_mask(mask, b, a);
50752        assert_eq!(r, 0b01001010);
50753    }
50754
50755    #[simd_test(enable = "avx512f,avx512vl")]
50756    unsafe fn test_mm_cmpeq_epi32_mask() {
50757        let a = _mm_set_epi32(0, 1, -1, 13);
50758        let b = _mm_set_epi32(0, 1, 13, 42);
50759        let m = _mm_cmpeq_epi32_mask(b, a);
50760        assert_eq!(m, 0b00001100);
50761    }
50762
50763    #[simd_test(enable = "avx512f,avx512vl")]
50764    unsafe fn test_mm_mask_cmpeq_epi32_mask() {
50765        let a = _mm_set_epi32(0, 1, -1, 13);
50766        let b = _mm_set_epi32(0, 1, 13, 42);
50767        let mask = 0b11111111;
50768        let r = _mm_mask_cmpeq_epi32_mask(mask, b, a);
50769        assert_eq!(r, 0b00001100);
50770    }
50771
50772    #[simd_test(enable = "avx512f")]
50773    unsafe fn test_mm512_cmpneq_epi32_mask() {
50774        #[rustfmt::skip]
50775        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50776                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50777        #[rustfmt::skip]
50778        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50779                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50780        let m = _mm512_cmpneq_epi32_mask(b, a);
50781        assert_eq!(m, !_mm512_cmpeq_epi32_mask(b, a));
50782    }
50783
50784    #[simd_test(enable = "avx512f")]
50785    unsafe fn test_mm512_mask_cmpneq_epi32_mask() {
50786        #[rustfmt::skip]
50787        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100,
50788                                 0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100);
50789        #[rustfmt::skip]
50790        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50791                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50792        let mask = 0b01111010_01111010;
50793        let r = _mm512_mask_cmpneq_epi32_mask(mask, b, a);
50794        assert_eq!(r, 0b00110010_00110010)
50795    }
50796
50797    #[simd_test(enable = "avx512f,avx512vl")]
50798    unsafe fn test_mm256_cmpneq_epi32_mask() {
50799        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50800        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50801        let m = _mm256_cmpneq_epi32_mask(b, a);
50802        assert_eq!(m, !_mm256_cmpeq_epi32_mask(b, a));
50803    }
50804
50805    #[simd_test(enable = "avx512f,avx512vl")]
50806    unsafe fn test_mm256_mask_cmpneq_epi32_mask() {
50807        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100);
50808        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50809        let mask = 0b11111111;
50810        let r = _mm256_mask_cmpneq_epi32_mask(mask, b, a);
50811        assert_eq!(r, 0b00110011)
50812    }
50813
50814    #[simd_test(enable = "avx512f,avx512vl")]
50815    unsafe fn test_mm_cmpneq_epi32_mask() {
50816        let a = _mm_set_epi32(0, 1, -1, 13);
50817        let b = _mm_set_epi32(0, 1, 13, 42);
50818        let r = _mm_cmpneq_epi32_mask(b, a);
50819        assert_eq!(r, 0b00000011)
50820    }
50821
50822    #[simd_test(enable = "avx512f,avx512vl")]
50823    unsafe fn test_mm_mask_cmpneq_epi32_mask() {
50824        let a = _mm_set_epi32(0, 1, -1, 13);
50825        let b = _mm_set_epi32(0, 1, 13, 42);
50826        let mask = 0b11111111;
50827        let r = _mm_mask_cmpneq_epi32_mask(mask, b, a);
50828        assert_eq!(r, 0b00000011)
50829    }
50830
50831    #[simd_test(enable = "avx512f")]
50832    unsafe fn test_mm512_cmp_epi32_mask() {
50833        #[rustfmt::skip]
50834        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50835                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50836        let b = _mm512_set1_epi32(-1);
50837        let m = _mm512_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
50838        assert_eq!(m, 0b00000101_00000101);
50839    }
50840
50841    #[simd_test(enable = "avx512f")]
50842    unsafe fn test_mm512_mask_cmp_epi32_mask() {
50843        #[rustfmt::skip]
50844        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50845                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50846        let b = _mm512_set1_epi32(-1);
50847        let mask = 0b01100110_01100110;
50848        let r = _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
50849        assert_eq!(r, 0b00000100_00000100);
50850    }
50851
50852    #[simd_test(enable = "avx512f,avx512vl")]
50853    unsafe fn test_mm256_cmp_epi32_mask() {
50854        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50855        let b = _mm256_set1_epi32(-1);
50856        let m = _mm256_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
50857        assert_eq!(m, 0b00000101);
50858    }
50859
50860    #[simd_test(enable = "avx512f,avx512vl")]
50861    unsafe fn test_mm256_mask_cmp_epi32_mask() {
50862        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50863        let b = _mm256_set1_epi32(-1);
50864        let mask = 0b01100110;
50865        let r = _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
50866        assert_eq!(r, 0b00000100);
50867    }
50868
50869    #[simd_test(enable = "avx512f,avx512vl")]
50870    unsafe fn test_mm_cmp_epi32_mask() {
50871        let a = _mm_set_epi32(0, 1, -1, 13);
50872        let b = _mm_set1_epi32(1);
50873        let m = _mm_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
50874        assert_eq!(m, 0b00001010);
50875    }
50876
50877    #[simd_test(enable = "avx512f,avx512vl")]
50878    unsafe fn test_mm_mask_cmp_epi32_mask() {
50879        let a = _mm_set_epi32(0, 1, -1, 13);
50880        let b = _mm_set1_epi32(1);
50881        let mask = 0b11111111;
50882        let r = _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
50883        assert_eq!(r, 0b00001010);
50884    }
50885
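    // `_mm512_set_*` takes its arguments from the highest element down to element 0, while
    // `_mm512_setr_*` (reversed, i.e. memory order) takes them from element 0 upwards, which
    // is why the `set` and `setr` tests below pass the same values in opposite order.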
50886    #[simd_test(enable = "avx512f")]
50887    unsafe fn test_mm512_set_epi8() {
50888        let r = _mm512_set1_epi8(2);
50889        assert_eq_m512i(
50890            r,
50891            _mm512_set_epi8(
50892                2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
50893                2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
50894                2, 2, 2, 2, 2, 2, 2, 2,
50895            ),
50896        )
50897    }
50898
50899    #[simd_test(enable = "avx512f")]
50900    unsafe fn test_mm512_set_epi16() {
50901        let r = _mm512_set1_epi16(2);
50902        assert_eq_m512i(
50903            r,
50904            _mm512_set_epi16(
50905                2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
50906                2, 2, 2, 2,
50907            ),
50908        )
50909    }
50910
50911    #[simd_test(enable = "avx512f")]
50912    unsafe fn test_mm512_set_epi32() {
50913        let r = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
50914        assert_eq_m512i(
50915            r,
50916            _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
50917        )
50918    }
50919
50920    #[simd_test(enable = "avx512f")]
50921    unsafe fn test_mm512_setr_epi32() {
50922        let r = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
50923        assert_eq_m512i(
50924            r,
50925            _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
50926        )
50927    }
50928
50929    #[simd_test(enable = "avx512f")]
50930    unsafe fn test_mm512_set1_epi8() {
50931        let r = _mm512_set_epi8(
50932            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
50933            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
50934            2, 2, 2, 2, 2, 2,
50935        );
50936        assert_eq_m512i(r, _mm512_set1_epi8(2));
50937    }
50938
50939    #[simd_test(enable = "avx512f")]
50940    unsafe fn test_mm512_set1_epi16() {
50941        let r = _mm512_set_epi16(
50942            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
50943            2, 2, 2,
50944        );
50945        assert_eq_m512i(r, _mm512_set1_epi16(2));
50946    }
50947
50948    #[simd_test(enable = "avx512f")]
50949    unsafe fn test_mm512_set1_epi32() {
50950        let r = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
50951        assert_eq_m512i(r, _mm512_set1_epi32(2));
50952    }
50953
50954    #[simd_test(enable = "avx512f")]
50955    unsafe fn test_mm512_setzero_si512() {
50956        assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_si512());
50957    }
50958
50959    #[simd_test(enable = "avx512f")]
50960    unsafe fn test_mm512_setzero_epi32() {
50961        assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_epi32());
50962    }
50963
50964    #[simd_test(enable = "avx512f")]
50965    unsafe fn test_mm512_set_ps() {
50966        let r = _mm512_setr_ps(
50967            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
50968        );
50969        assert_eq_m512(
50970            r,
50971            _mm512_set_ps(
50972                15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
50973            ),
50974        )
50975    }
50976
50977    #[simd_test(enable = "avx512f")]
50978    unsafe fn test_mm512_setr_ps() {
50979        let r = _mm512_set_ps(
50980            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
50981        );
50982        assert_eq_m512(
50983            r,
50984            _mm512_setr_ps(
50985                15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
50986            ),
50987        )
50988    }
50989
50990    #[simd_test(enable = "avx512f")]
50991    unsafe fn test_mm512_set1_ps() {
50992        #[rustfmt::skip]
50993        let expected = _mm512_set_ps(2., 2., 2., 2., 2., 2., 2., 2.,
50994                                     2., 2., 2., 2., 2., 2., 2., 2.);
50995        assert_eq_m512(expected, _mm512_set1_ps(2.));
50996    }
50997
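    // `_mm512_set4_*` broadcasts its four arguments across the register with the first
    // argument in the highest position of each group of four, while `_mm512_setr4_*` repeats
    // them in memory order, so `_mm512_set4_epi32(4, 3, 2, 1)` and
    // `_mm512_setr4_epi32(1, 2, 3, 4)` build the same vector.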
50998    #[simd_test(enable = "avx512f")]
50999    unsafe fn test_mm512_set4_epi32() {
51000        let r = _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1);
51001        assert_eq_m512i(r, _mm512_set4_epi32(4, 3, 2, 1));
51002    }
51003
51004    #[simd_test(enable = "avx512f")]
51005    unsafe fn test_mm512_set4_ps() {
51006        let r = _mm512_set_ps(
51007            4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1.,
51008        );
51009        assert_eq_m512(r, _mm512_set4_ps(4., 3., 2., 1.));
51010    }
51011
51012    #[simd_test(enable = "avx512f")]
51013    unsafe fn test_mm512_setr4_epi32() {
51014        let r = _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1);
51015        assert_eq_m512i(r, _mm512_setr4_epi32(1, 2, 3, 4));
51016    }
51017
51018    #[simd_test(enable = "avx512f")]
51019    unsafe fn test_mm512_setr4_ps() {
51020        let r = _mm512_set_ps(
51021            4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1.,
51022        );
51023        assert_eq_m512(r, _mm512_setr4_ps(1., 2., 3., 4.));
51024    }
51025
51026    #[simd_test(enable = "avx512f")]
51027    unsafe fn test_mm512_setzero_ps() {
51028        assert_eq_m512(_mm512_setzero_ps(), _mm512_set1_ps(0.));
51029    }
51030
51031    #[simd_test(enable = "avx512f")]
51032    unsafe fn test_mm512_setzero() {
51033        assert_eq_m512(_mm512_setzero(), _mm512_set1_ps(0.));
51034    }
51035
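    // The `loadu`/`storeu` intrinsics have no alignment requirement, while the aligned
    // `load`/`store` variants tested further below require the pointer to be aligned to the
    // full vector width (64 bytes for 512-bit, 32 for 256-bit, 16 for 128-bit); the
    // `#[repr(align(..))]` wrapper structs provide that alignment. `black_box` keeps the
    // compiler from folding the loads away.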
51036    #[simd_test(enable = "avx512f")]
51037    unsafe fn test_mm512_loadu_pd() {
51038        let a = &[4., 3., 2., 5., 8., 9., 64., 50.];
51039        let p = a.as_ptr();
51040        let r = _mm512_loadu_pd(black_box(p));
51041        let e = _mm512_setr_pd(4., 3., 2., 5., 8., 9., 64., 50.);
51042        assert_eq_m512d(r, e);
51043    }
51044
51045    #[simd_test(enable = "avx512f")]
51046    unsafe fn test_mm512_storeu_pd() {
51047        let a = _mm512_set1_pd(9.);
51048        let mut r = _mm512_undefined_pd();
51049        _mm512_storeu_pd(&mut r as *mut _ as *mut f64, a);
51050        assert_eq_m512d(r, a);
51051    }
51052
51053    #[simd_test(enable = "avx512f")]
51054    unsafe fn test_mm512_loadu_ps() {
51055        let a = &[
51056            4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
51057        ];
51058        let p = a.as_ptr();
51059        let r = _mm512_loadu_ps(black_box(p));
51060        let e = _mm512_setr_ps(
51061            4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
51062        );
51063        assert_eq_m512(r, e);
51064    }
51065
51066    #[simd_test(enable = "avx512f")]
51067    unsafe fn test_mm512_storeu_ps() {
51068        let a = _mm512_set1_ps(9.);
51069        let mut r = _mm512_undefined_ps();
51070        _mm512_storeu_ps(&mut r as *mut _ as *mut f32, a);
51071        assert_eq_m512(r, a);
51072    }
51073
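    // For the masked loads and stores below, mask bit `i` controls element `i`: with
    // `m = 0b11101000_11001010`, elements 1, 3, 6, 7, 11, 13, 14 and 15 come from memory,
    // while the remaining elements are taken from `src` (mask variant), zeroed (maskz
    // variant), or left untouched in memory (masked store).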
51074    #[simd_test(enable = "avx512f")]
51075    unsafe fn test_mm512_mask_loadu_epi32() {
51076        let src = _mm512_set1_epi32(42);
51077        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
51078        let p = a.as_ptr();
51079        let m = 0b11101000_11001010;
51080        let r = _mm512_mask_loadu_epi32(src, m, black_box(p));
51081        let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
51082        assert_eq_m512i(r, e);
51083    }
51084
51085    #[simd_test(enable = "avx512f")]
51086    unsafe fn test_mm512_maskz_loadu_epi32() {
51087        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
51088        let p = a.as_ptr();
51089        let m = 0b11101000_11001010;
51090        let r = _mm512_maskz_loadu_epi32(m, black_box(p));
51091        let e = _mm512_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16);
51092        assert_eq_m512i(r, e);
51093    }
51094
51095    #[simd_test(enable = "avx512f")]
51096    unsafe fn test_mm512_mask_load_epi32() {
51097        #[repr(align(64))]
51098        struct Align {
51099            data: [i32; 16], // 64 bytes
51100        }
51101        let src = _mm512_set1_epi32(42);
51102        let a = Align {
51103            data: [1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
51104        };
51105        let p = a.data.as_ptr();
51106        let m = 0b11101000_11001010;
51107        let r = _mm512_mask_load_epi32(src, m, black_box(p));
51108        let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
51109        assert_eq_m512i(r, e);
51110    }
51111
51112    #[simd_test(enable = "avx512f")]
51113    unsafe fn test_mm512_maskz_load_epi32() {
51114        #[repr(align(64))]
51115        struct Align {
51116            data: [i32; 16], // 64 bytes
51117        }
51118        let a = Align {
51119            data: [1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
51120        };
51121        let p = a.data.as_ptr();
51122        let m = 0b11101000_11001010;
51123        let r = _mm512_maskz_load_epi32(m, black_box(p));
51124        let e = _mm512_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16);
51125        assert_eq_m512i(r, e);
51126    }
51127
51128    #[simd_test(enable = "avx512f")]
51129    unsafe fn test_mm512_mask_storeu_epi32() {
51130        let mut r = [42_i32; 16];
51131        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
51132        let m = 0b11101000_11001010;
51133        _mm512_mask_storeu_epi32(r.as_mut_ptr(), m, a);
51134        let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
51135        assert_eq_m512i(_mm512_loadu_epi32(r.as_ptr()), e);
51136    }
51137
51138    #[simd_test(enable = "avx512f")]
51139    unsafe fn test_mm512_mask_store_epi32() {
51140        #[repr(align(64))]
51141        struct Align {
51142            data: [i32; 16],
51143        }
51144        let mut r = Align { data: [42; 16] };
51145        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
51146        let m = 0b11101000_11001010;
51147        _mm512_mask_store_epi32(r.data.as_mut_ptr(), m, a);
51148        let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
51149        assert_eq_m512i(_mm512_load_epi32(r.data.as_ptr()), e);
51150    }
51151
51152    #[simd_test(enable = "avx512f")]
51153    unsafe fn test_mm512_mask_loadu_epi64() {
51154        let src = _mm512_set1_epi64(42);
51155        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
51156        let p = a.as_ptr();
51157        let m = 0b11001010;
51158        let r = _mm512_mask_loadu_epi64(src, m, black_box(p));
51159        let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
51160        assert_eq_m512i(r, e);
51161    }
51162
51163    #[simd_test(enable = "avx512f")]
51164    unsafe fn test_mm512_maskz_loadu_epi64() {
51165        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
51166        let p = a.as_ptr();
51167        let m = 0b11001010;
51168        let r = _mm512_maskz_loadu_epi64(m, black_box(p));
51169        let e = _mm512_setr_epi64(0, 2, 0, 4, 0, 0, 7, 8);
51170        assert_eq_m512i(r, e);
51171    }
51172
51173    #[simd_test(enable = "avx512f")]
51174    unsafe fn test_mm512_mask_load_epi64() {
51175        #[repr(align(64))]
51176        struct Align {
51177            data: [i64; 8], // 64 bytes
51178        }
51179        let src = _mm512_set1_epi64(42);
51180        let a = Align {
51181            data: [1_i64, 2, 3, 4, 5, 6, 7, 8],
51182        };
51183        let p = a.data.as_ptr();
51184        let m = 0b11001010;
51185        let r = _mm512_mask_load_epi64(src, m, black_box(p));
51186        let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
51187        assert_eq_m512i(r, e);
51188    }
51189
51190    #[simd_test(enable = "avx512f")]
51191    unsafe fn test_mm512_maskz_load_epi64() {
51192        #[repr(align(64))]
51193        struct Align {
51194            data: [i64; 8], // 64 bytes
51195        }
51196        let a = Align {
51197            data: [1_i64, 2, 3, 4, 5, 6, 7, 8],
51198        };
51199        let p = a.data.as_ptr();
51200        let m = 0b11001010;
51201        let r = _mm512_maskz_load_epi64(m, black_box(p));
51202        let e = _mm512_setr_epi64(0, 2, 0, 4, 0, 0, 7, 8);
51203        assert_eq_m512i(r, e);
51204    }
51205
51206    #[simd_test(enable = "avx512f")]
51207    unsafe fn test_mm512_mask_storeu_epi64() {
51208        let mut r = [42_i64; 8];
51209        let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
51210        let m = 0b11001010;
51211        _mm512_mask_storeu_epi64(r.as_mut_ptr(), m, a);
51212        let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
51213        assert_eq_m512i(_mm512_loadu_epi64(r.as_ptr()), e);
51214    }
51215
51216    #[simd_test(enable = "avx512f")]
51217    unsafe fn test_mm512_mask_store_epi64() {
51218        #[repr(align(64))]
51219        struct Align {
51220            data: [i64; 8],
51221        }
51222        let mut r = Align { data: [42; 8] };
51223        let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
51224        let m = 0b11001010;
51225        let p = r.data.as_mut_ptr();
51226        _mm512_mask_store_epi64(p, m, a);
51227        let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
51228        assert_eq_m512i(_mm512_load_epi64(r.data.as_ptr()), e);
51229    }
51230
51231    #[simd_test(enable = "avx512f")]
51232    unsafe fn test_mm512_mask_loadu_ps() {
51233        let src = _mm512_set1_ps(42.0);
51234        let a = &[
51235            1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
51236            16.0,
51237        ];
51238        let p = a.as_ptr();
51239        let m = 0b11101000_11001010;
51240        let r = _mm512_mask_loadu_ps(src, m, black_box(p));
51241        let e = _mm512_setr_ps(
51242            42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
51243            16.0,
51244        );
51245        assert_eq_m512(r, e);
51246    }
51247
51248    #[simd_test(enable = "avx512f")]
51249    unsafe fn test_mm512_maskz_loadu_ps() {
51250        let a = &[
51251            1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
51252            16.0,
51253        ];
51254        let p = a.as_ptr();
51255        let m = 0b11101000_11001010;
51256        let r = _mm512_maskz_loadu_ps(m, black_box(p));
51257        let e = _mm512_setr_ps(
51258            0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0, 0.0, 0.0, 0.0, 12.0, 0.0, 14.0, 15.0, 16.0,
51259        );
51260        assert_eq_m512(r, e);
51261    }
51262
51263    #[simd_test(enable = "avx512f")]
51264    unsafe fn test_mm512_mask_load_ps() {
51265        #[repr(align(64))]
51266        struct Align {
51267            data: [f32; 16], // 64 bytes
51268        }
51269        let src = _mm512_set1_ps(42.0);
51270        let a = Align {
51271            data: [
51272                1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
51273                15.0, 16.0,
51274            ],
51275        };
51276        let p = a.data.as_ptr();
51277        let m = 0b11101000_11001010;
51278        let r = _mm512_mask_load_ps(src, m, black_box(p));
51279        let e = _mm512_setr_ps(
51280            42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
51281            16.0,
51282        );
51283        assert_eq_m512(r, e);
51284    }
51285
51286    #[simd_test(enable = "avx512f")]
51287    unsafe fn test_mm512_maskz_load_ps() {
51288        #[repr(align(64))]
51289        struct Align {
51290            data: [f32; 16], // 64 bytes
51291        }
51292        let a = Align {
51293            data: [
51294                1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
51295                15.0, 16.0,
51296            ],
51297        };
51298        let p = a.data.as_ptr();
51299        let m = 0b11101000_11001010;
51300        let r = _mm512_maskz_load_ps(m, black_box(p));
51301        let e = _mm512_setr_ps(
51302            0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0, 0.0, 0.0, 0.0, 12.0, 0.0, 14.0, 15.0, 16.0,
51303        );
51304        assert_eq_m512(r, e);
51305    }
51306
51307    #[simd_test(enable = "avx512f")]
51308    unsafe fn test_mm512_mask_storeu_ps() {
51309        let mut r = [42_f32; 16];
51310        let a = _mm512_setr_ps(
51311            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
51312        );
51313        let m = 0b11101000_11001010;
51314        _mm512_mask_storeu_ps(r.as_mut_ptr(), m, a);
51315        let e = _mm512_setr_ps(
51316            42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
51317            16.0,
51318        );
51319        assert_eq_m512(_mm512_loadu_ps(r.as_ptr()), e);
51320    }
51321
51322    #[simd_test(enable = "avx512f")]
51323    unsafe fn test_mm512_mask_store_ps() {
51324        #[repr(align(64))]
51325        struct Align {
51326            data: [f32; 16],
51327        }
51328        let mut r = Align { data: [42.0; 16] };
51329        let a = _mm512_setr_ps(
51330            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
51331        );
51332        let m = 0b11101000_11001010;
51333        _mm512_mask_store_ps(r.data.as_mut_ptr(), m, a);
51334        let e = _mm512_setr_ps(
51335            42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
51336            16.0,
51337        );
51338        assert_eq_m512(_mm512_load_ps(r.data.as_ptr()), e);
51339    }
51340
51341    #[simd_test(enable = "avx512f")]
51342    unsafe fn test_mm512_mask_loadu_pd() {
51343        let src = _mm512_set1_pd(42.0);
51344        let a = &[1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
51345        let p = a.as_ptr();
51346        let m = 0b11001010;
51347        let r = _mm512_mask_loadu_pd(src, m, black_box(p));
51348        let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51349        assert_eq_m512d(r, e);
51350    }
51351
51352    #[simd_test(enable = "avx512f")]
51353    unsafe fn test_mm512_maskz_loadu_pd() {
51354        let a = &[1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
51355        let p = a.as_ptr();
51356        let m = 0b11001010;
51357        let r = _mm512_maskz_loadu_pd(m, black_box(p));
51358        let e = _mm512_setr_pd(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
51359        assert_eq_m512d(r, e);
51360    }
51361
51362    #[simd_test(enable = "avx512f")]
51363    unsafe fn test_mm512_mask_load_pd() {
51364        #[repr(align(64))]
51365        struct Align {
51366            data: [f64; 8], // 64 bytes
51367        }
51368        let src = _mm512_set1_pd(42.0);
51369        let a = Align {
51370            data: [1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
51371        };
51372        let p = a.data.as_ptr();
51373        let m = 0b11001010;
51374        let r = _mm512_mask_load_pd(src, m, black_box(p));
51375        let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51376        assert_eq_m512d(r, e);
51377    }
51378
51379    #[simd_test(enable = "avx512f")]
51380    unsafe fn test_mm512_maskz_load_pd() {
51381        #[repr(align(64))]
51382        struct Align {
51383            data: [f64; 8], // 64 bytes
51384        }
51385        let a = Align {
51386            data: [1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
51387        };
51388        let p = a.data.as_ptr();
51389        let m = 0b11001010;
51390        let r = _mm512_maskz_load_pd(m, black_box(p));
51391        let e = _mm512_setr_pd(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
51392        assert_eq_m512d(r, e);
51393    }
51394
51395    #[simd_test(enable = "avx512f")]
51396    unsafe fn test_mm512_mask_storeu_pd() {
51397        let mut r = [42_f64; 8];
51398        let a = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
51399        let m = 0b11001010;
51400        _mm512_mask_storeu_pd(r.as_mut_ptr(), m, a);
51401        let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51402        assert_eq_m512d(_mm512_loadu_pd(r.as_ptr()), e);
51403    }
51404
51405    #[simd_test(enable = "avx512f")]
51406    unsafe fn test_mm512_mask_store_pd() {
51407        #[repr(align(64))]
51408        struct Align {
51409            data: [f64; 8],
51410        }
51411        let mut r = Align { data: [42.0; 8] };
51412        let a = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
51413        let m = 0b11001010;
51414        _mm512_mask_store_pd(r.data.as_mut_ptr(), m, a);
51415        let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51416        assert_eq_m512d(_mm512_load_pd(r.data.as_ptr()), e);
51417    }
51418
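    // The 256-bit and 128-bit masked load/store tests below follow the same pattern with
    // `__mmask8` masks; only the low 8, 4 or 2 bits are meaningful for the respective
    // element counts.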
51419    #[simd_test(enable = "avx512f,avx512vl")]
51420    unsafe fn test_mm256_mask_loadu_epi32() {
51421        let src = _mm256_set1_epi32(42);
51422        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
51423        let p = a.as_ptr();
51424        let m = 0b11001010;
51425        let r = _mm256_mask_loadu_epi32(src, m, black_box(p));
51426        let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
51427        assert_eq_m256i(r, e);
51428    }
51429
51430    #[simd_test(enable = "avx512f,avx512vl")]
51431    unsafe fn test_mm256_maskz_loadu_epi32() {
51432        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
51433        let p = a.as_ptr();
51434        let m = 0b11001010;
51435        let r = _mm256_maskz_loadu_epi32(m, black_box(p));
51436        let e = _mm256_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8);
51437        assert_eq_m256i(r, e);
51438    }
51439
51440    #[simd_test(enable = "avx512f,avx512vl")]
51441    unsafe fn test_mm256_mask_load_epi32() {
51442        #[repr(align(32))]
51443        struct Align {
51444            data: [i32; 8], // 32 bytes
51445        }
51446        let src = _mm256_set1_epi32(42);
51447        let a = Align {
51448            data: [1_i32, 2, 3, 4, 5, 6, 7, 8],
51449        };
51450        let p = a.data.as_ptr();
51451        let m = 0b11001010;
51452        let r = _mm256_mask_load_epi32(src, m, black_box(p));
51453        let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
51454        assert_eq_m256i(r, e);
51455    }
51456
51457    #[simd_test(enable = "avx512f,avx512vl")]
51458    unsafe fn test_mm256_maskz_load_epi32() {
51459        #[repr(align(32))]
51460        struct Align {
51461            data: [i32; 8], // 32 bytes
51462        }
51463        let a = Align {
51464            data: [1_i32, 2, 3, 4, 5, 6, 7, 8],
51465        };
51466        let p = a.data.as_ptr();
51467        let m = 0b11001010;
51468        let r = _mm256_maskz_load_epi32(m, black_box(p));
51469        let e = _mm256_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8);
51470        assert_eq_m256i(r, e);
51471    }
51472
51473    #[simd_test(enable = "avx512f,avx512vl")]
51474    unsafe fn test_mm256_mask_storeu_epi32() {
51475        let mut r = [42_i32; 8];
51476        let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
51477        let m = 0b11001010;
51478        _mm256_mask_storeu_epi32(r.as_mut_ptr(), m, a);
51479        let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
51480        assert_eq_m256i(_mm256_loadu_epi32(r.as_ptr()), e);
51481    }
51482
51483    #[simd_test(enable = "avx512f,avx512vl")]
51484    unsafe fn test_mm256_mask_store_epi32() {
51485        #[repr(align(32))]
51486        struct Align {
51487            data: [i32; 8],
51488        }
51489        let mut r = Align { data: [42; 8] };
51490        let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
51491        let m = 0b11001010;
51492        _mm256_mask_store_epi32(r.data.as_mut_ptr(), m, a);
51493        let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
51494        assert_eq_m256i(_mm256_load_epi32(r.data.as_ptr()), e);
51495    }
51496
51497    #[simd_test(enable = "avx512f,avx512vl")]
51498    unsafe fn test_mm256_mask_loadu_epi64() {
51499        let src = _mm256_set1_epi64x(42);
51500        let a = &[1_i64, 2, 3, 4];
51501        let p = a.as_ptr();
51502        let m = 0b1010;
51503        let r = _mm256_mask_loadu_epi64(src, m, black_box(p));
51504        let e = _mm256_setr_epi64x(42, 2, 42, 4);
51505        assert_eq_m256i(r, e);
51506    }
51507
51508    #[simd_test(enable = "avx512f,avx512vl")]
51509    unsafe fn test_mm256_maskz_loadu_epi64() {
51510        let a = &[1_i64, 2, 3, 4];
51511        let p = a.as_ptr();
51512        let m = 0b1010;
51513        let r = _mm256_maskz_loadu_epi64(m, black_box(p));
51514        let e = _mm256_setr_epi64x(0, 2, 0, 4);
51515        assert_eq_m256i(r, e);
51516    }
51517
51518    #[simd_test(enable = "avx512f,avx512vl")]
51519    unsafe fn test_mm256_mask_load_epi64() {
51520        #[repr(align(32))]
51521        struct Align {
51522            data: [i64; 4], // 32 bytes
51523        }
51524        let src = _mm256_set1_epi64x(42);
51525        let a = Align {
51526            data: [1_i64, 2, 3, 4],
51527        };
51528        let p = a.data.as_ptr();
51529        let m = 0b1010;
51530        let r = _mm256_mask_load_epi64(src, m, black_box(p));
51531        let e = _mm256_setr_epi64x(42, 2, 42, 4);
51532        assert_eq_m256i(r, e);
51533    }
51534
51535    #[simd_test(enable = "avx512f,avx512vl")]
51536    unsafe fn test_mm256_maskz_load_epi64() {
51537        #[repr(align(32))]
51538        struct Align {
51539            data: [i64; 4], // 32 bytes
51540        }
51541        let a = Align {
51542            data: [1_i64, 2, 3, 4],
51543        };
51544        let p = a.data.as_ptr();
51545        let m = 0b1010;
51546        let r = _mm256_maskz_load_epi64(m, black_box(p));
51547        let e = _mm256_setr_epi64x(0, 2, 0, 4);
51548        assert_eq_m256i(r, e);
51549    }
51550
51551    #[simd_test(enable = "avx512f,avx512vl")]
51552    unsafe fn test_mm256_mask_storeu_epi64() {
51553        let mut r = [42_i64; 4];
51554        let a = _mm256_setr_epi64x(1, 2, 3, 4);
51555        let m = 0b1010;
51556        _mm256_mask_storeu_epi64(r.as_mut_ptr(), m, a);
51557        let e = _mm256_setr_epi64x(42, 2, 42, 4);
51558        assert_eq_m256i(_mm256_loadu_epi64(r.as_ptr()), e);
51559    }
51560
51561    #[simd_test(enable = "avx512f,avx512vl")]
51562    unsafe fn test_mm256_mask_store_epi64() {
51563        #[repr(align(32))]
51564        struct Align {
51565            data: [i64; 4],
51566        }
51567        let mut r = Align { data: [42; 4] };
51568        let a = _mm256_setr_epi64x(1, 2, 3, 4);
51569        let m = 0b1010;
51570        _mm256_mask_store_epi64(r.data.as_mut_ptr(), m, a);
51571        let e = _mm256_setr_epi64x(42, 2, 42, 4);
51572        assert_eq_m256i(_mm256_load_epi64(r.data.as_ptr()), e);
51573    }
51574
51575    #[simd_test(enable = "avx512f,avx512vl")]
51576    unsafe fn test_mm256_mask_loadu_ps() {
51577        let src = _mm256_set1_ps(42.0);
51578        let a = &[1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
51579        let p = a.as_ptr();
51580        let m = 0b11001010;
51581        let r = _mm256_mask_loadu_ps(src, m, black_box(p));
51582        let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51583        assert_eq_m256(r, e);
51584    }
51585
51586    #[simd_test(enable = "avx512f,avx512vl")]
51587    unsafe fn test_mm256_maskz_loadu_ps() {
51588        let a = &[1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
51589        let p = a.as_ptr();
51590        let m = 0b11001010;
51591        let r = _mm256_maskz_loadu_ps(m, black_box(p));
51592        let e = _mm256_setr_ps(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
51593        assert_eq_m256(r, e);
51594    }
51595
51596    #[simd_test(enable = "avx512f,avx512vl")]
51597    unsafe fn test_mm256_mask_load_ps() {
51598        #[repr(align(32))]
51599        struct Align {
51600            data: [f32; 8], // 32 bytes
51601        }
51602        let src = _mm256_set1_ps(42.0);
51603        let a = Align {
51604            data: [1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
51605        };
51606        let p = a.data.as_ptr();
51607        let m = 0b11001010;
51608        let r = _mm256_mask_load_ps(src, m, black_box(p));
51609        let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51610        assert_eq_m256(r, e);
51611    }
51612
51613    #[simd_test(enable = "avx512f,avx512vl")]
51614    unsafe fn test_mm256_maskz_load_ps() {
51615        #[repr(align(32))]
51616        struct Align {
51617            data: [f32; 8], // 32 bytes
51618        }
51619        let a = Align {
51620            data: [1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
51621        };
51622        let p = a.data.as_ptr();
51623        let m = 0b11001010;
51624        let r = _mm256_maskz_load_ps(m, black_box(p));
51625        let e = _mm256_setr_ps(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
51626        assert_eq_m256(r, e);
51627    }
51628
51629    #[simd_test(enable = "avx512f,avx512vl")]
51630    unsafe fn test_mm256_mask_storeu_ps() {
51631        let mut r = [42_f32; 8];
51632        let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
51633        let m = 0b11001010;
51634        _mm256_mask_storeu_ps(r.as_mut_ptr(), m, a);
51635        let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51636        assert_eq_m256(_mm256_loadu_ps(r.as_ptr()), e);
51637    }
51638
51639    #[simd_test(enable = "avx512f,avx512vl")]
51640    unsafe fn test_mm256_mask_store_ps() {
51641        #[repr(align(32))]
51642        struct Align {
51643            data: [f32; 8],
51644        }
51645        let mut r = Align { data: [42.0; 8] };
51646        let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
51647        let m = 0b11001010;
51648        _mm256_mask_store_ps(r.data.as_mut_ptr(), m, a);
51649        let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51650        assert_eq_m256(_mm256_load_ps(r.data.as_ptr()), e);
51651    }
51652
51653    #[simd_test(enable = "avx512f,avx512vl")]
51654    unsafe fn test_mm256_mask_loadu_pd() {
51655        let src = _mm256_set1_pd(42.0);
51656        let a = &[1.0_f64, 2.0, 3.0, 4.0];
51657        let p = a.as_ptr();
51658        let m = 0b1010;
51659        let r = _mm256_mask_loadu_pd(src, m, black_box(p));
51660        let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
51661        assert_eq_m256d(r, e);
51662    }
51663
51664    #[simd_test(enable = "avx512f,avx512vl")]
51665    unsafe fn test_mm256_maskz_loadu_pd() {
51666        let a = &[1.0_f64, 2.0, 3.0, 4.0];
51667        let p = a.as_ptr();
51668        let m = 0b1010;
51669        let r = _mm256_maskz_loadu_pd(m, black_box(p));
51670        let e = _mm256_setr_pd(0.0, 2.0, 0.0, 4.0);
51671        assert_eq_m256d(r, e);
51672    }
51673
51674    #[simd_test(enable = "avx512f,avx512vl")]
51675    unsafe fn test_mm256_mask_load_pd() {
51676        #[repr(align(32))]
51677        struct Align {
51678            data: [f64; 4], // 32 bytes
51679        }
51680        let src = _mm256_set1_pd(42.0);
51681        let a = Align {
51682            data: [1.0_f64, 2.0, 3.0, 4.0],
51683        };
51684        let p = a.data.as_ptr();
51685        let m = 0b1010;
51686        let r = _mm256_mask_load_pd(src, m, black_box(p));
51687        let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
51688        assert_eq_m256d(r, e);
51689    }
51690
51691    #[simd_test(enable = "avx512f,avx512vl")]
51692    unsafe fn test_mm256_maskz_load_pd() {
51693        #[repr(align(32))]
51694        struct Align {
51695            data: [f64; 4], // 32 bytes
51696        }
51697        let a = Align {
51698            data: [1.0_f64, 2.0, 3.0, 4.0],
51699        };
51700        let p = a.data.as_ptr();
51701        let m = 0b1010;
51702        let r = _mm256_maskz_load_pd(m, black_box(p));
51703        let e = _mm256_setr_pd(0.0, 2.0, 0.0, 4.0);
51704        assert_eq_m256d(r, e);
51705    }
51706
51707    #[simd_test(enable = "avx512f,avx512vl")]
51708    unsafe fn test_mm256_mask_storeu_pd() {
51709        let mut r = [42_f64; 4];
51710        let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
51711        let m = 0b1010;
51712        _mm256_mask_storeu_pd(r.as_mut_ptr(), m, a);
51713        let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
51714        assert_eq_m256d(_mm256_loadu_pd(r.as_ptr()), e);
51715    }
51716
51717    #[simd_test(enable = "avx512f,avx512vl")]
51718    unsafe fn test_mm256_mask_store_pd() {
51719        #[repr(align(32))]
51720        struct Align {
51721            data: [f64; 4],
51722        }
51723        let mut r = Align { data: [42.0; 4] };
51724        let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
51725        let m = 0b1010;
51726        _mm256_mask_store_pd(r.data.as_mut_ptr(), m, a);
51727        let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
51728        assert_eq_m256d(_mm256_load_pd(r.data.as_ptr()), e);
51729    }
51730
51731    #[simd_test(enable = "avx512f,avx512vl")]
51732    unsafe fn test_mm_mask_loadu_epi32() {
51733        let src = _mm_set1_epi32(42);
51734        let a = &[1_i32, 2, 3, 4];
51735        let p = a.as_ptr();
51736        let m = 0b1010;
51737        let r = _mm_mask_loadu_epi32(src, m, black_box(p));
51738        let e = _mm_setr_epi32(42, 2, 42, 4);
51739        assert_eq_m128i(r, e);
51740    }
51741
51742    #[simd_test(enable = "avx512f,avx512vl")]
51743    unsafe fn test_mm_maskz_loadu_epi32() {
51744        let a = &[1_i32, 2, 3, 4];
51745        let p = a.as_ptr();
51746        let m = 0b1010;
51747        let r = _mm_maskz_loadu_epi32(m, black_box(p));
51748        let e = _mm_setr_epi32(0, 2, 0, 4);
51749        assert_eq_m128i(r, e);
51750    }
51751
51752    #[simd_test(enable = "avx512f,avx512vl")]
51753    unsafe fn test_mm_mask_load_epi32() {
51754        #[repr(align(16))]
51755        struct Align {
51756            data: [i32; 4], // 16 bytes
51757        }
51758        let src = _mm_set1_epi32(42);
51759        let a = Align {
51760            data: [1_i32, 2, 3, 4],
51761        };
51762        let p = a.data.as_ptr();
51763        let m = 0b1010;
51764        let r = _mm_mask_load_epi32(src, m, black_box(p));
51765        let e = _mm_setr_epi32(42, 2, 42, 4);
51766        assert_eq_m128i(r, e);
51767    }
51768
51769    #[simd_test(enable = "avx512f,avx512vl")]
51770    unsafe fn test_mm_maskz_load_epi32() {
51771        #[repr(align(16))]
51772        struct Align {
51773            data: [i32; 4], // 16 bytes
51774        }
51775        let a = Align {
51776            data: [1_i32, 2, 3, 4],
51777        };
51778        let p = a.data.as_ptr();
51779        let m = 0b1010;
51780        let r = _mm_maskz_load_epi32(m, black_box(p));
51781        let e = _mm_setr_epi32(0, 2, 0, 4);
51782        assert_eq_m128i(r, e);
51783    }
51784
51785    #[simd_test(enable = "avx512f,avx512vl")]
51786    unsafe fn test_mm_mask_storeu_epi32() {
51787        let mut r = [42_i32; 4];
51788        let a = _mm_setr_epi32(1, 2, 3, 4);
51789        let m = 0b1010;
51790        _mm_mask_storeu_epi32(r.as_mut_ptr(), m, a);
51791        let e = _mm_setr_epi32(42, 2, 42, 4);
51792        assert_eq_m128i(_mm_loadu_epi32(r.as_ptr()), e);
51793    }
51794
51795    #[simd_test(enable = "avx512f,avx512vl")]
51796    unsafe fn test_mm_mask_store_epi32() {
51797        #[repr(align(16))]
51798        struct Align {
51799            data: [i32; 4], // 16 bytes
51800        }
51801        let mut r = Align { data: [42; 4] };
51802        let a = _mm_setr_epi32(1, 2, 3, 4);
51803        let m = 0b1010;
51804        _mm_mask_store_epi32(r.data.as_mut_ptr(), m, a);
51805        let e = _mm_setr_epi32(42, 2, 42, 4);
51806        assert_eq_m128i(_mm_load_epi32(r.data.as_ptr()), e);
51807    }
51808
51809    #[simd_test(enable = "avx512f,avx512vl")]
51810    unsafe fn test_mm_mask_loadu_epi64() {
51811        let src = _mm_set1_epi64x(42);
51812        let a = &[1_i64, 2];
51813        let p = a.as_ptr();
51814        let m = 0b10;
51815        let r = _mm_mask_loadu_epi64(src, m, black_box(p));
51816        let e = _mm_setr_epi64x(42, 2);
51817        assert_eq_m128i(r, e);
51818    }
51819
51820    #[simd_test(enable = "avx512f,avx512vl")]
51821    unsafe fn test_mm_maskz_loadu_epi64() {
51822        let a = &[1_i64, 2];
51823        let p = a.as_ptr();
51824        let m = 0b10;
51825        let r = _mm_maskz_loadu_epi64(m, black_box(p));
51826        let e = _mm_setr_epi64x(0, 2);
51827        assert_eq_m128i(r, e);
51828    }
51829
51830    #[simd_test(enable = "avx512f,avx512vl")]
51831    unsafe fn test_mm_mask_load_epi64() {
51832        #[repr(align(16))]
51833        struct Align {
51834            data: [i64; 2], // 16 bytes
51835        }
51836        let src = _mm_set1_epi64x(42);
51837        let a = Align { data: [1_i64, 2] };
51838        let p = a.data.as_ptr();
51839        let m = 0b10;
51840        let r = _mm_mask_load_epi64(src, m, black_box(p));
51841        let e = _mm_setr_epi64x(42, 2);
51842        assert_eq_m128i(r, e);
51843    }
51844
51845    #[simd_test(enable = "avx512f,avx512vl")]
51846    unsafe fn test_mm_maskz_load_epi64() {
51847        #[repr(align(16))]
51848        struct Align {
51849            data: [i64; 2], // 16 bytes
51850        }
51851        let a = Align { data: [1_i64, 2] };
51852        let p = a.data.as_ptr();
51853        let m = 0b10;
51854        let r = _mm_maskz_load_epi64(m, black_box(p));
51855        let e = _mm_setr_epi64x(0, 2);
51856        assert_eq_m128i(r, e);
51857    }
51858
51859    #[simd_test(enable = "avx512f,avx512vl")]
51860    unsafe fn test_mm_mask_storeu_epi64() {
51861        let mut r = [42_i64; 2];
51862        let a = _mm_setr_epi64x(1, 2);
51863        let m = 0b10;
51864        _mm_mask_storeu_epi64(r.as_mut_ptr(), m, a);
51865        let e = _mm_setr_epi64x(42, 2);
51866        assert_eq_m128i(_mm_loadu_epi64(r.as_ptr()), e);
51867    }
51868
51869    #[simd_test(enable = "avx512f,avx512vl")]
51870    unsafe fn test_mm_mask_store_epi64() {
51871        #[repr(align(16))]
51872        struct Align {
51873            data: [i64; 2], // 16 bytes
51874        }
51875        let mut r = Align { data: [42; 2] };
51876        let a = _mm_setr_epi64x(1, 2);
51877        let m = 0b10;
51878        _mm_mask_store_epi64(r.data.as_mut_ptr(), m, a);
51879        let e = _mm_setr_epi64x(42, 2);
51880        assert_eq_m128i(_mm_load_epi64(r.data.as_ptr()), e);
51881    }
51882
51883    #[simd_test(enable = "avx512f,avx512vl")]
51884    unsafe fn test_mm_mask_loadu_ps() {
51885        let src = _mm_set1_ps(42.0);
51886        let a = &[1.0_f32, 2.0, 3.0, 4.0];
51887        let p = a.as_ptr();
51888        let m = 0b1010;
51889        let r = _mm_mask_loadu_ps(src, m, black_box(p));
51890        let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
51891        assert_eq_m128(r, e);
51892    }
51893
51894    #[simd_test(enable = "avx512f,avx512vl")]
51895    unsafe fn test_mm_maskz_loadu_ps() {
51896        let a = &[1.0_f32, 2.0, 3.0, 4.0];
51897        let p = a.as_ptr();
51898        let m = 0b1010;
51899        let r = _mm_maskz_loadu_ps(m, black_box(p));
51900        let e = _mm_setr_ps(0.0, 2.0, 0.0, 4.0);
51901        assert_eq_m128(r, e);
51902    }
51903
51904    #[simd_test(enable = "avx512f,avx512vl")]
51905    unsafe fn test_mm_mask_load_ps() {
51906        #[repr(align(16))]
51907        struct Align {
51908            data: [f32; 4], // 16 bytes
51909        }
51910        let src = _mm_set1_ps(42.0);
51911        let a = Align {
51912            data: [1.0_f32, 2.0, 3.0, 4.0],
51913        };
51914        let p = a.data.as_ptr();
51915        let m = 0b1010;
51916        let r = _mm_mask_load_ps(src, m, black_box(p));
51917        let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
51918        assert_eq_m128(r, e);
51919    }
51920
51921    #[simd_test(enable = "avx512f,avx512vl")]
51922    unsafe fn test_mm_maskz_load_ps() {
51923        #[repr(align(16))]
51924        struct Align {
51925            data: [f32; 4], // 16 bytes
51926        }
51927        let a = Align {
51928            data: [1.0_f32, 2.0, 3.0, 4.0],
51929        };
51930        let p = a.data.as_ptr();
51931        let m = 0b1010;
51932        let r = _mm_maskz_load_ps(m, black_box(p));
51933        let e = _mm_setr_ps(0.0, 2.0, 0.0, 4.0);
51934        assert_eq_m128(r, e);
51935    }
51936
51937    #[simd_test(enable = "avx512f,avx512vl")]
51938    unsafe fn test_mm_mask_storeu_ps() {
51939        let mut r = [42_f32; 4];
51940        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
51941        let m = 0b1010;
51942        _mm_mask_storeu_ps(r.as_mut_ptr(), m, a);
51943        let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
51944        assert_eq_m128(_mm_loadu_ps(r.as_ptr()), e);
51945    }
51946
51947    #[simd_test(enable = "avx512f,avx512vl")]
51948    unsafe fn test_mm_mask_store_ps() {
51949        #[repr(align(16))]
51950        struct Align {
51951            data: [f32; 4], // 16 bytes
51952        }
51953        let mut r = Align { data: [42.0; 4] };
51954        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
51955        let m = 0b1010;
51956        _mm_mask_store_ps(r.data.as_mut_ptr(), m, a);
51957        let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
51958        assert_eq_m128(_mm_load_ps(r.data.as_ptr()), e);
51959    }
51960
51961    #[simd_test(enable = "avx512f,avx512vl")]
51962    unsafe fn test_mm_mask_loadu_pd() {
51963        let src = _mm_set1_pd(42.0);
51964        let a = &[1.0_f64, 2.0];
51965        let p = a.as_ptr();
51966        let m = 0b10;
51967        let r = _mm_mask_loadu_pd(src, m, black_box(p));
51968        let e = _mm_setr_pd(42.0, 2.0);
51969        assert_eq_m128d(r, e);
51970    }
51971
51972    #[simd_test(enable = "avx512f,avx512vl")]
51973    unsafe fn test_mm_maskz_loadu_pd() {
51974        let a = &[1.0_f64, 2.0];
51975        let p = a.as_ptr();
51976        let m = 0b10;
51977        let r = _mm_maskz_loadu_pd(m, black_box(p));
51978        let e = _mm_setr_pd(0.0, 2.0);
51979        assert_eq_m128d(r, e);
51980    }
51981
51982    #[simd_test(enable = "avx512f,avx512vl")]
51983    unsafe fn test_mm_mask_load_pd() {
51984        #[repr(align(16))]
51985        struct Align {
51986            data: [f64; 2], // 16 bytes
51987        }
51988        let src = _mm_set1_pd(42.0);
51989        let a = Align {
51990            data: [1.0_f64, 2.0],
51991        };
51992        let p = a.data.as_ptr();
51993        let m = 0b10;
51994        let r = _mm_mask_load_pd(src, m, black_box(p));
51995        let e = _mm_setr_pd(42.0, 2.0);
51996        assert_eq_m128d(r, e);
51997    }
51998
51999    #[simd_test(enable = "avx512f,avx512vl")]
52000    unsafe fn test_mm_maskz_load_pd() {
52001        #[repr(align(16))]
52002        struct Align {
52003            data: [f64; 2], // 16 bytes
52004        }
52005        let a = Align {
52006            data: [1.0_f64, 2.0],
52007        };
52008        let p = a.data.as_ptr();
52009        let m = 0b10;
52010        let r = _mm_maskz_load_pd(m, black_box(p));
52011        let e = _mm_setr_pd(0.0, 2.0);
52012        assert_eq_m128d(r, e);
52013    }
52014
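    // Scalar masked loads: `_mm_mask_load_ss`/`_sd` read a single element under
    // bit 0 of the mask. If the bit is clear, the low lane comes from `src`
    // (mask_) or is zeroed (maskz_); the upper lanes of the result are zeroed
    // in every case.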
52015    #[simd_test(enable = "avx512f")]
52016    unsafe fn test_mm_mask_load_ss() {
52017        #[repr(align(16))]
52018        struct Align {
52019            data: f32,
52020        }
52021        let src = _mm_set_ss(2.0);
52022        let mem = Align { data: 1.0 };
52023        let r = _mm_mask_load_ss(src, 0b1, &mem.data);
52024        assert_eq_m128(r, _mm_set_ss(1.0));
52025        let r = _mm_mask_load_ss(src, 0b0, &mem.data);
52026        assert_eq_m128(r, _mm_set_ss(2.0));
52027    }
52028
52029    #[simd_test(enable = "avx512f")]
52030    unsafe fn test_mm_maskz_load_ss() {
52031        #[repr(align(16))]
52032        struct Align {
52033            data: f32,
52034        }
52035        let mem = Align { data: 1.0 };
52036        let r = _mm_maskz_load_ss(0b1, &mem.data);
52037        assert_eq_m128(r, _mm_set_ss(1.0));
52038        let r = _mm_maskz_load_ss(0b0, &mem.data);
52039        assert_eq_m128(r, _mm_set_ss(0.0));
52040    }
52041
52042    #[simd_test(enable = "avx512f")]
52043    unsafe fn test_mm_mask_load_sd() {
52044        #[repr(align(16))]
52045        struct Align {
52046            data: f64,
52047        }
52048        let src = _mm_set_sd(2.0);
52049        let mem = Align { data: 1.0 };
52050        let r = _mm_mask_load_sd(src, 0b1, &mem.data);
52051        assert_eq_m128d(r, _mm_set_sd(1.0));
52052        let r = _mm_mask_load_sd(src, 0b0, &mem.data);
52053        assert_eq_m128d(r, _mm_set_sd(2.0));
52054    }
52055
52056    #[simd_test(enable = "avx512f")]
52057    unsafe fn test_mm_maskz_load_sd() {
52058        #[repr(align(16))]
52059        struct Align {
52060            data: f64,
52061        }
52062        let mem = Align { data: 1.0 };
52063        let r = _mm_maskz_load_sd(0b1, &mem.data);
52064        assert_eq_m128d(r, _mm_set_sd(1.0));
52065        let r = _mm_maskz_load_sd(0b0, &mem.data);
52066        assert_eq_m128d(r, _mm_set_sd(0.0));
52067    }
52068
52069    #[simd_test(enable = "avx512f,avx512vl")]
52070    unsafe fn test_mm_mask_storeu_pd() {
52071        let mut r = [42_f64; 2];
52072        let a = _mm_setr_pd(1.0, 2.0);
52073        let m = 0b10;
52074        _mm_mask_storeu_pd(r.as_mut_ptr(), m, a);
52075        let e = _mm_setr_pd(42.0, 2.0);
52076        assert_eq_m128d(_mm_loadu_pd(r.as_ptr()), e);
52077    }
52078
52079    #[simd_test(enable = "avx512f,avx512vl")]
52080    unsafe fn test_mm_mask_store_pd() {
52081        #[repr(align(16))]
52082        struct Align {
52083            data: [f64; 2], // 16 bytes
52084        }
52085        let mut r = Align { data: [42.0; 2] };
52086        let a = _mm_setr_pd(1.0, 2.0);
52087        let m = 0b10;
52088        _mm_mask_store_pd(r.data.as_mut_ptr(), m, a);
52089        let e = _mm_setr_pd(42.0, 2.0);
52090        assert_eq_m128d(_mm_load_pd(r.data.as_ptr()), e);
52091    }
52092
52093    #[simd_test(enable = "avx512f")]
52094    unsafe fn test_mm_mask_store_ss() {
52095        #[repr(align(16))]
52096        struct Align {
52097            data: f32,
52098        }
52099        let a = _mm_set_ss(2.0);
52100        let mut mem = Align { data: 1.0 };
52101        _mm_mask_store_ss(&mut mem.data, 0b1, a);
52102        assert_eq!(mem.data, 2.0);
52103        _mm_mask_store_ss(&mut mem.data, 0b0, a);
52104        assert_eq!(mem.data, 2.0);
52105    }
52106
52107    #[simd_test(enable = "avx512f")]
52108    unsafe fn test_mm_mask_store_sd() {
52109        #[repr(align(16))]
52110        struct Align {
52111            data: f64,
52112        }
52113        let a = _mm_set_sd(2.0);
52114        let mut mem = Align { data: 1.0 };
52115        _mm_mask_store_sd(&mut mem.data, 0b1, a);
52116        assert_eq!(mem.data, 2.0);
52117        _mm_mask_store_sd(&mut mem.data, 0b0, a);
52118        assert_eq!(mem.data, 2.0);
52119    }
52120
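    // `_mm512_set_pd` takes its arguments from the highest lane down, while
    // `_mm512_setr_pd` takes them in memory (lowest-lane-first) order, so the
    // two calls below agree once the argument lists are reversed.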
52121    #[simd_test(enable = "avx512f")]
52122    unsafe fn test_mm512_setr_pd() {
52123        let r = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
52124        assert_eq_m512d(r, _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.));
52125    }
52126
52127    #[simd_test(enable = "avx512f")]
52128    unsafe fn test_mm512_set_pd() {
52129        let r = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
52130        assert_eq_m512d(r, _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.));
52131    }
52132
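    // Rotate tests: `rol`/`ror` rotate each 32-bit lane by the immediate, so bits
    // shifted out of one end re-enter at the other (rotating `1 << 31` left by 1
    // yields `1 << 0`). Masked variants follow the usual src/zero conventions.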
52133    #[simd_test(enable = "avx512f")]
52134    unsafe fn test_mm512_rol_epi32() {
52135        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52136        let r = _mm512_rol_epi32::<1>(a);
52137        let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52138        assert_eq_m512i(r, e);
52139    }
52140
52141    #[simd_test(enable = "avx512f")]
52142    unsafe fn test_mm512_mask_rol_epi32() {
52143        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52144        let r = _mm512_mask_rol_epi32::<1>(a, 0, a);
52145        assert_eq_m512i(r, a);
52146        let r = _mm512_mask_rol_epi32::<1>(a, 0b11111111_11111111, a);
52147        let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52148        assert_eq_m512i(r, e);
52149    }
52150
52151    #[simd_test(enable = "avx512f")]
52152    unsafe fn test_mm512_maskz_rol_epi32() {
52153        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52154        let r = _mm512_maskz_rol_epi32::<1>(0, a);
52155        assert_eq_m512i(r, _mm512_setzero_si512());
52156        let r = _mm512_maskz_rol_epi32::<1>(0b00000000_11111111, a);
52157        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
52158        assert_eq_m512i(r, e);
52159    }
52160
52161    #[simd_test(enable = "avx512f,avx512vl")]
52162    unsafe fn test_mm256_rol_epi32() {
52163        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52164        let r = _mm256_rol_epi32::<1>(a);
52165        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52166        assert_eq_m256i(r, e);
52167    }
52168
52169    #[simd_test(enable = "avx512f,avx512vl")]
52170    unsafe fn test_mm256_mask_rol_epi32() {
52171        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52172        let r = _mm256_mask_rol_epi32::<1>(a, 0, a);
52173        assert_eq_m256i(r, a);
52174        let r = _mm256_mask_rol_epi32::<1>(a, 0b11111111, a);
52175        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52176        assert_eq_m256i(r, e);
52177    }
52178
52179    #[simd_test(enable = "avx512f,avx512vl")]
52180    unsafe fn test_mm256_maskz_rol_epi32() {
52181        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52182        let r = _mm256_maskz_rol_epi32::<1>(0, a);
52183        assert_eq_m256i(r, _mm256_setzero_si256());
52184        let r = _mm256_maskz_rol_epi32::<1>(0b11111111, a);
52185        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52186        assert_eq_m256i(r, e);
52187    }
52188
52189    #[simd_test(enable = "avx512f,avx512vl")]
52190    unsafe fn test_mm_rol_epi32() {
52191        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52192        let r = _mm_rol_epi32::<1>(a);
52193        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52194        assert_eq_m128i(r, e);
52195    }
52196
52197    #[simd_test(enable = "avx512f,avx512vl")]
52198    unsafe fn test_mm_mask_rol_epi32() {
52199        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52200        let r = _mm_mask_rol_epi32::<1>(a, 0, a);
52201        assert_eq_m128i(r, a);
52202        let r = _mm_mask_rol_epi32::<1>(a, 0b00001111, a);
52203        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52204        assert_eq_m128i(r, e);
52205    }
52206
52207    #[simd_test(enable = "avx512f,avx512vl")]
52208    unsafe fn test_mm_maskz_rol_epi32() {
52209        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52210        let r = _mm_maskz_rol_epi32::<1>(0, a);
52211        assert_eq_m128i(r, _mm_setzero_si128());
52212        let r = _mm_maskz_rol_epi32::<1>(0b00001111, a);
52213        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52214        assert_eq_m128i(r, e);
52215    }
52216
52217    #[simd_test(enable = "avx512f")]
52218    unsafe fn test_mm512_ror_epi32() {
52219        let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52220        let r = _mm512_ror_epi32::<1>(a);
52221        let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52222        assert_eq_m512i(r, e);
52223    }
52224
52225    #[simd_test(enable = "avx512f")]
52226    unsafe fn test_mm512_mask_ror_epi32() {
52227        let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52228        let r = _mm512_mask_ror_epi32::<1>(a, 0, a);
52229        assert_eq_m512i(r, a);
52230        let r = _mm512_mask_ror_epi32::<1>(a, 0b11111111_11111111, a);
52231        let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52232        assert_eq_m512i(r, e);
52233    }
52234
52235    #[simd_test(enable = "avx512f")]
52236    unsafe fn test_mm512_maskz_ror_epi32() {
52237        let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
52238        let r = _mm512_maskz_ror_epi32::<1>(0, a);
52239        assert_eq_m512i(r, _mm512_setzero_si512());
52240        let r = _mm512_maskz_ror_epi32::<1>(0b00000000_11111111, a);
52241        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52242        assert_eq_m512i(r, e);
52243    }
52244
52245    #[simd_test(enable = "avx512f,avx512vl")]
52246    unsafe fn test_mm256_ror_epi32() {
52247        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52248        let r = _mm256_ror_epi32::<1>(a);
52249        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52250        assert_eq_m256i(r, e);
52251    }
52252
52253    #[simd_test(enable = "avx512f,avx512vl")]
52254    unsafe fn test_mm256_mask_ror_epi32() {
52255        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52256        let r = _mm256_mask_ror_epi32::<1>(a, 0, a);
52257        assert_eq_m256i(r, a);
52258        let r = _mm256_mask_ror_epi32::<1>(a, 0b11111111, a);
52259        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52260        assert_eq_m256i(r, e);
52261    }
52262
52263    #[simd_test(enable = "avx512f,avx512vl")]
52264    unsafe fn test_mm256_maskz_ror_epi32() {
52265        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52266        let r = _mm256_maskz_ror_epi32::<1>(0, a);
52267        assert_eq_m256i(r, _mm256_setzero_si256());
52268        let r = _mm256_maskz_ror_epi32::<1>(0b11111111, a);
52269        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52270        assert_eq_m256i(r, e);
52271    }
52272
52273    #[simd_test(enable = "avx512f,avx512vl")]
52274    unsafe fn test_mm_ror_epi32() {
52275        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52276        let r = _mm_ror_epi32::<1>(a);
52277        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52278        assert_eq_m128i(r, e);
52279    }
52280
52281    #[simd_test(enable = "avx512f,avx512vl")]
52282    unsafe fn test_mm_mask_ror_epi32() {
52283        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52284        let r = _mm_mask_ror_epi32::<1>(a, 0, a);
52285        assert_eq_m128i(r, a);
52286        let r = _mm_mask_ror_epi32::<1>(a, 0b00001111, a);
52287        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52288        assert_eq_m128i(r, e);
52289    }
52290
52291    #[simd_test(enable = "avx512f,avx512vl")]
52292    unsafe fn test_mm_maskz_ror_epi32() {
52293        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52294        let r = _mm_maskz_ror_epi32::<1>(0, a);
52295        assert_eq_m128i(r, _mm_setzero_si128());
52296        let r = _mm_maskz_ror_epi32::<1>(0b00001111, a);
52297        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52298        assert_eq_m128i(r, e);
52299    }
52300
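    // Immediate logical shifts: `slli`/`srli` shift each 32-bit lane by the
    // constant count, discarding bits shifted out and filling with zeros, so
    // `1 << 31` shifted left by 1 becomes 0 (unlike the rotate tests above).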
52301    #[simd_test(enable = "avx512f")]
52302    unsafe fn test_mm512_slli_epi32() {
52303        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52304        let r = _mm512_slli_epi32::<1>(a);
52305        let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52306        assert_eq_m512i(r, e);
52307    }
52308
52309    #[simd_test(enable = "avx512f")]
52310    unsafe fn test_mm512_mask_slli_epi32() {
52311        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52312        let r = _mm512_mask_slli_epi32::<1>(a, 0, a);
52313        assert_eq_m512i(r, a);
52314        let r = _mm512_mask_slli_epi32::<1>(a, 0b11111111_11111111, a);
52315        let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52316        assert_eq_m512i(r, e);
52317    }
52318
52319    #[simd_test(enable = "avx512f")]
52320    unsafe fn test_mm512_maskz_slli_epi32() {
52321        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52322        let r = _mm512_maskz_slli_epi32::<1>(0, a);
52323        assert_eq_m512i(r, _mm512_setzero_si512());
52324        let r = _mm512_maskz_slli_epi32::<1>(0b00000000_11111111, a);
52325        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0);
52326        assert_eq_m512i(r, e);
52327    }
52328
52329    #[simd_test(enable = "avx512f,avx512vl")]
52330    unsafe fn test_mm256_mask_slli_epi32() {
52331        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52332        let r = _mm256_mask_slli_epi32::<1>(a, 0, a);
52333        assert_eq_m256i(r, a);
52334        let r = _mm256_mask_slli_epi32::<1>(a, 0b11111111, a);
52335        let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
52336        assert_eq_m256i(r, e);
52337    }
52338
52339    #[simd_test(enable = "avx512f,avx512vl")]
52340    unsafe fn test_mm256_maskz_slli_epi32() {
52341        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52342        let r = _mm256_maskz_slli_epi32::<1>(0, a);
52343        assert_eq_m256i(r, _mm256_setzero_si256());
52344        let r = _mm256_maskz_slli_epi32::<1>(0b11111111, a);
52345        let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
52346        assert_eq_m256i(r, e);
52347    }
52348
52349    #[simd_test(enable = "avx512f,avx512vl")]
52350    unsafe fn test_mm_mask_slli_epi32() {
52351        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52352        let r = _mm_mask_slli_epi32::<1>(a, 0, a);
52353        assert_eq_m128i(r, a);
52354        let r = _mm_mask_slli_epi32::<1>(a, 0b00001111, a);
52355        let e = _mm_set_epi32(0, 2, 2, 2);
52356        assert_eq_m128i(r, e);
52357    }
52358
52359    #[simd_test(enable = "avx512f,avx512vl")]
52360    unsafe fn test_mm_maskz_slli_epi32() {
52361        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52362        let r = _mm_maskz_slli_epi32::<1>(0, a);
52363        assert_eq_m128i(r, _mm_setzero_si128());
52364        let r = _mm_maskz_slli_epi32::<1>(0b00001111, a);
52365        let e = _mm_set_epi32(0, 2, 2, 2);
52366        assert_eq_m128i(r, e);
52367    }
52368
52369    #[simd_test(enable = "avx512f")]
52370    unsafe fn test_mm512_srli_epi32() {
52371        let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52372        let r = _mm512_srli_epi32::<1>(a);
52373        let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52374        assert_eq_m512i(r, e);
52375    }
52376
52377    #[simd_test(enable = "avx512f")]
52378    unsafe fn test_mm512_mask_srli_epi32() {
52379        let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52380        let r = _mm512_mask_srli_epi32::<1>(a, 0, a);
52381        assert_eq_m512i(r, a);
52382        let r = _mm512_mask_srli_epi32::<1>(a, 0b11111111_11111111, a);
52383        let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52384        assert_eq_m512i(r, e);
52385    }
52386
52387    #[simd_test(enable = "avx512f")]
52388    unsafe fn test_mm512_maskz_srli_epi32() {
52389        let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
52390        let r = _mm512_maskz_srli_epi32::<1>(0, a);
52391        assert_eq_m512i(r, _mm512_setzero_si512());
52392        let r = _mm512_maskz_srli_epi32::<1>(0b00000000_11111111, a);
52393        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0 << 31);
52394        assert_eq_m512i(r, e);
52395    }
52396
52397    #[simd_test(enable = "avx512f,avx512vl")]
52398    unsafe fn test_mm256_mask_srli_epi32() {
52399        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52400        let r = _mm256_mask_srli_epi32::<1>(a, 0, a);
52401        assert_eq_m256i(r, a);
52402        let r = _mm256_mask_srli_epi32::<1>(a, 0b11111111, a);
52403        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52404        assert_eq_m256i(r, e);
52405    }
52406
52407    #[simd_test(enable = "avx512f,avx512vl")]
52408    unsafe fn test_mm256_maskz_srli_epi32() {
52409        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52410        let r = _mm256_maskz_srli_epi32::<1>(0, a);
52411        assert_eq_m256i(r, _mm256_setzero_si256());
52412        let r = _mm256_maskz_srli_epi32::<1>(0b11111111, a);
52413        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52414        assert_eq_m256i(r, e);
52415    }
52416
52417    #[simd_test(enable = "avx512f,avx512vl")]
52418    unsafe fn test_mm_mask_srli_epi32() {
52419        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52420        let r = _mm_mask_srli_epi32::<1>(a, 0, a);
52421        assert_eq_m128i(r, a);
52422        let r = _mm_mask_srli_epi32::<1>(a, 0b00001111, a);
52423        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52424        assert_eq_m128i(r, e);
52425    }
52426
52427    #[simd_test(enable = "avx512f,avx512vl")]
52428    unsafe fn test_mm_maskz_srli_epi32() {
52429        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52430        let r = _mm_maskz_srli_epi32::<1>(0, a);
52431        assert_eq_m128i(r, _mm_setzero_si128());
52432        let r = _mm_maskz_srli_epi32::<1>(0b00001111, a);
52433        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52434        assert_eq_m128i(r, e);
52435    }
52436
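    // Variable rotates: `rolv`/`rorv` take a per-lane rotate count from the
    // second vector operand instead of an immediate.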
52437    #[simd_test(enable = "avx512f")]
52438    unsafe fn test_mm512_rolv_epi32() {
52439        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52440        let b = _mm512_set1_epi32(1);
52441        let r = _mm512_rolv_epi32(a, b);
52442        let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52443        assert_eq_m512i(r, e);
52444    }
52445
52446    #[simd_test(enable = "avx512f")]
52447    unsafe fn test_mm512_mask_rolv_epi32() {
52448        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52449        let b = _mm512_set1_epi32(1);
52450        let r = _mm512_mask_rolv_epi32(a, 0, a, b);
52451        assert_eq_m512i(r, a);
52452        let r = _mm512_mask_rolv_epi32(a, 0b11111111_11111111, a, b);
52453        let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52454        assert_eq_m512i(r, e);
52455    }
52456
52457    #[simd_test(enable = "avx512f")]
52458    unsafe fn test_mm512_maskz_rolv_epi32() {
52459        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52460        let b = _mm512_set1_epi32(1);
52461        let r = _mm512_maskz_rolv_epi32(0, a, b);
52462        assert_eq_m512i(r, _mm512_setzero_si512());
52463        let r = _mm512_maskz_rolv_epi32(0b00000000_11111111, a, b);
52464        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
52465        assert_eq_m512i(r, e);
52466    }
52467
52468    #[simd_test(enable = "avx512f,avx512vl")]
52469    unsafe fn test_mm256_rolv_epi32() {
52470        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52471        let b = _mm256_set1_epi32(1);
52472        let r = _mm256_rolv_epi32(a, b);
52473        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52474        assert_eq_m256i(r, e);
52475    }
52476
52477    #[simd_test(enable = "avx512f,avx512vl")]
52478    unsafe fn test_mm256_mask_rolv_epi32() {
52479        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52480        let b = _mm256_set1_epi32(1);
52481        let r = _mm256_mask_rolv_epi32(a, 0, a, b);
52482        assert_eq_m256i(r, a);
52483        let r = _mm256_mask_rolv_epi32(a, 0b11111111, a, b);
52484        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52485        assert_eq_m256i(r, e);
52486    }
52487
52488    #[simd_test(enable = "avx512f,avx512vl")]
52489    unsafe fn test_mm256_maskz_rolv_epi32() {
52490        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52491        let b = _mm256_set1_epi32(1);
52492        let r = _mm256_maskz_rolv_epi32(0, a, b);
52493        assert_eq_m256i(r, _mm256_setzero_si256());
52494        let r = _mm256_maskz_rolv_epi32(0b11111111, a, b);
52495        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52496        assert_eq_m256i(r, e);
52497    }
52498
52499    #[simd_test(enable = "avx512f,avx512vl")]
52500    unsafe fn test_mm_rolv_epi32() {
52501        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52502        let b = _mm_set1_epi32(1);
52503        let r = _mm_rolv_epi32(a, b);
52504        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52505        assert_eq_m128i(r, e);
52506    }
52507
52508    #[simd_test(enable = "avx512f,avx512vl")]
52509    unsafe fn test_mm_mask_rolv_epi32() {
52510        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52511        let b = _mm_set1_epi32(1);
52512        let r = _mm_mask_rolv_epi32(a, 0, a, b);
52513        assert_eq_m128i(r, a);
52514        let r = _mm_mask_rolv_epi32(a, 0b00001111, a, b);
52515        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52516        assert_eq_m128i(r, e);
52517    }
52518
52519    #[simd_test(enable = "avx512f,avx512vl")]
52520    unsafe fn test_mm_maskz_rolv_epi32() {
52521        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52522        let b = _mm_set1_epi32(1);
52523        let r = _mm_maskz_rolv_epi32(0, a, b);
52524        assert_eq_m128i(r, _mm_setzero_si128());
52525        let r = _mm_maskz_rolv_epi32(0b00001111, a, b);
52526        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52527        assert_eq_m128i(r, e);
52528    }
52529
52530    #[simd_test(enable = "avx512f")]
52531    unsafe fn test_mm512_rorv_epi32() {
52532        let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52533        let b = _mm512_set1_epi32(1);
52534        let r = _mm512_rorv_epi32(a, b);
52535        let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52536        assert_eq_m512i(r, e);
52537    }
52538
52539    #[simd_test(enable = "avx512f")]
52540    unsafe fn test_mm512_mask_rorv_epi32() {
52541        let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52542        let b = _mm512_set1_epi32(1);
52543        let r = _mm512_mask_rorv_epi32(a, 0, a, b);
52544        assert_eq_m512i(r, a);
52545        let r = _mm512_mask_rorv_epi32(a, 0b11111111_11111111, a, b);
52546        let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52547        assert_eq_m512i(r, e);
52548    }
52549
52550    #[simd_test(enable = "avx512f")]
52551    unsafe fn test_mm512_maskz_rorv_epi32() {
52552        let a = _mm512_set_epi32(3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
52553        let b = _mm512_set1_epi32(1);
52554        let r = _mm512_maskz_rorv_epi32(0, a, b);
52555        assert_eq_m512i(r, _mm512_setzero_si512());
52556        let r = _mm512_maskz_rorv_epi32(0b00000000_11111111, a, b);
52557        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52558        assert_eq_m512i(r, e);
52559    }
52560
52561    #[simd_test(enable = "avx512f,avx512vl")]
52562    unsafe fn test_mm256_rorv_epi32() {
52563        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52564        let b = _mm256_set1_epi32(1);
52565        let r = _mm256_rorv_epi32(a, b);
52566        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52567        assert_eq_m256i(r, e);
52568    }
52569
52570    #[simd_test(enable = "avx512f,avx512vl")]
52571    unsafe fn test_mm256_mask_rorv_epi32() {
52572        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52573        let b = _mm256_set1_epi32(1);
52574        let r = _mm256_mask_rorv_epi32(a, 0, a, b);
52575        assert_eq_m256i(r, a);
52576        let r = _mm256_mask_rorv_epi32(a, 0b11111111, a, b);
52577        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52578        assert_eq_m256i(r, e);
52579    }
52580
52581    #[simd_test(enable = "avx512f,avx512vl")]
52582    unsafe fn test_mm256_maskz_rorv_epi32() {
52583        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52584        let b = _mm256_set1_epi32(1);
52585        let r = _mm256_maskz_rorv_epi32(0, a, b);
52586        assert_eq_m256i(r, _mm256_setzero_si256());
52587        let r = _mm256_maskz_rorv_epi32(0b11111111, a, b);
52588        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52589        assert_eq_m256i(r, e);
52590    }
52591
52592    #[simd_test(enable = "avx512f,avx512vl")]
52593    unsafe fn test_mm_rorv_epi32() {
52594        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52595        let b = _mm_set1_epi32(1);
52596        let r = _mm_rorv_epi32(a, b);
52597        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52598        assert_eq_m128i(r, e);
52599    }
52600
52601    #[simd_test(enable = "avx512f,avx512vl")]
52602    unsafe fn test_mm_mask_rorv_epi32() {
52603        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52604        let b = _mm_set1_epi32(1);
52605        let r = _mm_mask_rorv_epi32(a, 0, a, b);
52606        assert_eq_m128i(r, a);
52607        let r = _mm_mask_rorv_epi32(a, 0b00001111, a, b);
52608        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52609        assert_eq_m128i(r, e);
52610    }
52611
52612    #[simd_test(enable = "avx512f,avx512vl")]
52613    unsafe fn test_mm_maskz_rorv_epi32() {
52614        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52615        let b = _mm_set1_epi32(1);
52616        let r = _mm_maskz_rorv_epi32(0, a, b);
52617        assert_eq_m128i(r, _mm_setzero_si128());
52618        let r = _mm_maskz_rorv_epi32(0b00001111, a, b);
52619        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52620        assert_eq_m128i(r, e);
52621    }
52622
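    // Variable logical shifts: `sllv`/`srlv` shift each lane by the count held in
    // the corresponding lane of `count`; a count of 32 or more produces 0.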
52623    #[simd_test(enable = "avx512f")]
52624    unsafe fn test_mm512_sllv_epi32() {
52625        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52626        let count = _mm512_set1_epi32(1);
52627        let r = _mm512_sllv_epi32(a, count);
52628        let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52629        assert_eq_m512i(r, e);
52630    }
52631
52632    #[simd_test(enable = "avx512f")]
52633    unsafe fn test_mm512_mask_sllv_epi32() {
52634        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52635        let count = _mm512_set1_epi32(1);
52636        let r = _mm512_mask_sllv_epi32(a, 0, a, count);
52637        assert_eq_m512i(r, a);
52638        let r = _mm512_mask_sllv_epi32(a, 0b11111111_11111111, a, count);
52639        let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52640        assert_eq_m512i(r, e);
52641    }
52642
52643    #[simd_test(enable = "avx512f")]
52644    unsafe fn test_mm512_maskz_sllv_epi32() {
52645        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52646        let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52647        let r = _mm512_maskz_sllv_epi32(0, a, count);
52648        assert_eq_m512i(r, _mm512_setzero_si512());
52649        let r = _mm512_maskz_sllv_epi32(0b00000000_11111111, a, count);
52650        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0);
52651        assert_eq_m512i(r, e);
52652    }
52653
52654    #[simd_test(enable = "avx512f,avx512vl")]
52655    unsafe fn test_mm256_mask_sllv_epi32() {
52656        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52657        let count = _mm256_set1_epi32(1);
52658        let r = _mm256_mask_sllv_epi32(a, 0, a, count);
52659        assert_eq_m256i(r, a);
52660        let r = _mm256_mask_sllv_epi32(a, 0b11111111, a, count);
52661        let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
52662        assert_eq_m256i(r, e);
52663    }
52664
52665    #[simd_test(enable = "avx512f,avx512vl")]
52666    unsafe fn test_mm256_maskz_sllv_epi32() {
52667        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52668        let count = _mm256_set1_epi32(1);
52669        let r = _mm256_maskz_sllv_epi32(0, a, count);
52670        assert_eq_m256i(r, _mm256_setzero_si256());
52671        let r = _mm256_maskz_sllv_epi32(0b11111111, a, count);
52672        let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
52673        assert_eq_m256i(r, e);
52674    }
52675
52676    #[simd_test(enable = "avx512f,avx512vl")]
52677    unsafe fn test_mm_mask_sllv_epi32() {
52678        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52679        let count = _mm_set1_epi32(1);
52680        let r = _mm_mask_sllv_epi32(a, 0, a, count);
52681        assert_eq_m128i(r, a);
52682        let r = _mm_mask_sllv_epi32(a, 0b00001111, a, count);
52683        let e = _mm_set_epi32(0, 2, 2, 2);
52684        assert_eq_m128i(r, e);
52685    }
52686
52687    #[simd_test(enable = "avx512f,avx512vl")]
52688    unsafe fn test_mm_maskz_sllv_epi32() {
52689        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52690        let count = _mm_set1_epi32(1);
52691        let r = _mm_maskz_sllv_epi32(0, a, count);
52692        assert_eq_m128i(r, _mm_setzero_si128());
52693        let r = _mm_maskz_sllv_epi32(0b00001111, a, count);
52694        let e = _mm_set_epi32(0, 2, 2, 2);
52695        assert_eq_m128i(r, e);
52696    }
52697
52698    #[simd_test(enable = "avx512f")]
52699    unsafe fn test_mm512_srlv_epi32() {
52700        let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52701        let count = _mm512_set1_epi32(1);
52702        let r = _mm512_srlv_epi32(a, count);
52703        let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52704        assert_eq_m512i(r, e);
52705    }
52706
52707    #[simd_test(enable = "avx512f")]
52708    unsafe fn test_mm512_mask_srlv_epi32() {
52709        let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52710        let count = _mm512_set1_epi32(1);
52711        let r = _mm512_mask_srlv_epi32(a, 0, a, count);
52712        assert_eq_m512i(r, a);
52713        let r = _mm512_mask_srlv_epi32(a, 0b11111111_11111111, a, count);
52714        let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52715        assert_eq_m512i(r, e);
52716    }
52717
52718    #[simd_test(enable = "avx512f")]
52719    unsafe fn test_mm512_maskz_srlv_epi32() {
52720        let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
52721        let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52722        let r = _mm512_maskz_srlv_epi32(0, a, count);
52723        assert_eq_m512i(r, _mm512_setzero_si512());
52724        let r = _mm512_maskz_srlv_epi32(0b00000000_11111111, a, count);
52725        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0);
52726        assert_eq_m512i(r, e);
52727    }
52728
52729    #[simd_test(enable = "avx512f,avx512vl")]
52730    unsafe fn test_mm256_mask_srlv_epi32() {
52731        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52732        let count = _mm256_set1_epi32(1);
52733        let r = _mm256_mask_srlv_epi32(a, 0, a, count);
52734        assert_eq_m256i(r, a);
52735        let r = _mm256_mask_srlv_epi32(a, 0b11111111, a, count);
52736        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52737        assert_eq_m256i(r, e);
52738    }
52739
52740    #[simd_test(enable = "avx512f,avx512vl")]
52741    unsafe fn test_mm256_maskz_srlv_epi32() {
52742        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52743        let count = _mm256_set1_epi32(1);
52744        let r = _mm256_maskz_srlv_epi32(0, a, count);
52745        assert_eq_m256i(r, _mm256_setzero_si256());
52746        let r = _mm256_maskz_srlv_epi32(0b11111111, a, count);
52747        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52748        assert_eq_m256i(r, e);
52749    }
52750
52751    #[simd_test(enable = "avx512f,avx512vl")]
52752    unsafe fn test_mm_mask_srlv_epi32() {
52753        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52754        let count = _mm_set1_epi32(1);
52755        let r = _mm_mask_srlv_epi32(a, 0, a, count);
52756        assert_eq_m128i(r, a);
52757        let r = _mm_mask_srlv_epi32(a, 0b00001111, a, count);
52758        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52759        assert_eq_m128i(r, e);
52760    }
52761
52762    #[simd_test(enable = "avx512f,avx512vl")]
52763    unsafe fn test_mm_maskz_srlv_epi32() {
52764        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52765        let count = _mm_set1_epi32(1);
52766        let r = _mm_maskz_srlv_epi32(0, a, count);
52767        assert_eq_m128i(r, _mm_setzero_si128());
52768        let r = _mm_maskz_srlv_epi32(0b00001111, a, count);
52769        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52770        assert_eq_m128i(r, e);
52771    }
52772
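    // `sll`/`srl` shift every lane by the same amount, taken from the low 64 bits
    // of the 128-bit `count` operand (so `_mm_set_epi32(0, 0, 0, 2)` means a shift
    // of 2); a count of 32 or more zeroes all lanes.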
52773    #[simd_test(enable = "avx512f")]
52774    unsafe fn test_mm512_sll_epi32() {
52775        #[rustfmt::skip]
52776        let a = _mm512_set_epi32(
52777            1 << 31, 1 << 0, 1 << 1, 1 << 2,
52778            0, 0, 0, 0,
52779            0, 0, 0, 0,
52780            0, 0, 0, 0,
52781        );
52782        let count = _mm_set_epi32(0, 0, 0, 2);
52783        let r = _mm512_sll_epi32(a, count);
52784        #[rustfmt::skip]
52785        let e = _mm512_set_epi32(
52786            0, 1 << 2, 1 << 3, 1 << 4,
52787            0, 0, 0, 0,
52788            0, 0, 0, 0,
52789            0, 0, 0, 0,
52790        );
52791        assert_eq_m512i(r, e);
52792    }
52793
52794    #[simd_test(enable = "avx512f")]
52795    unsafe fn test_mm512_mask_sll_epi32() {
52796        #[rustfmt::skip]
52797        let a = _mm512_set_epi32(
52798            1 << 31, 1 << 0, 1 << 1, 1 << 2,
52799            0, 0, 0, 0,
52800            0, 0, 0, 0,
52801            0, 0, 0, 0,
52802        );
52803        let count = _mm_set_epi32(0, 0, 0, 2);
52804        let r = _mm512_mask_sll_epi32(a, 0, a, count);
52805        assert_eq_m512i(r, a);
52806        let r = _mm512_mask_sll_epi32(a, 0b11111111_11111111, a, count);
52807        #[rustfmt::skip]
52808        let e = _mm512_set_epi32(
52809            0, 1 << 2, 1 << 3, 1 << 4,
52810            0, 0, 0, 0,
52811            0, 0, 0, 0,
52812            0, 0, 0, 0,
52813        );
52814        assert_eq_m512i(r, e);
52815    }
52816
52817    #[simd_test(enable = "avx512f")]
52818    unsafe fn test_mm512_maskz_sll_epi32() {
52819        #[rustfmt::skip]
52820        let a = _mm512_set_epi32(
52821            1 << 31, 1 << 0, 1 << 1, 1 << 2,
52822            0, 0, 0, 0,
52823            0, 0, 0, 0,
52824            0, 0, 0, 1 << 31,
52825        );
52826        let count = _mm_set_epi32(2, 0, 0, 2);
52827        let r = _mm512_maskz_sll_epi32(0, a, count);
52828        assert_eq_m512i(r, _mm512_setzero_si512());
52829        let r = _mm512_maskz_sll_epi32(0b00000000_11111111, a, count);
52830        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
52831        assert_eq_m512i(r, e);
52832    }
52833
52834    #[simd_test(enable = "avx512f,avx512vl")]
52835    unsafe fn test_mm256_mask_sll_epi32() {
52836        let a = _mm256_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0);
52837        let count = _mm_set_epi32(0, 0, 0, 1);
52838        let r = _mm256_mask_sll_epi32(a, 0, a, count);
52839        assert_eq_m256i(r, a);
52840        let r = _mm256_mask_sll_epi32(a, 0b11111111, a, count);
52841        let e = _mm256_set_epi32(1 << 14, 0, 0, 0, 0, 0, 0, 0);
52842        assert_eq_m256i(r, e);
52843    }
52844
52845    #[simd_test(enable = "avx512f,avx512vl")]
52846    unsafe fn test_mm256_maskz_sll_epi32() {
52847        let a = _mm256_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0);
52848        let count = _mm_set_epi32(0, 0, 0, 1);
52849        let r = _mm256_maskz_sll_epi32(0, a, count);
52850        assert_eq_m256i(r, _mm256_setzero_si256());
52851        let r = _mm256_maskz_sll_epi32(0b11111111, a, count);
52852        let e = _mm256_set_epi32(1 << 14, 0, 0, 0, 0, 0, 0, 0);
52853        assert_eq_m256i(r, e);
52854    }
52855
52856    #[simd_test(enable = "avx512f,avx512vl")]
52857    unsafe fn test_mm_mask_sll_epi32() {
52858        let a = _mm_set_epi32(1 << 13, 0, 0, 0);
52859        let count = _mm_set_epi32(0, 0, 0, 1);
52860        let r = _mm_mask_sll_epi32(a, 0, a, count);
52861        assert_eq_m128i(r, a);
52862        let r = _mm_mask_sll_epi32(a, 0b00001111, a, count);
52863        let e = _mm_set_epi32(1 << 14, 0, 0, 0);
52864        assert_eq_m128i(r, e);
52865    }
52866
52867    #[simd_test(enable = "avx512f,avx512vl")]
52868    unsafe fn test_mm_maskz_sll_epi32() {
52869        let a = _mm_set_epi32(1 << 13, 0, 0, 0);
52870        let count = _mm_set_epi32(0, 0, 0, 1);
52871        let r = _mm_maskz_sll_epi32(0, a, count);
52872        assert_eq_m128i(r, _mm_setzero_si128());
52873        let r = _mm_maskz_sll_epi32(0b00001111, a, count);
52874        let e = _mm_set_epi32(1 << 14, 0, 0, 0);
52875        assert_eq_m128i(r, e);
52876    }
52877
52878    #[simd_test(enable = "avx512f")]
52879    unsafe fn test_mm512_srl_epi32() {
52880        #[rustfmt::skip]
52881        let a = _mm512_set_epi32(
52882            1 << 31, 1 << 0, 1 << 1, 1 << 2,
52883            0, 0, 0, 0,
52884            0, 0, 0, 0,
52885            0, 0, 0, 0,
52886        );
52887        let count = _mm_set_epi32(0, 0, 0, 2);
52888        let r = _mm512_srl_epi32(a, count);
52889        let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
52890        assert_eq_m512i(r, e);
52891    }
52892
52893    #[simd_test(enable = "avx512f")]
52894    unsafe fn test_mm512_mask_srl_epi32() {
52895        #[rustfmt::skip]
52896        let a = _mm512_set_epi32(
52897            1 << 31, 1 << 0, 1 << 1, 1 << 2,
52898            0, 0, 0, 0,
52899            0, 0, 0, 0,
52900            0, 0, 0, 0,
52901        );
52902        let count = _mm_set_epi32(0, 0, 0, 2);
52903        let r = _mm512_mask_srl_epi32(a, 0, a, count);
52904        assert_eq_m512i(r, a);
52905        let r = _mm512_mask_srl_epi32(a, 0b11111111_11111111, a, count);
52906        let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
52907        assert_eq_m512i(r, e);
52908    }
52909
52910    #[simd_test(enable = "avx512f")]
52911    unsafe fn test_mm512_maskz_srl_epi32() {
52912        #[rustfmt::skip]
52913        let a = _mm512_set_epi32(
52914            1 << 31, 1 << 0, 1 << 1, 1 << 2,
52915            0, 0, 0, 0,
52916            0, 0, 0, 0,
52917            0, 0, 0, 1 << 31,
52918        );
52919        let count = _mm_set_epi32(2, 0, 0, 2);
52920        let r = _mm512_maskz_srl_epi32(0, a, count);
52921        assert_eq_m512i(r, _mm512_setzero_si512());
52922        let r = _mm512_maskz_srl_epi32(0b00000000_11111111, a, count);
52923        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 29);
52924        assert_eq_m512i(r, e);
52925    }
52926
52927    #[simd_test(enable = "avx512f,avx512vl")]
52928    unsafe fn test_mm256_mask_srl_epi32() {
52929        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52930        let count = _mm_set_epi32(0, 0, 0, 1);
52931        let r = _mm256_mask_srl_epi32(a, 0, a, count);
52932        assert_eq_m256i(r, a);
52933        let r = _mm256_mask_srl_epi32(a, 0b11111111, a, count);
52934        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52935        assert_eq_m256i(r, e);
52936    }
52937
52938    #[simd_test(enable = "avx512f,avx512vl")]
52939    unsafe fn test_mm256_maskz_srl_epi32() {
52940        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52941        let count = _mm_set_epi32(0, 0, 0, 1);
52942        let r = _mm256_maskz_srl_epi32(0, a, count);
52943        assert_eq_m256i(r, _mm256_setzero_si256());
52944        let r = _mm256_maskz_srl_epi32(0b11111111, a, count);
52945        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52946        assert_eq_m256i(r, e);
52947    }
52948
52949    #[simd_test(enable = "avx512f,avx512vl")]
52950    unsafe fn test_mm_mask_srl_epi32() {
52951        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52952        let count = _mm_set_epi32(0, 0, 0, 1);
52953        let r = _mm_mask_srl_epi32(a, 0, a, count);
52954        assert_eq_m128i(r, a);
52955        let r = _mm_mask_srl_epi32(a, 0b00001111, a, count);
52956        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52957        assert_eq_m128i(r, e);
52958    }
52959
52960    #[simd_test(enable = "avx512f,avx512vl")]
52961    unsafe fn test_mm_maskz_srl_epi32() {
52962        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52963        let count = _mm_set_epi32(0, 0, 0, 1);
52964        let r = _mm_maskz_srl_epi32(0, a, count);
52965        assert_eq_m128i(r, _mm_setzero_si128());
52966        let r = _mm_maskz_srl_epi32(0b00001111, a, count);
52967        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52968        assert_eq_m128i(r, e);
52969    }
52970
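    // Arithmetic right shifts: `sra`/`srav`/`srai` replicate the sign bit instead
    // of shifting in zeros, so `-15 >> 2` gives `-4` (rounding toward negative
    // infinity), while positive values behave like the logical shifts.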
52971    #[simd_test(enable = "avx512f")]
52972    unsafe fn test_mm512_sra_epi32() {
52973        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
52974        let count = _mm_set_epi32(1, 0, 0, 2);
52975        let r = _mm512_sra_epi32(a, count);
52976        let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
52977        assert_eq_m512i(r, e);
52978    }
52979
52980    #[simd_test(enable = "avx512f")]
52981    unsafe fn test_mm512_mask_sra_epi32() {
52982        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
52983        let count = _mm_set_epi32(0, 0, 0, 2);
52984        let r = _mm512_mask_sra_epi32(a, 0, a, count);
52985        assert_eq_m512i(r, a);
52986        let r = _mm512_mask_sra_epi32(a, 0b11111111_11111111, a, count);
52987        let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4);
52988        assert_eq_m512i(r, e);
52989    }
52990
52991    #[simd_test(enable = "avx512f")]
52992    unsafe fn test_mm512_maskz_sra_epi32() {
52993        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
52994        let count = _mm_set_epi32(2, 0, 0, 2);
52995        let r = _mm512_maskz_sra_epi32(0, a, count);
52996        assert_eq_m512i(r, _mm512_setzero_si512());
52997        let r = _mm512_maskz_sra_epi32(0b00000000_11111111, a, count);
52998        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4);
52999        assert_eq_m512i(r, e);
53000    }
53001
53002    #[simd_test(enable = "avx512f,avx512vl")]
53003    unsafe fn test_mm256_mask_sra_epi32() {
53004        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53005        let count = _mm_set_epi32(0, 0, 0, 1);
53006        let r = _mm256_mask_sra_epi32(a, 0, a, count);
53007        assert_eq_m256i(r, a);
53008        let r = _mm256_mask_sra_epi32(a, 0b11111111, a, count);
53009        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53010        assert_eq_m256i(r, e);
53011    }
53012
53013    #[simd_test(enable = "avx512f,avx512vl")]
53014    unsafe fn test_mm256_maskz_sra_epi32() {
53015        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53016        let count = _mm_set_epi32(0, 0, 0, 1);
53017        let r = _mm256_maskz_sra_epi32(0, a, count);
53018        assert_eq_m256i(r, _mm256_setzero_si256());
53019        let r = _mm256_maskz_sra_epi32(0b11111111, a, count);
53020        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53021        assert_eq_m256i(r, e);
53022    }
53023
53024    #[simd_test(enable = "avx512f,avx512vl")]
53025    unsafe fn test_mm_mask_sra_epi32() {
53026        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53027        let count = _mm_set_epi32(0, 0, 0, 1);
53028        let r = _mm_mask_sra_epi32(a, 0, a, count);
53029        assert_eq_m128i(r, a);
53030        let r = _mm_mask_sra_epi32(a, 0b00001111, a, count);
53031        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53032        assert_eq_m128i(r, e);
53033    }
53034
53035    #[simd_test(enable = "avx512f,avx512vl")]
53036    unsafe fn test_mm_maskz_sra_epi32() {
53037        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53038        let count = _mm_set_epi32(0, 0, 0, 1);
53039        let r = _mm_maskz_sra_epi32(0, a, count);
53040        assert_eq_m128i(r, _mm_setzero_si128());
53041        let r = _mm_maskz_sra_epi32(0b00001111, a, count);
53042        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53043        assert_eq_m128i(r, e);
53044    }
53045
53046    #[simd_test(enable = "avx512f")]
53047    unsafe fn test_mm512_srav_epi32() {
53048        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
53049        let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
53050        let r = _mm512_srav_epi32(a, count);
53051        let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
53052        assert_eq_m512i(r, e);
53053    }
53054
53055    #[simd_test(enable = "avx512f")]
53056    unsafe fn test_mm512_mask_srav_epi32() {
53057        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
53058        let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
53059        let r = _mm512_mask_srav_epi32(a, 0, a, count);
53060        assert_eq_m512i(r, a);
53061        let r = _mm512_mask_srav_epi32(a, 0b11111111_11111111, a, count);
53062        let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8);
53063        assert_eq_m512i(r, e);
53064    }
53065
53066    #[simd_test(enable = "avx512f")]
53067    unsafe fn test_mm512_maskz_srav_epi32() {
53068        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
53069        let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2);
53070        let r = _mm512_maskz_srav_epi32(0, a, count);
53071        assert_eq_m512i(r, _mm512_setzero_si512());
53072        let r = _mm512_maskz_srav_epi32(0b00000000_11111111, a, count);
53073        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4);
53074        assert_eq_m512i(r, e);
53075    }
53076
53077    #[simd_test(enable = "avx512f,avx512vl")]
53078    unsafe fn test_mm256_mask_srav_epi32() {
53079        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53080        let count = _mm256_set1_epi32(1);
53081        let r = _mm256_mask_srav_epi32(a, 0, a, count);
53082        assert_eq_m256i(r, a);
53083        let r = _mm256_mask_srav_epi32(a, 0b11111111, a, count);
53084        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53085        assert_eq_m256i(r, e);
53086    }
53087
53088    #[simd_test(enable = "avx512f,avx512vl")]
53089    unsafe fn test_mm256_maskz_srav_epi32() {
53090        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53091        let count = _mm256_set1_epi32(1);
53092        let r = _mm256_maskz_srav_epi32(0, a, count);
53093        assert_eq_m256i(r, _mm256_setzero_si256());
53094        let r = _mm256_maskz_srav_epi32(0b11111111, a, count);
53095        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53096        assert_eq_m256i(r, e);
53097    }
53098
53099    #[simd_test(enable = "avx512f,avx512vl")]
53100    unsafe fn test_mm_mask_srav_epi32() {
53101        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53102        let count = _mm_set1_epi32(1);
53103        let r = _mm_mask_srav_epi32(a, 0, a, count);
53104        assert_eq_m128i(r, a);
53105        let r = _mm_mask_srav_epi32(a, 0b00001111, a, count);
53106        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53107        assert_eq_m128i(r, e);
53108    }
53109
53110    #[simd_test(enable = "avx512f,avx512vl")]
53111    unsafe fn test_mm_maskz_srav_epi32() {
53112        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53113        let count = _mm_set1_epi32(1);
53114        let r = _mm_maskz_srav_epi32(0, a, count);
53115        assert_eq_m128i(r, _mm_setzero_si128());
53116        let r = _mm_maskz_srav_epi32(0b00001111, a, count);
53117        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53118        assert_eq_m128i(r, e);
53119    }
53120
53121    #[simd_test(enable = "avx512f")]
53122    unsafe fn test_mm512_srai_epi32() {
53123        let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, -15);
53124        let r = _mm512_srai_epi32::<2>(a);
53125        let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, -4);
53126        assert_eq_m512i(r, e);
53127    }
53128
53129    #[simd_test(enable = "avx512f")]
53130    unsafe fn test_mm512_mask_srai_epi32() {
53131        let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
53132        let r = _mm512_mask_srai_epi32::<2>(a, 0, a);
53133        assert_eq_m512i(r, a);
53134        let r = _mm512_mask_srai_epi32::<2>(a, 0b11111111_11111111, a);
53135        let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4);
53136        assert_eq_m512i(r, e);
53137    }
53138
53139    #[simd_test(enable = "avx512f")]
53140    unsafe fn test_mm512_maskz_srai_epi32() {
53141        let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
53142        let r = _mm512_maskz_srai_epi32::<2>(0, a);
53143        assert_eq_m512i(r, _mm512_setzero_si512());
53144        let r = _mm512_maskz_srai_epi32::<2>(0b00000000_11111111, a);
53145        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4);
53146        assert_eq_m512i(r, e);
53147    }
53148
53149    #[simd_test(enable = "avx512f,avx512vl")]
53150    unsafe fn test_mm256_mask_srai_epi32() {
53151        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53152        let r = _mm256_mask_srai_epi32::<1>(a, 0, a);
53153        assert_eq_m256i(r, a);
53154        let r = _mm256_mask_srai_epi32::<1>(a, 0b11111111, a);
53155        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53156        assert_eq_m256i(r, e);
53157    }
53158
53159    #[simd_test(enable = "avx512f,avx512vl")]
53160    unsafe fn test_mm256_maskz_srai_epi32() {
53161        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53162        let r = _mm256_maskz_srai_epi32::<1>(0, a);
53163        assert_eq_m256i(r, _mm256_setzero_si256());
53164        let r = _mm256_maskz_srai_epi32::<1>(0b11111111, a);
53165        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53166        assert_eq_m256i(r, e);
53167    }
53168
53169    #[simd_test(enable = "avx512f,avx512vl")]
53170    unsafe fn test_mm_mask_srai_epi32() {
53171        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53172        let r = _mm_mask_srai_epi32::<1>(a, 0, a);
53173        assert_eq_m128i(r, a);
53174        let r = _mm_mask_srai_epi32::<1>(a, 0b00001111, a);
53175        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53176        assert_eq_m128i(r, e);
53177    }
53178
53179    #[simd_test(enable = "avx512f,avx512vl")]
53180    unsafe fn test_mm_maskz_srai_epi32() {
53181        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53182        let r = _mm_maskz_srai_epi32::<1>(0, a);
53183        assert_eq_m128i(r, _mm_setzero_si128());
53184        let r = _mm_maskz_srai_epi32::<1>(0b00001111, a);
53185        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53186        assert_eq_m128i(r, e);
53187    }
53188
53189    #[simd_test(enable = "avx512f")]
53190    unsafe fn test_mm512_permute_ps() {
53191        let a = _mm512_setr_ps(
53192            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53193        );
53194        let r = _mm512_permute_ps::<0b11_11_11_11>(a);
53195        let e = _mm512_setr_ps(
53196            3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
53197        );
53198        assert_eq_m512(r, e);
53199    }
53200
53201    #[simd_test(enable = "avx512f")]
53202    unsafe fn test_mm512_mask_permute_ps() {
53203        let a = _mm512_setr_ps(
53204            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53205        );
53206        let r = _mm512_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
53207        assert_eq_m512(r, a);
53208        let r = _mm512_mask_permute_ps::<0b11_11_11_11>(a, 0b11111111_11111111, a);
53209        let e = _mm512_setr_ps(
53210            3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
53211        );
53212        assert_eq_m512(r, e);
53213    }
53214
53215    #[simd_test(enable = "avx512f")]
53216    unsafe fn test_mm512_maskz_permute_ps() {
53217        let a = _mm512_setr_ps(
53218            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53219        );
53220        let r = _mm512_maskz_permute_ps::<0b11_11_11_11>(0, a);
53221        assert_eq_m512(r, _mm512_setzero_ps());
53222        let r = _mm512_maskz_permute_ps::<0b11_11_11_11>(0b11111111_11111111, a);
53223        let e = _mm512_setr_ps(
53224            3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
53225        );
53226        assert_eq_m512(r, e);
53227    }
53228
53229    #[simd_test(enable = "avx512f,avx512vl")]
53230    unsafe fn test_mm256_mask_permute_ps() {
53231        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53232        let r = _mm256_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
53233        assert_eq_m256(r, a);
53234        let r = _mm256_mask_permute_ps::<0b11_11_11_11>(a, 0b11111111, a);
53235        let e = _mm256_set_ps(0., 0., 0., 0., 4., 4., 4., 4.);
53236        assert_eq_m256(r, e);
53237    }
53238
53239    #[simd_test(enable = "avx512f,avx512vl")]
53240    unsafe fn test_mm256_maskz_permute_ps() {
53241        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53242        let r = _mm256_maskz_permute_ps::<0b11_11_11_11>(0, a);
53243        assert_eq_m256(r, _mm256_setzero_ps());
53244        let r = _mm256_maskz_permute_ps::<0b11_11_11_11>(0b11111111, a);
53245        let e = _mm256_set_ps(0., 0., 0., 0., 4., 4., 4., 4.);
53246        assert_eq_m256(r, e);
53247    }
53248
53249    #[simd_test(enable = "avx512f,avx512vl")]
53250    unsafe fn test_mm_mask_permute_ps() {
53251        let a = _mm_set_ps(0., 1., 2., 3.);
53252        let r = _mm_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
53253        assert_eq_m128(r, a);
53254        let r = _mm_mask_permute_ps::<0b11_11_11_11>(a, 0b00001111, a);
53255        let e = _mm_set_ps(0., 0., 0., 0.);
53256        assert_eq_m128(r, e);
53257    }
53258
53259    #[simd_test(enable = "avx512f,avx512vl")]
53260    unsafe fn test_mm_maskz_permute_ps() {
53261        let a = _mm_set_ps(0., 1., 2., 3.);
53262        let r = _mm_maskz_permute_ps::<0b11_11_11_11>(0, a);
53263        assert_eq_m128(r, _mm_setzero_ps());
53264        let r = _mm_maskz_permute_ps::<0b11_11_11_11>(0b00001111, a);
53265        let e = _mm_set_ps(0., 0., 0., 0.);
53266        assert_eq_m128(r, e);
53267    }
53268
53269    #[simd_test(enable = "avx512f")]
53270    unsafe fn test_mm512_permutevar_epi32() {
53271        let idx = _mm512_set1_epi32(1);
53272        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53273        let r = _mm512_permutevar_epi32(idx, a);
53274        let e = _mm512_set1_epi32(14);
53275        assert_eq_m512i(r, e);
53276    }
53277
53278    #[simd_test(enable = "avx512f")]
53279    unsafe fn test_mm512_mask_permutevar_epi32() {
53280        let idx = _mm512_set1_epi32(1);
53281        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53282        let r = _mm512_mask_permutevar_epi32(a, 0, idx, a);
53283        assert_eq_m512i(r, a);
53284        let r = _mm512_mask_permutevar_epi32(a, 0b11111111_11111111, idx, a);
53285        let e = _mm512_set1_epi32(14);
53286        assert_eq_m512i(r, e);
53287    }
53288
53289    #[simd_test(enable = "avx512f")]
53290    unsafe fn test_mm512_permutevar_ps() {
53291        let a = _mm512_set_ps(
53292            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53293        );
53294        let b = _mm512_set1_epi32(0b01);
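        // permutevar_ps (vpermilps) shuffles within each 128-bit lane, so index 1 selects
        // element 1 of every lane.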
53295        let r = _mm512_permutevar_ps(a, b);
53296        let e = _mm512_set_ps(
53297            2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
53298        );
53299        assert_eq_m512(r, e);
53300    }
53301
53302    #[simd_test(enable = "avx512f")]
53303    unsafe fn test_mm512_mask_permutevar_ps() {
53304        let a = _mm512_set_ps(
53305            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53306        );
53307        let b = _mm512_set1_epi32(0b01);
53308        let r = _mm512_mask_permutevar_ps(a, 0, a, b);
53309        assert_eq_m512(r, a);
53310        let r = _mm512_mask_permutevar_ps(a, 0b11111111_11111111, a, b);
53311        let e = _mm512_set_ps(
53312            2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
53313        );
53314        assert_eq_m512(r, e);
53315    }
53316
53317    #[simd_test(enable = "avx512f")]
53318    unsafe fn test_mm512_maskz_permutevar_ps() {
53319        let a = _mm512_set_ps(
53320            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53321        );
53322        let b = _mm512_set1_epi32(0b01);
53323        let r = _mm512_maskz_permutevar_ps(0, a, b);
53324        assert_eq_m512(r, _mm512_setzero_ps());
53325        let r = _mm512_maskz_permutevar_ps(0b00000000_11111111, a, b);
53326        let e = _mm512_set_ps(
53327            0., 0., 0., 0., 0., 0., 0., 0., 10., 10., 10., 10., 14., 14., 14., 14.,
53328        );
53329        assert_eq_m512(r, e);
53330    }
53331
53332    #[simd_test(enable = "avx512f,avx512vl")]
53333    unsafe fn test_mm256_mask_permutevar_ps() {
53334        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53335        let b = _mm256_set1_epi32(0b01);
53336        let r = _mm256_mask_permutevar_ps(a, 0, a, b);
53337        assert_eq_m256(r, a);
53338        let r = _mm256_mask_permutevar_ps(a, 0b11111111, a, b);
53339        let e = _mm256_set_ps(2., 2., 2., 2., 6., 6., 6., 6.);
53340        assert_eq_m256(r, e);
53341    }
53342
53343    #[simd_test(enable = "avx512f,avx512vl")]
53344    unsafe fn test_mm256_maskz_permutevar_ps() {
53345        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53346        let b = _mm256_set1_epi32(0b01);
53347        let r = _mm256_maskz_permutevar_ps(0, a, b);
53348        assert_eq_m256(r, _mm256_setzero_ps());
53349        let r = _mm256_maskz_permutevar_ps(0b11111111, a, b);
53350        let e = _mm256_set_ps(2., 2., 2., 2., 6., 6., 6., 6.);
53351        assert_eq_m256(r, e);
53352    }
53353
53354    #[simd_test(enable = "avx512f,avx512vl")]
53355    unsafe fn test_mm_mask_permutevar_ps() {
53356        let a = _mm_set_ps(0., 1., 2., 3.);
53357        let b = _mm_set1_epi32(0b01);
53358        let r = _mm_mask_permutevar_ps(a, 0, a, b);
53359        assert_eq_m128(r, a);
53360        let r = _mm_mask_permutevar_ps(a, 0b00001111, a, b);
53361        let e = _mm_set_ps(2., 2., 2., 2.);
53362        assert_eq_m128(r, e);
53363    }
53364
53365    #[simd_test(enable = "avx512f,avx512vl")]
53366    unsafe fn test_mm_maskz_permutevar_ps() {
53367        let a = _mm_set_ps(0., 1., 2., 3.);
53368        let b = _mm_set1_epi32(0b01);
53369        let r = _mm_maskz_permutevar_ps(0, a, b);
53370        assert_eq_m128(r, _mm_setzero_ps());
53371        let r = _mm_maskz_permutevar_ps(0b00001111, a, b);
53372        let e = _mm_set_ps(2., 2., 2., 2.);
53373        assert_eq_m128(r, e);
53374    }
53375
53376    #[simd_test(enable = "avx512f")]
53377    unsafe fn test_mm512_permutexvar_epi32() {
53378        let idx = _mm512_set1_epi32(1);
53379        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
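        // permutexvar indexes across the full 512-bit register; with set_epi32 ordering,
        // element 1 of a is 14.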
53380        let r = _mm512_permutexvar_epi32(idx, a);
53381        let e = _mm512_set1_epi32(14);
53382        assert_eq_m512i(r, e);
53383    }
53384
53385    #[simd_test(enable = "avx512f")]
53386    unsafe fn test_mm512_mask_permutexvar_epi32() {
53387        let idx = _mm512_set1_epi32(1);
53388        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53389        let r = _mm512_mask_permutexvar_epi32(a, 0, idx, a);
53390        assert_eq_m512i(r, a);
53391        let r = _mm512_mask_permutexvar_epi32(a, 0b11111111_11111111, idx, a);
53392        let e = _mm512_set1_epi32(14);
53393        assert_eq_m512i(r, e);
53394    }
53395
53396    #[simd_test(enable = "avx512f")]
53397    unsafe fn test_mm512_maskz_permutexvar_epi32() {
53398        let idx = _mm512_set1_epi32(1);
53399        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53400        let r = _mm512_maskz_permutexvar_epi32(0, idx, a);
53401        assert_eq_m512i(r, _mm512_setzero_si512());
53402        let r = _mm512_maskz_permutexvar_epi32(0b00000000_11111111, idx, a);
53403        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14);
53404        assert_eq_m512i(r, e);
53405    }
53406
53407    #[simd_test(enable = "avx512f,avx512vl")]
53408    unsafe fn test_mm256_permutexvar_epi32() {
53409        let idx = _mm256_set1_epi32(1);
53410        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53411        let r = _mm256_permutexvar_epi32(idx, a);
53412        let e = _mm256_set1_epi32(6);
53413        assert_eq_m256i(r, e);
53414    }
53415
53416    #[simd_test(enable = "avx512f,avx512vl")]
53417    unsafe fn test_mm256_mask_permutexvar_epi32() {
53418        let idx = _mm256_set1_epi32(1);
53419        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53420        let r = _mm256_mask_permutexvar_epi32(a, 0, idx, a);
53421        assert_eq_m256i(r, a);
53422        let r = _mm256_mask_permutexvar_epi32(a, 0b11111111, idx, a);
53423        let e = _mm256_set1_epi32(6);
53424        assert_eq_m256i(r, e);
53425    }
53426
53427    #[simd_test(enable = "avx512f,avx512vl")]
53428    unsafe fn test_mm256_maskz_permutexvar_epi32() {
53429        let idx = _mm256_set1_epi32(1);
53430        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53431        let r = _mm256_maskz_permutexvar_epi32(0, idx, a);
53432        assert_eq_m256i(r, _mm256_setzero_si256());
53433        let r = _mm256_maskz_permutexvar_epi32(0b11111111, idx, a);
53434        let e = _mm256_set1_epi32(6);
53435        assert_eq_m256i(r, e);
53436    }
53437
53438    #[simd_test(enable = "avx512f")]
53439    unsafe fn test_mm512_permutexvar_ps() {
53440        let idx = _mm512_set1_epi32(1);
53441        let a = _mm512_set_ps(
53442            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53443        );
53444        let r = _mm512_permutexvar_ps(idx, a);
53445        let e = _mm512_set1_ps(14.);
53446        assert_eq_m512(r, e);
53447    }
53448
53449    #[simd_test(enable = "avx512f")]
53450    unsafe fn test_mm512_mask_permutexvar_ps() {
53451        let idx = _mm512_set1_epi32(1);
53452        let a = _mm512_set_ps(
53453            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53454        );
53455        let r = _mm512_mask_permutexvar_ps(a, 0, idx, a);
53456        assert_eq_m512(r, a);
53457        let r = _mm512_mask_permutexvar_ps(a, 0b11111111_11111111, idx, a);
53458        let e = _mm512_set1_ps(14.);
53459        assert_eq_m512(r, e);
53460    }
53461
53462    #[simd_test(enable = "avx512f")]
53463    unsafe fn test_mm512_maskz_permutexvar_ps() {
53464        let idx = _mm512_set1_epi32(1);
53465        let a = _mm512_set_ps(
53466            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53467        );
53468        let r = _mm512_maskz_permutexvar_ps(0, idx, a);
53469        assert_eq_m512(r, _mm512_setzero_ps());
53470        let r = _mm512_maskz_permutexvar_ps(0b00000000_11111111, idx, a);
53471        let e = _mm512_set_ps(
53472            0., 0., 0., 0., 0., 0., 0., 0., 14., 14., 14., 14., 14., 14., 14., 14.,
53473        );
53474        assert_eq_m512(r, e);
53475    }
53476
53477    #[simd_test(enable = "avx512f,avx512vl")]
53478    unsafe fn test_mm256_permutexvar_ps() {
53479        let idx = _mm256_set1_epi32(1);
53480        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53481        let r = _mm256_permutexvar_ps(idx, a);
53482        let e = _mm256_set1_ps(6.);
53483        assert_eq_m256(r, e);
53484    }
53485
53486    #[simd_test(enable = "avx512f,avx512vl")]
53487    unsafe fn test_mm256_mask_permutexvar_ps() {
53488        let idx = _mm256_set1_epi32(1);
53489        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53490        let r = _mm256_mask_permutexvar_ps(a, 0, idx, a);
53491        assert_eq_m256(r, a);
53492        let r = _mm256_mask_permutexvar_ps(a, 0b11111111, idx, a);
53493        let e = _mm256_set1_ps(6.);
53494        assert_eq_m256(r, e);
53495    }
53496
53497    #[simd_test(enable = "avx512f,avx512vl")]
53498    unsafe fn test_mm256_maskz_permutexvar_ps() {
53499        let idx = _mm256_set1_epi32(1);
53500        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53501        let r = _mm256_maskz_permutexvar_ps(0, idx, a);
53502        assert_eq_m256(r, _mm256_setzero_ps());
53503        let r = _mm256_maskz_permutexvar_ps(0b11111111, idx, a);
53504        let e = _mm256_set1_ps(6.);
53505        assert_eq_m256(r, e);
53506    }
53507
53508    #[simd_test(enable = "avx512f")]
53509    unsafe fn test_mm512_permutex2var_epi32() {
53510        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53511        #[rustfmt::skip]
53512        let idx = _mm512_set_epi32(
53513            1, 1 << 4, 2, 1 << 4,
53514            3, 1 << 4, 4, 1 << 4,
53515            5, 1 << 4, 6, 1 << 4,
53516            7, 1 << 4, 8, 1 << 4,
53517        );
53518        let b = _mm512_set1_epi32(100);
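        // idx entries with bit 4 set (1 << 4) select the element from b; the others index into a.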
53519        let r = _mm512_permutex2var_epi32(a, idx, b);
53520        let e = _mm512_set_epi32(
53521            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
53522        );
53523        assert_eq_m512i(r, e);
53524    }
53525
53526    #[simd_test(enable = "avx512f")]
53527    unsafe fn test_mm512_mask_permutex2var_epi32() {
53528        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53529        #[rustfmt::skip]
53530        let idx = _mm512_set_epi32(
53531            1, 1 << 4, 2, 1 << 4,
53532            3, 1 << 4, 4, 1 << 4,
53533            5, 1 << 4, 6, 1 << 4,
53534            7, 1 << 4, 8, 1 << 4,
53535        );
53536        let b = _mm512_set1_epi32(100);
53537        let r = _mm512_mask_permutex2var_epi32(a, 0, idx, b);
53538        assert_eq_m512i(r, a);
53539        let r = _mm512_mask_permutex2var_epi32(a, 0b11111111_11111111, idx, b);
53540        let e = _mm512_set_epi32(
53541            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
53542        );
53543        assert_eq_m512i(r, e);
53544    }
53545
53546    #[simd_test(enable = "avx512f")]
53547    unsafe fn test_mm512_maskz_permutex2var_epi32() {
53548        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53549        #[rustfmt::skip]
53550        let idx = _mm512_set_epi32(
53551            1, 1 << 4, 2, 1 << 4,
53552            3, 1 << 4, 4, 1 << 4,
53553            5, 1 << 4, 6, 1 << 4,
53554            7, 1 << 4, 8, 1 << 4,
53555        );
53556        let b = _mm512_set1_epi32(100);
53557        let r = _mm512_maskz_permutex2var_epi32(0, a, idx, b);
53558        assert_eq_m512i(r, _mm512_setzero_si512());
53559        let r = _mm512_maskz_permutex2var_epi32(0b00000000_11111111, a, idx, b);
53560        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 10, 100, 9, 100, 8, 100, 7, 100);
53561        assert_eq_m512i(r, e);
53562    }
53563
53564    #[simd_test(enable = "avx512f")]
53565    unsafe fn test_mm512_mask2_permutex2var_epi32() {
53566        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53567        #[rustfmt::skip]
53568        let idx = _mm512_set_epi32(
53569            1000, 1 << 4, 2000, 1 << 4,
53570            3000, 1 << 4, 4000, 1 << 4,
53571            5, 1 << 4, 6, 1 << 4,
53572            7, 1 << 4, 8, 1 << 4,
53573        );
53574        let b = _mm512_set1_epi32(100);
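        // The mask2 variant copies unselected elements from idx rather than from a, so
        // out-of-range index values like 1000 survive where the mask bit is clear.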
53575        let r = _mm512_mask2_permutex2var_epi32(a, idx, 0, b);
53576        assert_eq_m512i(r, idx);
53577        let r = _mm512_mask2_permutex2var_epi32(a, idx, 0b00000000_11111111, b);
53578        #[rustfmt::skip]
53579        let e = _mm512_set_epi32(
53580            1000, 1 << 4, 2000, 1 << 4,
53581            3000, 1 << 4, 4000, 1 << 4,
53582            10, 100, 9, 100,
53583            8, 100, 7, 100,
53584        );
53585        assert_eq_m512i(r, e);
53586    }
53587
53588    #[simd_test(enable = "avx512f,avx512vl")]
53589    unsafe fn test_mm256_permutex2var_epi32() {
53590        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53591        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53592        let b = _mm256_set1_epi32(100);
53593        let r = _mm256_permutex2var_epi32(a, idx, b);
53594        let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
53595        assert_eq_m256i(r, e);
53596    }
53597
53598    #[simd_test(enable = "avx512f,avx512vl")]
53599    unsafe fn test_mm256_mask_permutex2var_epi32() {
53600        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53601        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53602        let b = _mm256_set1_epi32(100);
53603        let r = _mm256_mask_permutex2var_epi32(a, 0, idx, b);
53604        assert_eq_m256i(r, a);
53605        let r = _mm256_mask_permutex2var_epi32(a, 0b11111111, idx, b);
53606        let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
53607        assert_eq_m256i(r, e);
53608    }
53609
53610    #[simd_test(enable = "avx512f,avx512vl")]
53611    unsafe fn test_mm256_maskz_permutex2var_epi32() {
53612        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53613        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53614        let b = _mm256_set1_epi32(100);
53615        let r = _mm256_maskz_permutex2var_epi32(0, a, idx, b);
53616        assert_eq_m256i(r, _mm256_setzero_si256());
53617        let r = _mm256_maskz_permutex2var_epi32(0b11111111, a, idx, b);
53618        let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
53619        assert_eq_m256i(r, e);
53620    }
53621
53622    #[simd_test(enable = "avx512f,avx512vl")]
53623    unsafe fn test_mm256_mask2_permutex2var_epi32() {
53624        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53625        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53626        let b = _mm256_set1_epi32(100);
53627        let r = _mm256_mask2_permutex2var_epi32(a, idx, 0, b);
53628        assert_eq_m256i(r, idx);
53629        let r = _mm256_mask2_permutex2var_epi32(a, idx, 0b11111111, b);
53630        let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
53631        assert_eq_m256i(r, e);
53632    }
53633
53634    #[simd_test(enable = "avx512f,avx512vl")]
53635    unsafe fn test_mm_permutex2var_epi32() {
53636        let a = _mm_set_epi32(0, 1, 2, 3);
53637        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53638        let b = _mm_set1_epi32(100);
53639        let r = _mm_permutex2var_epi32(a, idx, b);
53640        let e = _mm_set_epi32(2, 100, 1, 100);
53641        assert_eq_m128i(r, e);
53642    }
53643
53644    #[simd_test(enable = "avx512f,avx512vl")]
53645    unsafe fn test_mm_mask_permutex2var_epi32() {
53646        let a = _mm_set_epi32(0, 1, 2, 3);
53647        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53648        let b = _mm_set1_epi32(100);
53649        let r = _mm_mask_permutex2var_epi32(a, 0, idx, b);
53650        assert_eq_m128i(r, a);
53651        let r = _mm_mask_permutex2var_epi32(a, 0b00001111, idx, b);
53652        let e = _mm_set_epi32(2, 100, 1, 100);
53653        assert_eq_m128i(r, e);
53654    }
53655
53656    #[simd_test(enable = "avx512f,avx512vl")]
53657    unsafe fn test_mm_maskz_permutex2var_epi32() {
53658        let a = _mm_set_epi32(0, 1, 2, 3);
53659        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53660        let b = _mm_set1_epi32(100);
53661        let r = _mm_maskz_permutex2var_epi32(0, a, idx, b);
53662        assert_eq_m128i(r, _mm_setzero_si128());
53663        let r = _mm_maskz_permutex2var_epi32(0b00001111, a, idx, b);
53664        let e = _mm_set_epi32(2, 100, 1, 100);
53665        assert_eq_m128i(r, e);
53666    }
53667
53668    #[simd_test(enable = "avx512f,avx512vl")]
53669    unsafe fn test_mm_mask2_permutex2var_epi32() {
53670        let a = _mm_set_epi32(0, 1, 2, 3);
53671        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53672        let b = _mm_set1_epi32(100);
53673        let r = _mm_mask2_permutex2var_epi32(a, idx, 0, b);
53674        assert_eq_m128i(r, idx);
53675        let r = _mm_mask2_permutex2var_epi32(a, idx, 0b00001111, b);
53676        let e = _mm_set_epi32(2, 100, 1, 100);
53677        assert_eq_m128i(r, e);
53678    }
53679
53680    #[simd_test(enable = "avx512f")]
53681    unsafe fn test_mm512_permutex2var_ps() {
53682        let a = _mm512_set_ps(
53683            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53684        );
53685        #[rustfmt::skip]
53686        let idx = _mm512_set_epi32(
53687            1, 1 << 4, 2, 1 << 4,
53688            3, 1 << 4, 4, 1 << 4,
53689            5, 1 << 4, 6, 1 << 4,
53690            7, 1 << 4, 8, 1 << 4,
53691        );
53692        let b = _mm512_set1_ps(100.);
53693        let r = _mm512_permutex2var_ps(a, idx, b);
53694        let e = _mm512_set_ps(
53695            14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
53696        );
53697        assert_eq_m512(r, e);
53698    }
53699
53700    #[simd_test(enable = "avx512f")]
53701    unsafe fn test_mm512_mask_permutex2var_ps() {
53702        let a = _mm512_set_ps(
53703            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53704        );
53705        #[rustfmt::skip]
53706        let idx = _mm512_set_epi32(
53707            1, 1 << 4, 2, 1 << 4,
53708            3, 1 << 4, 4, 1 << 4,
53709            5, 1 << 4, 6, 1 << 4,
53710            7, 1 << 4, 8, 1 << 4,
53711        );
53712        let b = _mm512_set1_ps(100.);
53713        let r = _mm512_mask_permutex2var_ps(a, 0, idx, b);
53714        assert_eq_m512(r, a);
53715        let r = _mm512_mask_permutex2var_ps(a, 0b11111111_11111111, idx, b);
53716        let e = _mm512_set_ps(
53717            14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
53718        );
53719        assert_eq_m512(r, e);
53720    }
53721
53722    #[simd_test(enable = "avx512f")]
53723    unsafe fn test_mm512_maskz_permutex2var_ps() {
53724        let a = _mm512_set_ps(
53725            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53726        );
53727        #[rustfmt::skip]
53728        let idx = _mm512_set_epi32(
53729            1, 1 << 4, 2, 1 << 4,
53730            3, 1 << 4, 4, 1 << 4,
53731            5, 1 << 4, 6, 1 << 4,
53732            7, 1 << 4, 8, 1 << 4,
53733        );
53734        let b = _mm512_set1_ps(100.);
53735        let r = _mm512_maskz_permutex2var_ps(0, a, idx, b);
53736        assert_eq_m512(r, _mm512_setzero_ps());
53737        let r = _mm512_maskz_permutex2var_ps(0b00000000_11111111, a, idx, b);
53738        let e = _mm512_set_ps(
53739            0., 0., 0., 0., 0., 0., 0., 0., 10., 100., 9., 100., 8., 100., 7., 100.,
53740        );
53741        assert_eq_m512(r, e);
53742    }
53743
53744    #[simd_test(enable = "avx512f")]
53745    unsafe fn test_mm512_mask2_permutex2var_ps() {
53746        let a = _mm512_set_ps(
53747            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53748        );
53749        #[rustfmt::skip]
53750        let idx = _mm512_set_epi32(
53751            1, 1 << 4, 2, 1 << 4,
53752            3, 1 << 4, 4, 1 << 4,
53753            5, 1 << 4, 6, 1 << 4,
53754            7, 1 << 4, 8, 1 << 4,
53755        );
53756        let b = _mm512_set1_ps(100.);
53757        let r = _mm512_mask2_permutex2var_ps(a, idx, 0, b);
53758        assert_eq_m512(r, _mm512_castsi512_ps(idx));
53759        let r = _mm512_mask2_permutex2var_ps(a, idx, 0b11111111_11111111, b);
53760        let e = _mm512_set_ps(
53761            14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
53762        );
53763        assert_eq_m512(r, e);
53764    }
53765
53766    #[simd_test(enable = "avx512f,avx512vl")]
53767    unsafe fn test_mm256_permutex2var_ps() {
53768        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53769        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53770        let b = _mm256_set1_ps(100.);
53771        let r = _mm256_permutex2var_ps(a, idx, b);
53772        let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
53773        assert_eq_m256(r, e);
53774    }
53775
53776    #[simd_test(enable = "avx512f,avx512vl")]
53777    unsafe fn test_mm256_mask_permutex2var_ps() {
53778        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53779        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53780        let b = _mm256_set1_ps(100.);
53781        let r = _mm256_mask_permutex2var_ps(a, 0, idx, b);
53782        assert_eq_m256(r, a);
53783        let r = _mm256_mask_permutex2var_ps(a, 0b11111111, idx, b);
53784        let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
53785        assert_eq_m256(r, e);
53786    }
53787
53788    #[simd_test(enable = "avx512f,avx512vl")]
53789    unsafe fn test_mm256_maskz_permutex2var_ps() {
53790        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53791        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53792        let b = _mm256_set1_ps(100.);
53793        let r = _mm256_maskz_permutex2var_ps(0, a, idx, b);
53794        assert_eq_m256(r, _mm256_setzero_ps());
53795        let r = _mm256_maskz_permutex2var_ps(0b11111111, a, idx, b);
53796        let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
53797        assert_eq_m256(r, e);
53798    }
53799
53800    #[simd_test(enable = "avx512f,avx512vl")]
53801    unsafe fn test_mm256_mask2_permutex2var_ps() {
53802        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53803        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53804        let b = _mm256_set1_ps(100.);
53805        let r = _mm256_mask2_permutex2var_ps(a, idx, 0, b);
53806        assert_eq_m256(r, _mm256_castsi256_ps(idx));
53807        let r = _mm256_mask2_permutex2var_ps(a, idx, 0b11111111, b);
53808        let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
53809        assert_eq_m256(r, e);
53810    }
53811
53812    #[simd_test(enable = "avx512f,avx512vl")]
53813    unsafe fn test_mm_permutex2var_ps() {
53814        let a = _mm_set_ps(0., 1., 2., 3.);
53815        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53816        let b = _mm_set1_ps(100.);
53817        let r = _mm_permutex2var_ps(a, idx, b);
53818        let e = _mm_set_ps(2., 100., 1., 100.);
53819        assert_eq_m128(r, e);
53820    }
53821
53822    #[simd_test(enable = "avx512f,avx512vl")]
53823    unsafe fn test_mm_mask_permutex2var_ps() {
53824        let a = _mm_set_ps(0., 1., 2., 3.);
53825        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53826        let b = _mm_set1_ps(100.);
53827        let r = _mm_mask_permutex2var_ps(a, 0, idx, b);
53828        assert_eq_m128(r, a);
53829        let r = _mm_mask_permutex2var_ps(a, 0b00001111, idx, b);
53830        let e = _mm_set_ps(2., 100., 1., 100.);
53831        assert_eq_m128(r, e);
53832    }
53833
53834    #[simd_test(enable = "avx512f,avx512vl")]
53835    unsafe fn test_mm_maskz_permutex2var_ps() {
53836        let a = _mm_set_ps(0., 1., 2., 3.);
53837        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53838        let b = _mm_set1_ps(100.);
53839        let r = _mm_maskz_permutex2var_ps(0, a, idx, b);
53840        assert_eq_m128(r, _mm_setzero_ps());
53841        let r = _mm_maskz_permutex2var_ps(0b00001111, a, idx, b);
53842        let e = _mm_set_ps(2., 100., 1., 100.);
53843        assert_eq_m128(r, e);
53844    }
53845
53846    #[simd_test(enable = "avx512f,avx512vl")]
53847    unsafe fn test_mm_mask2_permutex2var_ps() {
53848        let a = _mm_set_ps(0., 1., 2., 3.);
53849        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53850        let b = _mm_set1_ps(100.);
53851        let r = _mm_mask2_permutex2var_ps(a, idx, 0, b);
53852        assert_eq_m128(r, _mm_castsi128_ps(idx));
53853        let r = _mm_mask2_permutex2var_ps(a, idx, 0b00001111, b);
53854        let e = _mm_set_ps(2., 100., 1., 100.);
53855        assert_eq_m128(r, e);
53856    }
53857
53858    #[simd_test(enable = "avx512f")]
53859    unsafe fn test_mm512_shuffle_epi32() {
53860        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
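        // _MM_PERM_AADD is 0b00_00_11_11: within each 128-bit lane, result elements 0 and 1
        // take source element 3, and elements 2 and 3 take source element 0.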
53861        let r = _mm512_shuffle_epi32::<_MM_PERM_AADD>(a);
53862        let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9);
53863        assert_eq_m512i(r, e);
53864    }
53865
53866    #[simd_test(enable = "avx512f")]
53867    unsafe fn test_mm512_mask_shuffle_epi32() {
53868        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
53869        let r = _mm512_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
53870        assert_eq_m512i(r, a);
53871        let r = _mm512_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b11111111_11111111, a);
53872        let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9);
53873        assert_eq_m512i(r, e);
53874    }
53875
53876    #[simd_test(enable = "avx512f")]
53877    unsafe fn test_mm512_maskz_shuffle_epi32() {
53878        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
53879        let r = _mm512_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
53880        assert_eq_m512i(r, _mm512_setzero_si512());
53881        let r = _mm512_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b00000000_11111111, a);
53882        let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0);
53883        assert_eq_m512i(r, e);
53884    }
53885
53886    #[simd_test(enable = "avx512f,avx512vl")]
53887    unsafe fn test_mm256_mask_shuffle_epi32() {
53888        let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
53889        let r = _mm256_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
53890        assert_eq_m256i(r, a);
53891        let r = _mm256_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b11111111, a);
53892        let e = _mm256_set_epi32(8, 8, 1, 1, 16, 16, 9, 9);
53893        assert_eq_m256i(r, e);
53894    }
53895
53896    #[simd_test(enable = "avx512f,avx512vl")]
53897    unsafe fn test_mm256_maskz_shuffle_epi32() {
53898        let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
53899        let r = _mm256_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
53900        assert_eq_m256i(r, _mm256_setzero_si256());
53901        let r = _mm256_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b11111111, a);
53902        let e = _mm256_set_epi32(8, 8, 1, 1, 16, 16, 9, 9);
53903        assert_eq_m256i(r, e);
53904    }
53905
53906    #[simd_test(enable = "avx512f,avx512vl")]
53907    unsafe fn test_mm_mask_shuffle_epi32() {
53908        let a = _mm_set_epi32(1, 4, 5, 8);
53909        let r = _mm_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
53910        assert_eq_m128i(r, a);
53911        let r = _mm_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b00001111, a);
53912        let e = _mm_set_epi32(8, 8, 1, 1);
53913        assert_eq_m128i(r, e);
53914    }
53915
53916    #[simd_test(enable = "avx512f,avx512vl")]
53917    unsafe fn test_mm_maskz_shuffle_epi32() {
53918        let a = _mm_set_epi32(1, 4, 5, 8);
53919        let r = _mm_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
53920        assert_eq_m128i(r, _mm_setzero_si128());
53921        let r = _mm_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b00001111, a);
53922        let e = _mm_set_epi32(8, 8, 1, 1);
53923        assert_eq_m128i(r, e);
53924    }
53925
53926    #[simd_test(enable = "avx512f")]
53927    unsafe fn test_mm512_shuffle_ps() {
53928        let a = _mm512_setr_ps(
53929            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
53930        );
53931        let b = _mm512_setr_ps(
53932            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
53933        );
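        // For shuffle_ps, the two low immediate fields select from a and the two high fields
        // from b, per 128-bit lane.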
53934        let r = _mm512_shuffle_ps::<0b00_00_11_11>(a, b);
53935        let e = _mm512_setr_ps(
53936            8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10.,
53937        );
53938        assert_eq_m512(r, e);
53939    }
53940
53941    #[simd_test(enable = "avx512f")]
53942    unsafe fn test_mm512_mask_shuffle_ps() {
53943        let a = _mm512_setr_ps(
53944            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
53945        );
53946        let b = _mm512_setr_ps(
53947            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
53948        );
53949        let r = _mm512_mask_shuffle_ps::<0b00_00_11_11>(a, 0, a, b);
53950        assert_eq_m512(r, a);
53951        let r = _mm512_mask_shuffle_ps::<0b00_00_11_11>(a, 0b11111111_11111111, a, b);
53952        let e = _mm512_setr_ps(
53953            8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10.,
53954        );
53955        assert_eq_m512(r, e);
53956    }
53957
53958    #[simd_test(enable = "avx512f")]
53959    unsafe fn test_mm512_maskz_shuffle_ps() {
53960        let a = _mm512_setr_ps(
53961            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
53962        );
53963        let b = _mm512_setr_ps(
53964            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
53965        );
53966        let r = _mm512_maskz_shuffle_ps::<0b00_00_11_11>(0, a, b);
53967        assert_eq_m512(r, _mm512_setzero_ps());
53968        let r = _mm512_maskz_shuffle_ps::<0b00_00_11_11>(0b00000000_11111111, a, b);
53969        let e = _mm512_setr_ps(
53970            8., 8., 2., 2., 16., 16., 10., 10., 0., 0., 0., 0., 0., 0., 0., 0.,
53971        );
53972        assert_eq_m512(r, e);
53973    }
53974
53975    #[simd_test(enable = "avx512f,avx512vl")]
53976    unsafe fn test_mm256_mask_shuffle_ps() {
53977        let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
53978        let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
53979        let r = _mm256_mask_shuffle_ps::<0b11_11_11_11>(a, 0, a, b);
53980        assert_eq_m256(r, a);
53981        let r = _mm256_mask_shuffle_ps::<0b00_00_11_11>(a, 0b11111111, a, b);
53982        let e = _mm256_set_ps(7., 7., 1., 1., 15., 15., 9., 9.);
53983        assert_eq_m256(r, e);
53984    }
53985
53986    #[simd_test(enable = "avx512f,avx512vl")]
53987    unsafe fn test_mm256_maskz_shuffle_ps() {
53988        let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
53989        let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
53990        let r = _mm256_maskz_shuffle_ps::<0b11_11_11_11>(0, a, b);
53991        assert_eq_m256(r, _mm256_setzero_ps());
53992        let r = _mm256_maskz_shuffle_ps::<0b00_00_11_11>(0b11111111, a, b);
53993        let e = _mm256_set_ps(7., 7., 1., 1., 15., 15., 9., 9.);
53994        assert_eq_m256(r, e);
53995    }
53996
53997    #[simd_test(enable = "avx512f,avx512vl")]
53998    unsafe fn test_mm_mask_shuffle_ps() {
53999        let a = _mm_set_ps(1., 4., 5., 8.);
54000        let b = _mm_set_ps(2., 3., 6., 7.);
54001        let r = _mm_mask_shuffle_ps::<0b11_11_11_11>(a, 0, a, b);
54002        assert_eq_m128(r, a);
54003        let r = _mm_mask_shuffle_ps::<0b00_00_11_11>(a, 0b00001111, a, b);
54004        let e = _mm_set_ps(7., 7., 1., 1.);
54005        assert_eq_m128(r, e);
54006    }
54007
54008    #[simd_test(enable = "avx512f,avx512vl")]
54009    unsafe fn test_mm_maskz_shuffle_ps() {
54010        let a = _mm_set_ps(1., 4., 5., 8.);
54011        let b = _mm_set_ps(2., 3., 6., 7.);
54012        let r = _mm_maskz_shuffle_ps::<0b11_11_11_11>(0, a, b);
54013        assert_eq_m128(r, _mm_setzero_ps());
54014        let r = _mm_maskz_shuffle_ps::<0b00_00_11_11>(0b00001111, a, b);
54015        let e = _mm_set_ps(7., 7., 1., 1.);
54016        assert_eq_m128(r, e);
54017    }
54018
54019    #[simd_test(enable = "avx512f")]
54020    unsafe fn test_mm512_shuffle_i32x4() {
54021        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
54022        let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
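        // shuffle_i32x4 picks whole 128-bit lanes: the two low immediate fields choose lanes
        // from a, the two high fields choose lanes from b.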
54023        let r = _mm512_shuffle_i32x4::<0b00_00_00_00>(a, b);
54024        let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7);
54025        assert_eq_m512i(r, e);
54026    }
54027
54028    #[simd_test(enable = "avx512f")]
54029    unsafe fn test_mm512_mask_shuffle_i32x4() {
54030        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
54031        let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
54032        let r = _mm512_mask_shuffle_i32x4::<0b00_00_00_00>(a, 0, a, b);
54033        assert_eq_m512i(r, a);
54034        let r = _mm512_mask_shuffle_i32x4::<0b00_00_00_00>(a, 0b11111111_11111111, a, b);
54035        let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7);
54036        assert_eq_m512i(r, e);
54037    }
54038
54039    #[simd_test(enable = "avx512f")]
54040    unsafe fn test_mm512_maskz_shuffle_i32x4() {
54041        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
54042        let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
54043        let r = _mm512_maskz_shuffle_i32x4::<0b00_00_00_00>(0, a, b);
54044        assert_eq_m512i(r, _mm512_setzero_si512());
54045        let r = _mm512_maskz_shuffle_i32x4::<0b00_00_00_00>(0b00000000_11111111, a, b);
54046        let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 0, 0, 0, 0, 0, 0, 0, 0);
54047        assert_eq_m512i(r, e);
54048    }
54049
54050    #[simd_test(enable = "avx512f,avx512vl")]
54051    unsafe fn test_mm256_shuffle_i32x4() {
54052        let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
54053        let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
54054        let r = _mm256_shuffle_i32x4::<0b00>(a, b);
54055        let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
54056        assert_eq_m256i(r, e);
54057    }
54058
54059    #[simd_test(enable = "avx512f,avx512vl")]
54060    unsafe fn test_mm256_mask_shuffle_i32x4() {
54061        let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
54062        let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
54063        let r = _mm256_mask_shuffle_i32x4::<0b00>(a, 0, a, b);
54064        assert_eq_m256i(r, a);
54065        let r = _mm256_mask_shuffle_i32x4::<0b00>(a, 0b11111111, a, b);
54066        let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
54067        assert_eq_m256i(r, e);
54068    }
54069
54070    #[simd_test(enable = "avx512f,avx512vl")]
54071    unsafe fn test_mm256_maskz_shuffle_i32x4() {
54072        let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
54073        let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
54074        let r = _mm256_maskz_shuffle_i32x4::<0b00>(0, a, b);
54075        assert_eq_m256i(r, _mm256_setzero_si256());
54076        let r = _mm256_maskz_shuffle_i32x4::<0b00>(0b11111111, a, b);
54077        let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
54078        assert_eq_m256i(r, e);
54079    }
54080
54081    #[simd_test(enable = "avx512f")]
54082    unsafe fn test_mm512_shuffle_f32x4() {
54083        let a = _mm512_setr_ps(
54084            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
54085        );
54086        let b = _mm512_setr_ps(
54087            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
54088        );
54089        let r = _mm512_shuffle_f32x4::<0b00_00_00_00>(a, b);
54090        let e = _mm512_setr_ps(
54091            1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7.,
54092        );
54093        assert_eq_m512(r, e);
54094    }
54095
54096    #[simd_test(enable = "avx512f")]
54097    unsafe fn test_mm512_mask_shuffle_f32x4() {
54098        let a = _mm512_setr_ps(
54099            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
54100        );
54101        let b = _mm512_setr_ps(
54102            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
54103        );
54104        let r = _mm512_mask_shuffle_f32x4::<0b00_00_00_00>(a, 0, a, b);
54105        assert_eq_m512(r, a);
54106        let r = _mm512_mask_shuffle_f32x4::<0b00_00_00_00>(a, 0b11111111_11111111, a, b);
54107        let e = _mm512_setr_ps(
54108            1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7.,
54109        );
54110        assert_eq_m512(r, e);
54111    }
54112
54113    #[simd_test(enable = "avx512f")]
54114    unsafe fn test_mm512_maskz_shuffle_f32x4() {
54115        let a = _mm512_setr_ps(
54116            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
54117        );
54118        let b = _mm512_setr_ps(
54119            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
54120        );
54121        let r = _mm512_maskz_shuffle_f32x4::<0b00_00_00_00>(0, a, b);
54122        assert_eq_m512(r, _mm512_setzero_ps());
54123        let r = _mm512_maskz_shuffle_f32x4::<0b00_00_00_00>(0b00000000_11111111, a, b);
54124        let e = _mm512_setr_ps(
54125            1., 4., 5., 8., 1., 4., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
54126        );
54127        assert_eq_m512(r, e);
54128    }
54129
54130    #[simd_test(enable = "avx512f,avx512vl")]
54131    unsafe fn test_mm256_shuffle_f32x4() {
54132        let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
54133        let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
54134        let r = _mm256_shuffle_f32x4::<0b00>(a, b);
54135        let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
54136        assert_eq_m256(r, e);
54137    }
54138
54139    #[simd_test(enable = "avx512f,avx512vl")]
54140    unsafe fn test_mm256_mask_shuffle_f32x4() {
54141        let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
54142        let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
54143        let r = _mm256_mask_shuffle_f32x4::<0b00>(a, 0, a, b);
54144        assert_eq_m256(r, a);
54145        let r = _mm256_mask_shuffle_f32x4::<0b00>(a, 0b11111111, a, b);
54146        let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
54147        assert_eq_m256(r, e);
54148    }
54149
54150    #[simd_test(enable = "avx512f,avx512vl")]
54151    unsafe fn test_mm256_maskz_shuffle_f32x4() {
54152        let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
54153        let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
54154        let r = _mm256_maskz_shuffle_f32x4::<0b00>(0, a, b);
54155        assert_eq_m256(r, _mm256_setzero_ps());
54156        let r = _mm256_maskz_shuffle_f32x4::<0b00>(0b11111111, a, b);
54157        let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
54158        assert_eq_m256(r, e);
54159    }
54160
54161    #[simd_test(enable = "avx512f")]
54162    unsafe fn test_mm512_extractf32x4_ps() {
54163        let a = _mm512_setr_ps(
54164            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54165        );
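        // <1> extracts the second 128-bit lane, i.e. elements 4..=7.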
54166        let r = _mm512_extractf32x4_ps::<1>(a);
54167        let e = _mm_setr_ps(5., 6., 7., 8.);
54168        assert_eq_m128(r, e);
54169    }
54170
54171    #[simd_test(enable = "avx512f")]
54172    unsafe fn test_mm512_mask_extractf32x4_ps() {
54173        let a = _mm512_setr_ps(
54174            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54175        );
54176        let src = _mm_set1_ps(100.);
54177        let r = _mm512_mask_extractf32x4_ps::<1>(src, 0, a);
54178        assert_eq_m128(r, src);
54179        let r = _mm512_mask_extractf32x4_ps::<1>(src, 0b11111111, a);
54180        let e = _mm_setr_ps(5., 6., 7., 8.);
54181        assert_eq_m128(r, e);
54182    }
54183
54184    #[simd_test(enable = "avx512f")]
54185    unsafe fn test_mm512_maskz_extractf32x4_ps() {
54186        let a = _mm512_setr_ps(
54187            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54188        );
54189        let r = _mm512_maskz_extractf32x4_ps::<1>(0, a);
54190        assert_eq_m128(r, _mm_setzero_ps());
54191        let r = _mm512_maskz_extractf32x4_ps::<1>(0b00000001, a);
54192        let e = _mm_setr_ps(5., 0., 0., 0.);
54193        assert_eq_m128(r, e);
54194    }
54195
54196    #[simd_test(enable = "avx512f,avx512vl")]
54197    unsafe fn test_mm256_extractf32x4_ps() {
54198        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54199        let r = _mm256_extractf32x4_ps::<1>(a);
54200        let e = _mm_set_ps(1., 2., 3., 4.);
54201        assert_eq_m128(r, e);
54202    }
54203
54204    #[simd_test(enable = "avx512f,avx512vl")]
54205    unsafe fn test_mm256_mask_extractf32x4_ps() {
54206        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54207        let src = _mm_set1_ps(100.);
54208        let r = _mm256_mask_extractf32x4_ps::<1>(src, 0, a);
54209        assert_eq_m128(r, src);
54210        let r = _mm256_mask_extractf32x4_ps::<1>(src, 0b00001111, a);
54211        let e = _mm_set_ps(1., 2., 3., 4.);
54212        assert_eq_m128(r, e);
54213    }
54214
54215    #[simd_test(enable = "avx512f,avx512vl")]
54216    unsafe fn test_mm256_maskz_extractf32x4_ps() {
54217        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54218        let r = _mm256_maskz_extractf32x4_ps::<1>(0, a);
54219        assert_eq_m128(r, _mm_setzero_ps());
54220        let r = _mm256_maskz_extractf32x4_ps::<1>(0b00001111, a);
54221        let e = _mm_set_ps(1., 2., 3., 4.);
54222        assert_eq_m128(r, e);
54223    }
54224
54225    #[simd_test(enable = "avx512f")]
54226    unsafe fn test_mm512_extracti32x4_epi32() {
54227        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54228        let r = _mm512_extracti32x4_epi32::<1>(a);
54229        let e = _mm_setr_epi32(5, 6, 7, 8);
54230        assert_eq_m128i(r, e);
54231    }
54232
54233    #[simd_test(enable = "avx512f")]
54234    unsafe fn test_mm512_mask_extracti32x4_epi32() {
54235        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54236        let src = _mm_set1_epi32(100);
54237        let r = _mm512_mask_extracti32x4_epi32::<1>(src, 0, a);
54238        assert_eq_m128i(r, src);
54239        let r = _mm512_mask_extracti32x4_epi32::<1>(src, 0b11111111, a);
54240        let e = _mm_setr_epi32(5, 6, 7, 8);
54241        assert_eq_m128i(r, e);
54242    }
54243
54244    #[simd_test(enable = "avx512f")]
54245    unsafe fn test_mm512_maskz_extracti32x4_epi32() {
54246        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54247        let r = _mm512_maskz_extracti32x4_epi32::<1>(0, a);
54248        assert_eq_m128i(r, _mm_setzero_si128());
54249        let r = _mm512_maskz_extracti32x4_epi32::<1>(0b00000001, a);
54250        let e = _mm_setr_epi32(5, 0, 0, 0);
54251        assert_eq_m128i(r, e);
54252    }
54253
54254    #[simd_test(enable = "avx512f,avx512vl")]
54255    unsafe fn test_mm256_extracti32x4_epi32() {
54256        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54257        let r = _mm256_extracti32x4_epi32::<1>(a);
54258        let e = _mm_set_epi32(1, 2, 3, 4);
54259        assert_eq_m128i(r, e);
54260    }
54261
54262    #[simd_test(enable = "avx512f,avx512vl")]
54263    unsafe fn test_mm256_mask_extracti32x4_epi32() {
54264        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54265        let src = _mm_set1_epi32(100);
54266        let r = _mm256_mask_extracti32x4_epi32::<1>(src, 0, a);
54267        assert_eq_m128i(r, src);
54268        let r = _mm256_mask_extracti32x4_epi32::<1>(src, 0b00001111, a);
54269        let e = _mm_set_epi32(1, 2, 3, 4);
54270        assert_eq_m128i(r, e);
54271    }
54272
54273    #[simd_test(enable = "avx512f,avx512vl")]
54274    unsafe fn test_mm256_maskz_extracti32x4_epi32() {
54275        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54276        let r = _mm256_maskz_extracti32x4_epi32::<1>(0, a);
54277        assert_eq_m128i(r, _mm_setzero_si128());
54278        let r = _mm256_maskz_extracti32x4_epi32::<1>(0b00001111, a);
54279        let e = _mm_set_epi32(1, 2, 3, 4);
54280        assert_eq_m128i(r, e);
54281    }
54282
54283    #[simd_test(enable = "avx512f")]
54284    unsafe fn test_mm512_moveldup_ps() {
54285        let a = _mm512_setr_ps(
54286            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54287        );
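        // moveldup duplicates each even-indexed element into the following odd slot.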
54288        let r = _mm512_moveldup_ps(a);
54289        let e = _mm512_setr_ps(
54290            1., 1., 3., 3., 5., 5., 7., 7., 9., 9., 11., 11., 13., 13., 15., 15.,
54291        );
54292        assert_eq_m512(r, e);
54293    }
54294
54295    #[simd_test(enable = "avx512f")]
54296    unsafe fn test_mm512_mask_moveldup_ps() {
54297        let a = _mm512_setr_ps(
54298            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54299        );
54300        let r = _mm512_mask_moveldup_ps(a, 0, a);
54301        assert_eq_m512(r, a);
54302        let r = _mm512_mask_moveldup_ps(a, 0b11111111_11111111, a);
54303        let e = _mm512_setr_ps(
54304            1., 1., 3., 3., 5., 5., 7., 7., 9., 9., 11., 11., 13., 13., 15., 15.,
54305        );
54306        assert_eq_m512(r, e);
54307    }
54308
54309    #[simd_test(enable = "avx512f")]
54310    unsafe fn test_mm512_maskz_moveldup_ps() {
54311        let a = _mm512_setr_ps(
54312            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54313        );
54314        let r = _mm512_maskz_moveldup_ps(0, a);
54315        assert_eq_m512(r, _mm512_setzero_ps());
54316        let r = _mm512_maskz_moveldup_ps(0b00000000_11111111, a);
54317        let e = _mm512_setr_ps(
54318            1., 1., 3., 3., 5., 5., 7., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
54319        );
54320        assert_eq_m512(r, e);
54321    }
54322
54323    #[simd_test(enable = "avx512f,avx512vl")]
54324    unsafe fn test_mm256_mask_moveldup_ps() {
54325        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54326        let r = _mm256_mask_moveldup_ps(a, 0, a);
54327        assert_eq_m256(r, a);
54328        let r = _mm256_mask_moveldup_ps(a, 0b11111111, a);
54329        let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.);
54330        assert_eq_m256(r, e);
54331    }
54332
54333    #[simd_test(enable = "avx512f,avx512vl")]
54334    unsafe fn test_mm256_maskz_moveldup_ps() {
54335        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54336        let r = _mm256_maskz_moveldup_ps(0, a);
54337        assert_eq_m256(r, _mm256_setzero_ps());
54338        let r = _mm256_maskz_moveldup_ps(0b11111111, a);
54339        let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.);
54340        assert_eq_m256(r, e);
54341    }
54342
54343    #[simd_test(enable = "avx512f,avx512vl")]
54344    unsafe fn test_mm_mask_moveldup_ps() {
54345        let a = _mm_set_ps(1., 2., 3., 4.);
54346        let r = _mm_mask_moveldup_ps(a, 0, a);
54347        assert_eq_m128(r, a);
54348        let r = _mm_mask_moveldup_ps(a, 0b00001111, a);
54349        let e = _mm_set_ps(2., 2., 4., 4.);
54350        assert_eq_m128(r, e);
54351    }
54352
54353    #[simd_test(enable = "avx512f,avx512vl")]
54354    unsafe fn test_mm_maskz_moveldup_ps() {
54355        let a = _mm_set_ps(1., 2., 3., 4.);
54356        let r = _mm_maskz_moveldup_ps(0, a);
54357        assert_eq_m128(r, _mm_setzero_ps());
54358        let r = _mm_maskz_moveldup_ps(0b00001111, a);
54359        let e = _mm_set_ps(2., 2., 4., 4.);
54360        assert_eq_m128(r, e);
54361    }
54362
54363    #[simd_test(enable = "avx512f")]
54364    unsafe fn test_mm512_movehdup_ps() {
54365        let a = _mm512_setr_ps(
54366            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54367        );
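        // movehdup duplicates each odd-indexed element into the preceding even slot.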
54368        let r = _mm512_movehdup_ps(a);
54369        let e = _mm512_setr_ps(
54370            2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
54371        );
54372        assert_eq_m512(r, e);
54373    }
54374
54375    #[simd_test(enable = "avx512f")]
54376    unsafe fn test_mm512_mask_movehdup_ps() {
54377        let a = _mm512_setr_ps(
54378            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54379        );
54380        let r = _mm512_mask_movehdup_ps(a, 0, a);
54381        assert_eq_m512(r, a);
54382        let r = _mm512_mask_movehdup_ps(a, 0b11111111_11111111, a);
54383        let e = _mm512_setr_ps(
54384            2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
54385        );
54386        assert_eq_m512(r, e);
54387    }
54388
54389    #[simd_test(enable = "avx512f")]
54390    unsafe fn test_mm512_maskz_movehdup_ps() {
54391        let a = _mm512_setr_ps(
54392            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54393        );
54394        let r = _mm512_maskz_movehdup_ps(0, a);
54395        assert_eq_m512(r, _mm512_setzero_ps());
54396        let r = _mm512_maskz_movehdup_ps(0b00000000_11111111, a);
54397        let e = _mm512_setr_ps(
54398            2., 2., 4., 4., 6., 6., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
54399        );
54400        assert_eq_m512(r, e);
54401    }
54402
54403    #[simd_test(enable = "avx512f,avx512vl")]
54404    unsafe fn test_mm256_mask_movehdup_ps() {
54405        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54406        let r = _mm256_mask_movehdup_ps(a, 0, a);
54407        assert_eq_m256(r, a);
54408        let r = _mm256_mask_movehdup_ps(a, 0b11111111, a);
54409        let e = _mm256_set_ps(1., 1., 3., 3., 5., 5., 7., 7.);
54410        assert_eq_m256(r, e);
54411    }
54412
54413    #[simd_test(enable = "avx512f,avx512vl")]
54414    unsafe fn test_mm256_maskz_movehdup_ps() {
54415        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54416        let r = _mm256_maskz_movehdup_ps(0, a);
54417        assert_eq_m256(r, _mm256_setzero_ps());
54418        let r = _mm256_maskz_movehdup_ps(0b11111111, a);
54419        let e = _mm256_set_ps(1., 1., 3., 3., 5., 5., 7., 7.);
54420        assert_eq_m256(r, e);
54421    }
54422
54423    #[simd_test(enable = "avx512f,avx512vl")]
54424    unsafe fn test_mm_mask_movehdup_ps() {
54425        let a = _mm_set_ps(1., 2., 3., 4.);
54426        let r = _mm_mask_movehdup_ps(a, 0, a);
54427        assert_eq_m128(r, a);
54428        let r = _mm_mask_movehdup_ps(a, 0b00001111, a);
54429        let e = _mm_set_ps(1., 1., 3., 3.);
54430        assert_eq_m128(r, e);
54431    }
54432
54433    #[simd_test(enable = "avx512f,avx512vl")]
54434    unsafe fn test_mm_maskz_movehdup_ps() {
54435        let a = _mm_set_ps(1., 2., 3., 4.);
54436        let r = _mm_maskz_movehdup_ps(0, a);
54437        assert_eq_m128(r, _mm_setzero_ps());
54438        let r = _mm_maskz_movehdup_ps(0b00001111, a);
54439        let e = _mm_set_ps(1., 1., 3., 3.);
54440        assert_eq_m128(r, e);
54441    }
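
    // Editor's note (illustrative sketch, not part of the original test suite):
    // `vmovsldup` duplicates each even-indexed lane into the following odd lane,
    // and `vmovshdup` does the opposite. The 256-/128-bit expectations above look
    // "reversed" only because `_mm*_set_ps` lists lanes from highest to lowest,
    // whereas the 512-bit tests use `_mm512_setr_ps` (lowest lane first). In the
    // `mask_` variants lanes whose mask bit is 0 are copied from `src`; in the
    // `maskz_` variants they are zeroed.
    #[allow(dead_code)]
    fn moveldup_model(src: &[f32; 16]) -> [f32; 16] {
        let mut dst = [0.0f32; 16];
        for i in 0..8 {
            dst[2 * i] = src[2 * i]; // even lane kept
            dst[2 * i + 1] = src[2 * i]; // odd lane receives a copy of the even lane
        }
        dst
    }

    #[allow(dead_code)]
    fn movehdup_model(src: &[f32; 16]) -> [f32; 16] {
        let mut dst = [0.0f32; 16];
        for i in 0..8 {
            dst[2 * i] = src[2 * i + 1]; // even lane receives a copy of the odd lane
            dst[2 * i + 1] = src[2 * i + 1]; // odd lane kept
        }
        dst
    }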
54442
54443    #[simd_test(enable = "avx512f")]
54444    unsafe fn test_mm512_inserti32x4() {
54445        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54446        let b = _mm_setr_epi32(17, 18, 19, 20);
54447        let r = _mm512_inserti32x4::<0>(a, b);
54448        let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54449        assert_eq_m512i(r, e);
54450    }
54451
54452    #[simd_test(enable = "avx512f")]
54453    unsafe fn test_mm512_mask_inserti32x4() {
54454        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54455        let b = _mm_setr_epi32(17, 18, 19, 20);
54456        let r = _mm512_mask_inserti32x4::<0>(a, 0, a, b);
54457        assert_eq_m512i(r, a);
54458        let r = _mm512_mask_inserti32x4::<0>(a, 0b11111111_11111111, a, b);
54459        let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54460        assert_eq_m512i(r, e);
54461    }
54462
54463    #[simd_test(enable = "avx512f")]
54464    unsafe fn test_mm512_maskz_inserti32x4() {
54465        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54466        let b = _mm_setr_epi32(17, 18, 19, 20);
54467        let r = _mm512_maskz_inserti32x4::<0>(0, a, b);
54468        assert_eq_m512i(r, _mm512_setzero_si512());
54469        let r = _mm512_maskz_inserti32x4::<0>(0b00000000_11111111, a, b);
54470        let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0);
54471        assert_eq_m512i(r, e);
54472    }
54473
54474    #[simd_test(enable = "avx512f,avx512vl")]
54475    unsafe fn test_mm256_inserti32x4() {
54476        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54477        let b = _mm_set_epi32(17, 18, 19, 20);
54478        let r = _mm256_inserti32x4::<1>(a, b);
54479        let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
54480        assert_eq_m256i(r, e);
54481    }
54482
54483    #[simd_test(enable = "avx512f,avx512vl")]
54484    unsafe fn test_mm256_mask_inserti32x4() {
54485        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54486        let b = _mm_set_epi32(17, 18, 19, 20);
54487        let r = _mm256_mask_inserti32x4::<0>(a, 0, a, b);
54488        assert_eq_m256i(r, a);
54489        let r = _mm256_mask_inserti32x4::<1>(a, 0b11111111, a, b);
54490        let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
54491        assert_eq_m256i(r, e);
54492    }
54493
54494    #[simd_test(enable = "avx512f,avx512vl")]
54495    unsafe fn test_mm256_maskz_inserti32x4() {
54496        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54497        let b = _mm_set_epi32(17, 18, 19, 20);
54498        let r = _mm256_maskz_inserti32x4::<0>(0, a, b);
54499        assert_eq_m256i(r, _mm256_setzero_si256());
54500        let r = _mm256_maskz_inserti32x4::<1>(0b11111111, a, b);
54501        let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
54502        assert_eq_m256i(r, e);
54503    }
54504
54505    #[simd_test(enable = "avx512f")]
54506    unsafe fn test_mm512_insertf32x4() {
54507        let a = _mm512_setr_ps(
54508            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54509        );
54510        let b = _mm_setr_ps(17., 18., 19., 20.);
54511        let r = _mm512_insertf32x4::<0>(a, b);
54512        let e = _mm512_setr_ps(
54513            17., 18., 19., 20., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54514        );
54515        assert_eq_m512(r, e);
54516    }
54517
54518    #[simd_test(enable = "avx512f")]
54519    unsafe fn test_mm512_mask_insertf32x4() {
54520        let a = _mm512_setr_ps(
54521            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54522        );
54523        let b = _mm_setr_ps(17., 18., 19., 20.);
54524        let r = _mm512_mask_insertf32x4::<0>(a, 0, a, b);
54525        assert_eq_m512(r, a);
54526        let r = _mm512_mask_insertf32x4::<0>(a, 0b11111111_11111111, a, b);
54527        let e = _mm512_setr_ps(
54528            17., 18., 19., 20., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54529        );
54530        assert_eq_m512(r, e);
54531    }
54532
54533    #[simd_test(enable = "avx512f")]
54534    unsafe fn test_mm512_maskz_insertf32x4() {
54535        let a = _mm512_setr_ps(
54536            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54537        );
54538        let b = _mm_setr_ps(17., 18., 19., 20.);
54539        let r = _mm512_maskz_insertf32x4::<0>(0, a, b);
54540        assert_eq_m512(r, _mm512_setzero_ps());
54541        let r = _mm512_maskz_insertf32x4::<0>(0b00000000_11111111, a, b);
54542        let e = _mm512_setr_ps(
54543            17., 18., 19., 20., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
54544        );
54545        assert_eq_m512(r, e);
54546    }
54547
54548    #[simd_test(enable = "avx512f,avx512vl")]
54549    unsafe fn test_mm256_insertf32x4() {
54550        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54551        let b = _mm_set_ps(17., 18., 19., 20.);
54552        let r = _mm256_insertf32x4::<1>(a, b);
54553        let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
54554        assert_eq_m256(r, e);
54555    }
54556
54557    #[simd_test(enable = "avx512f,avx512vl")]
54558    unsafe fn test_mm256_mask_insertf32x4() {
54559        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54560        let b = _mm_set_ps(17., 18., 19., 20.);
54561        let r = _mm256_mask_insertf32x4::<0>(a, 0, a, b);
54562        assert_eq_m256(r, a);
54563        let r = _mm256_mask_insertf32x4::<1>(a, 0b11111111, a, b);
54564        let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
54565        assert_eq_m256(r, e);
54566    }
54567
54568    #[simd_test(enable = "avx512f,avx512vl")]
54569    unsafe fn test_mm256_maskz_insertf32x4() {
54570        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54571        let b = _mm_set_ps(17., 18., 19., 20.);
54572        let r = _mm256_maskz_insertf32x4::<0>(0, a, b);
54573        assert_eq_m256(r, _mm256_setzero_ps());
54574        let r = _mm256_maskz_insertf32x4::<1>(0b11111111, a, b);
54575        let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
54576        assert_eq_m256(r, e);
54577    }
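
    // Editor's note (sketch, not part of the original suite): `inserti32x4` and
    // `insertf32x4` replace one whole 128-bit lane of the destination with the
    // 128-bit source, selected by the const generic (lane 0 = lowest 128 bits).
    // A scalar model over values in memory order:
    #[allow(dead_code)]
    fn insert_x4_model(a: &[i32; 16], b: &[i32; 4], lane: usize) -> [i32; 16] {
        let mut dst = *a;
        dst[lane * 4..lane * 4 + 4].copy_from_slice(b);
        dst
    }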
54578
54579    #[simd_test(enable = "avx512f")]
54580    unsafe fn test_mm512_castps128_ps512() {
54581        let a = _mm_setr_ps(17., 18., 19., 20.);
54582        let r = _mm512_castps128_ps512(a);
54583        assert_eq_m128(_mm512_castps512_ps128(r), a);
54584    }
54585
54586    #[simd_test(enable = "avx512f")]
54587    unsafe fn test_mm512_castps256_ps512() {
54588        let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
54589        let r = _mm512_castps256_ps512(a);
54590        assert_eq_m256(_mm512_castps512_ps256(r), a);
54591    }
54592
54593    #[simd_test(enable = "avx512f")]
54594    unsafe fn test_mm512_zextps128_ps512() {
54595        let a = _mm_setr_ps(17., 18., 19., 20.);
54596        let r = _mm512_zextps128_ps512(a);
54597        let e = _mm512_setr_ps(
54598            17., 18., 19., 20., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
54599        );
54600        assert_eq_m512(r, e);
54601    }
54602
54603    #[simd_test(enable = "avx512f")]
54604    unsafe fn test_mm512_zextps256_ps512() {
54605        let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
54606        let r = _mm512_zextps256_ps512(a);
54607        let e = _mm512_setr_ps(
54608            17., 18., 19., 20., 21., 22., 23., 24., 0., 0., 0., 0., 0., 0., 0., 0.,
54609        );
54610        assert_eq_m512(r, e);
54611    }
54612
54613    #[simd_test(enable = "avx512f")]
54614    unsafe fn test_mm512_castps512_ps128() {
54615        let a = _mm512_setr_ps(
54616            17., 18., 19., 20., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
54617        );
54618        let r = _mm512_castps512_ps128(a);
54619        let e = _mm_setr_ps(17., 18., 19., 20.);
54620        assert_eq_m128(r, e);
54621    }
54622
54623    #[simd_test(enable = "avx512f")]
54624    unsafe fn test_mm512_castps512_ps256() {
54625        let a = _mm512_setr_ps(
54626            17., 18., 19., 20., 21., 22., 23., 24., -1., -1., -1., -1., -1., -1., -1., -1.,
54627        );
54628        let r = _mm512_castps512_ps256(a);
54629        let e = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
54630        assert_eq_m256(r, e);
54631    }
54632
54633    #[simd_test(enable = "avx512f")]
54634    unsafe fn test_mm512_castps_pd() {
54635        let a = _mm512_set1_ps(1.);
54636        let r = _mm512_castps_pd(a);
54637        let e = _mm512_set1_pd(0.007812501848093234);
54638        assert_eq_m512d(r, e);
54639    }
54640
54641    #[simd_test(enable = "avx512f")]
54642    unsafe fn test_mm512_castps_si512() {
54643        let a = _mm512_set1_ps(1.);
54644        let r = _mm512_castps_si512(a);
54645        let e = _mm512_set1_epi32(1065353216);
54646        assert_eq_m512i(r, e);
54647    }
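
    // Editor's note (illustrative, not part of the original suite): the cast
    // intrinsics above are pure bit reinterpretations, so their expected
    // constants follow from scalar bit manipulation alone.
    #[allow(dead_code)]
    fn cast_bit_patterns() {
        // 1.0f32 is 0x3F80_0000, i.e. 1065353216 as a signed 32-bit integer,
        // which is the value expected from `_mm512_castps_si512`.
        assert_eq!(1.0f32.to_bits() as i32, 1065353216);
        // Two adjacent 1.0f32 lanes reinterpreted as one f64 lane give the
        // constant expected from `_mm512_castps_pd`.
        assert_eq!(f64::from_bits(0x3F80_0000_3F80_0000), 0.007812501848093234);
    }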
54648
54649    #[simd_test(enable = "avx512f")]
54650    unsafe fn test_mm512_broadcastd_epi32() {
54651        let a = _mm_set_epi32(17, 18, 19, 20);
54652        let r = _mm512_broadcastd_epi32(a);
54653        let e = _mm512_set1_epi32(20);
54654        assert_eq_m512i(r, e);
54655    }
54656
54657    #[simd_test(enable = "avx512f")]
54658    unsafe fn test_mm512_mask_broadcastd_epi32() {
54659        let src = _mm512_set1_epi32(20);
54660        let a = _mm_set_epi32(17, 18, 19, 20);
54661        let r = _mm512_mask_broadcastd_epi32(src, 0, a);
54662        assert_eq_m512i(r, src);
54663        let r = _mm512_mask_broadcastd_epi32(src, 0b11111111_11111111, a);
54664        let e = _mm512_set1_epi32(20);
54665        assert_eq_m512i(r, e);
54666    }
54667
54668    #[simd_test(enable = "avx512f")]
54669    unsafe fn test_mm512_maskz_broadcastd_epi32() {
54670        let a = _mm_set_epi32(17, 18, 19, 20);
54671        let r = _mm512_maskz_broadcastd_epi32(0, a);
54672        assert_eq_m512i(r, _mm512_setzero_si512());
54673        let r = _mm512_maskz_broadcastd_epi32(0b00000000_11111111, a);
54674        let e = _mm512_setr_epi32(20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, 0, 0, 0);
54675        assert_eq_m512i(r, e);
54676    }
54677
54678    #[simd_test(enable = "avx512f,avx512vl")]
54679    unsafe fn test_mm256_mask_broadcastd_epi32() {
54680        let src = _mm256_set1_epi32(20);
54681        let a = _mm_set_epi32(17, 18, 19, 20);
54682        let r = _mm256_mask_broadcastd_epi32(src, 0, a);
54683        assert_eq_m256i(r, src);
54684        let r = _mm256_mask_broadcastd_epi32(src, 0b11111111, a);
54685        let e = _mm256_set1_epi32(20);
54686        assert_eq_m256i(r, e);
54687    }
54688
54689    #[simd_test(enable = "avx512f,avx512vl")]
54690    unsafe fn test_mm256_maskz_broadcastd_epi32() {
54691        let a = _mm_set_epi32(17, 18, 19, 20);
54692        let r = _mm256_maskz_broadcastd_epi32(0, a);
54693        assert_eq_m256i(r, _mm256_setzero_si256());
54694        let r = _mm256_maskz_broadcastd_epi32(0b11111111, a);
54695        let e = _mm256_set1_epi32(20);
54696        assert_eq_m256i(r, e);
54697    }
54698
54699    #[simd_test(enable = "avx512f,avx512vl")]
54700    unsafe fn test_mm_mask_broadcastd_epi32() {
54701        let src = _mm_set1_epi32(20);
54702        let a = _mm_set_epi32(17, 18, 19, 20);
54703        let r = _mm_mask_broadcastd_epi32(src, 0, a);
54704        assert_eq_m128i(r, src);
54705        let r = _mm_mask_broadcastd_epi32(src, 0b00001111, a);
54706        let e = _mm_set1_epi32(20);
54707        assert_eq_m128i(r, e);
54708    }
54709
54710    #[simd_test(enable = "avx512f,avx512vl")]
54711    unsafe fn test_mm_maskz_broadcastd_epi32() {
54712        let a = _mm_set_epi32(17, 18, 19, 20);
54713        let r = _mm_maskz_broadcastd_epi32(0, a);
54714        assert_eq_m128i(r, _mm_setzero_si128());
54715        let r = _mm_maskz_broadcastd_epi32(0b00001111, a);
54716        let e = _mm_set1_epi32(20);
54717        assert_eq_m128i(r, e);
54718    }
54719
54720    #[simd_test(enable = "avx512f")]
54721    unsafe fn test_mm512_broadcastss_ps() {
54722        let a = _mm_set_ps(17., 18., 19., 20.);
54723        let r = _mm512_broadcastss_ps(a);
54724        let e = _mm512_set1_ps(20.);
54725        assert_eq_m512(r, e);
54726    }
54727
54728    #[simd_test(enable = "avx512f")]
54729    unsafe fn test_mm512_mask_broadcastss_ps() {
54730        let src = _mm512_set1_ps(20.);
54731        let a = _mm_set_ps(17., 18., 19., 20.);
54732        let r = _mm512_mask_broadcastss_ps(src, 0, a);
54733        assert_eq_m512(r, src);
54734        let r = _mm512_mask_broadcastss_ps(src, 0b11111111_11111111, a);
54735        let e = _mm512_set1_ps(20.);
54736        assert_eq_m512(r, e);
54737    }
54738
54739    #[simd_test(enable = "avx512f")]
54740    unsafe fn test_mm512_maskz_broadcastss_ps() {
54741        let a = _mm_set_ps(17., 18., 19., 20.);
54742        let r = _mm512_maskz_broadcastss_ps(0, a);
54743        assert_eq_m512(r, _mm512_setzero_ps());
54744        let r = _mm512_maskz_broadcastss_ps(0b00000000_11111111, a);
54745        let e = _mm512_setr_ps(
54746            20., 20., 20., 20., 20., 20., 20., 20., 0., 0., 0., 0., 0., 0., 0., 0.,
54747        );
54748        assert_eq_m512(r, e);
54749    }
54750
54751    #[simd_test(enable = "avx512f,avx512vl")]
54752    unsafe fn test_mm256_mask_broadcastss_ps() {
54753        let src = _mm256_set1_ps(20.);
54754        let a = _mm_set_ps(17., 18., 19., 20.);
54755        let r = _mm256_mask_broadcastss_ps(src, 0, a);
54756        assert_eq_m256(r, src);
54757        let r = _mm256_mask_broadcastss_ps(src, 0b11111111, a);
54758        let e = _mm256_set1_ps(20.);
54759        assert_eq_m256(r, e);
54760    }
54761
54762    #[simd_test(enable = "avx512f,avx512vl")]
54763    unsafe fn test_mm256_maskz_broadcastss_ps() {
54764        let a = _mm_set_ps(17., 18., 19., 20.);
54765        let r = _mm256_maskz_broadcastss_ps(0, a);
54766        assert_eq_m256(r, _mm256_setzero_ps());
54767        let r = _mm256_maskz_broadcastss_ps(0b11111111, a);
54768        let e = _mm256_set1_ps(20.);
54769        assert_eq_m256(r, e);
54770    }
54771
54772    #[simd_test(enable = "avx512f,avx512vl")]
54773    unsafe fn test_mm_mask_broadcastss_ps() {
54774        let src = _mm_set1_ps(20.);
54775        let a = _mm_set_ps(17., 18., 19., 20.);
54776        let r = _mm_mask_broadcastss_ps(src, 0, a);
54777        assert_eq_m128(r, src);
54778        let r = _mm_mask_broadcastss_ps(src, 0b00001111, a);
54779        let e = _mm_set1_ps(20.);
54780        assert_eq_m128(r, e);
54781    }
54782
54783    #[simd_test(enable = "avx512f,avx512vl")]
54784    unsafe fn test_mm_maskz_broadcastss_ps() {
54785        let a = _mm_set_ps(17., 18., 19., 20.);
54786        let r = _mm_maskz_broadcastss_ps(0, a);
54787        assert_eq_m128(r, _mm_setzero_ps());
54788        let r = _mm_maskz_broadcastss_ps(0b00001111, a);
54789        let e = _mm_set1_ps(20.);
54790        assert_eq_m128(r, e);
54791    }
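
    // Editor's note (sketch, not part of the original suite): `vpbroadcastd` and
    // `vbroadcastss` replicate element 0, i.e. the *lowest* source lane. Since
    // `_mm_set_epi32(17, 18, 19, 20)` and `_mm_set_ps(17., 18., 19., 20.)` list
    // lanes from highest to lowest, lane 0 holds 20, hence the all-20 expectations.
    #[allow(dead_code)]
    fn broadcast_lowest_lane(src: &[i32; 4]) -> [i32; 16] {
        [src[0]; 16]
    }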
54792
54793    #[simd_test(enable = "avx512f")]
54794    unsafe fn test_mm512_broadcast_i32x4() {
54795        let a = _mm_set_epi32(17, 18, 19, 20);
54796        let r = _mm512_broadcast_i32x4(a);
54797        let e = _mm512_set_epi32(
54798            17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20,
54799        );
54800        assert_eq_m512i(r, e);
54801    }
54802
54803    #[simd_test(enable = "avx512f")]
54804    unsafe fn test_mm512_mask_broadcast_i32x4() {
54805        let src = _mm512_set1_epi32(20);
54806        let a = _mm_set_epi32(17, 18, 19, 20);
54807        let r = _mm512_mask_broadcast_i32x4(src, 0, a);
54808        assert_eq_m512i(r, src);
54809        let r = _mm512_mask_broadcast_i32x4(src, 0b11111111_11111111, a);
54810        let e = _mm512_set_epi32(
54811            17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20,
54812        );
54813        assert_eq_m512i(r, e);
54814    }
54815
54816    #[simd_test(enable = "avx512f")]
54817    unsafe fn test_mm512_maskz_broadcast_i32x4() {
54818        let a = _mm_set_epi32(17, 18, 19, 20);
54819        let r = _mm512_maskz_broadcast_i32x4(0, a);
54820        assert_eq_m512i(r, _mm512_setzero_si512());
54821        let r = _mm512_maskz_broadcast_i32x4(0b00000000_11111111, a);
54822        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 17, 18, 19, 20, 17, 18, 19, 20);
54823        assert_eq_m512i(r, e);
54824    }
54825
54826    #[simd_test(enable = "avx512f,avx512vl")]
54827    unsafe fn test_mm256_broadcast_i32x4() {
54828        let a = _mm_set_epi32(17, 18, 19, 20);
54829        let r = _mm256_broadcast_i32x4(a);
54830        let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
54831        assert_eq_m256i(r, e);
54832    }
54833
54834    #[simd_test(enable = "avx512f,avx512vl")]
54835    unsafe fn test_mm256_mask_broadcast_i32x4() {
54836        let src = _mm256_set1_epi32(20);
54837        let a = _mm_set_epi32(17, 18, 19, 20);
54838        let r = _mm256_mask_broadcast_i32x4(src, 0, a);
54839        assert_eq_m256i(r, src);
54840        let r = _mm256_mask_broadcast_i32x4(src, 0b11111111, a);
54841        let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
54842        assert_eq_m256i(r, e);
54843    }
54844
54845    #[simd_test(enable = "avx512f,avx512vl")]
54846    unsafe fn test_mm256_maskz_broadcast_i32x4() {
54847        let a = _mm_set_epi32(17, 18, 19, 20);
54848        let r = _mm256_maskz_broadcast_i32x4(0, a);
54849        assert_eq_m256i(r, _mm256_setzero_si256());
54850        let r = _mm256_maskz_broadcast_i32x4(0b11111111, a);
54851        let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
54852        assert_eq_m256i(r, e);
54853    }
54854
54855    #[simd_test(enable = "avx512f")]
54856    unsafe fn test_mm512_broadcast_f32x4() {
54857        let a = _mm_set_ps(17., 18., 19., 20.);
54858        let r = _mm512_broadcast_f32x4(a);
54859        let e = _mm512_set_ps(
54860            17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20.,
54861        );
54862        assert_eq_m512(r, e);
54863    }
54864
54865    #[simd_test(enable = "avx512f")]
54866    unsafe fn test_mm512_mask_broadcast_f32x4() {
54867        let src = _mm512_set1_ps(20.);
54868        let a = _mm_set_ps(17., 18., 19., 20.);
54869        let r = _mm512_mask_broadcast_f32x4(src, 0, a);
54870        assert_eq_m512(r, src);
54871        let r = _mm512_mask_broadcast_f32x4(src, 0b11111111_11111111, a);
54872        let e = _mm512_set_ps(
54873            17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20.,
54874        );
54875        assert_eq_m512(r, e);
54876    }
54877
54878    #[simd_test(enable = "avx512f")]
54879    unsafe fn test_mm512_maskz_broadcast_f32x4() {
54880        let a = _mm_set_ps(17., 18., 19., 20.);
54881        let r = _mm512_maskz_broadcast_f32x4(0, a);
54882        assert_eq_m512(r, _mm512_setzero_ps());
54883        let r = _mm512_maskz_broadcast_f32x4(0b00000000_11111111, a);
54884        let e = _mm512_set_ps(
54885            0., 0., 0., 0., 0., 0., 0., 0., 17., 18., 19., 20., 17., 18., 19., 20.,
54886        );
54887        assert_eq_m512(r, e);
54888    }
54889
54890    #[simd_test(enable = "avx512f,avx512vl")]
54891    unsafe fn test_mm256_broadcast_f32x4() {
54892        let a = _mm_set_ps(17., 18., 19., 20.);
54893        let r = _mm256_broadcast_f32x4(a);
54894        let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
54895        assert_eq_m256(r, e);
54896    }
54897
54898    #[simd_test(enable = "avx512f,avx512vl")]
54899    unsafe fn test_mm256_mask_broadcast_f32x4() {
54900        let src = _mm256_set1_ps(20.);
54901        let a = _mm_set_ps(17., 18., 19., 20.);
54902        let r = _mm256_mask_broadcast_f32x4(src, 0, a);
54903        assert_eq_m256(r, src);
54904        let r = _mm256_mask_broadcast_f32x4(src, 0b11111111, a);
54905        let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
54906        assert_eq_m256(r, e);
54907    }
54908
54909    #[simd_test(enable = "avx512f,avx512vl")]
54910    unsafe fn test_mm256_maskz_broadcast_f32x4() {
54911        let a = _mm_set_ps(17., 18., 19., 20.);
54912        let r = _mm256_maskz_broadcast_f32x4(0, a);
54913        assert_eq_m256(r, _mm256_setzero_ps());
54914        let r = _mm256_maskz_broadcast_f32x4(0b11111111, a);
54915        let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
54916        assert_eq_m256(r, e);
54917    }
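
    // Editor's note (sketch, not part of the original suite): `broadcast_i32x4` and
    // `broadcast_f32x4` tile the whole 128-bit source across the destination, so
    // every group of four lanes repeats the source in memory order.
    #[allow(dead_code)]
    fn broadcast_x4_model(src: &[i32; 4]) -> [i32; 16] {
        let mut dst = [0i32; 16];
        for (i, d) in dst.iter_mut().enumerate() {
            *d = src[i % 4];
        }
        dst
    }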
54918
54919    #[simd_test(enable = "avx512f")]
54920    unsafe fn test_mm512_mask_blend_epi32() {
54921        let a = _mm512_set1_epi32(1);
54922        let b = _mm512_set1_epi32(2);
54923        let r = _mm512_mask_blend_epi32(0b11111111_00000000, a, b);
54924        let e = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
54925        assert_eq_m512i(r, e);
54926    }
54927
54928    #[simd_test(enable = "avx512f,avx512vl")]
54929    unsafe fn test_mm256_mask_blend_epi32() {
54930        let a = _mm256_set1_epi32(1);
54931        let b = _mm256_set1_epi32(2);
54932        let r = _mm256_mask_blend_epi32(0b11111111, a, b);
54933        let e = _mm256_set1_epi32(2);
54934        assert_eq_m256i(r, e);
54935    }
54936
54937    #[simd_test(enable = "avx512f,avx512vl")]
54938    unsafe fn test_mm_mask_blend_epi32() {
54939        let a = _mm_set1_epi32(1);
54940        let b = _mm_set1_epi32(2);
54941        let r = _mm_mask_blend_epi32(0b00001111, a, b);
54942        let e = _mm_set1_epi32(2);
54943        assert_eq_m128i(r, e);
54944    }
54945
54946    #[simd_test(enable = "avx512f")]
54947    unsafe fn test_mm512_mask_blend_ps() {
54948        let a = _mm512_set1_ps(1.);
54949        let b = _mm512_set1_ps(2.);
54950        let r = _mm512_mask_blend_ps(0b11111111_00000000, a, b);
54951        let e = _mm512_set_ps(
54952            2., 2., 2., 2., 2., 2., 2., 2., 1., 1., 1., 1., 1., 1., 1., 1.,
54953        );
54954        assert_eq_m512(r, e);
54955    }
54956
54957    #[simd_test(enable = "avx512f,avx512vl")]
54958    unsafe fn test_mm256_mask_blend_ps() {
54959        let a = _mm256_set1_ps(1.);
54960        let b = _mm256_set1_ps(2.);
54961        let r = _mm256_mask_blend_ps(0b11111111, a, b);
54962        let e = _mm256_set1_ps(2.);
54963        assert_eq_m256(r, e);
54964    }
54965
54966    #[simd_test(enable = "avx512f,avx512vl")]
54967    unsafe fn test_mm_mask_blend_ps() {
54968        let a = _mm_set1_ps(1.);
54969        let b = _mm_set1_ps(2.);
54970        let r = _mm_mask_blend_ps(0b00001111, a, b);
54971        let e = _mm_set1_ps(2.);
54972        assert_eq_m128(r, e);
54973    }
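
    // Editor's note (sketch, not part of the original suite): `mask_blend` selects
    // per lane between its two operands; mask bit i (lane 0 = lowest lane) picks
    // `b` when set and `a` otherwise, so 0b11111111_00000000 takes the low eight
    // lanes from `a` and the high eight from `b`.
    #[allow(dead_code)]
    fn mask_blend_model(k: u16, a: &[i32; 16], b: &[i32; 16]) -> [i32; 16] {
        let mut dst = [0i32; 16];
        for i in 0..16 {
            dst[i] = if (k >> i) & 1 == 1 { b[i] } else { a[i] };
        }
        dst
    }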
54974
54975    #[simd_test(enable = "avx512f")]
54976    unsafe fn test_mm512_unpackhi_epi32() {
54977        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54978        let b = _mm512_set_epi32(
54979            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
54980        );
54981        let r = _mm512_unpackhi_epi32(a, b);
54982        let e = _mm512_set_epi32(17, 1, 18, 2, 21, 5, 22, 6, 25, 9, 26, 10, 29, 13, 30, 14);
54983        assert_eq_m512i(r, e);
54984    }
54985
54986    #[simd_test(enable = "avx512f")]
54987    unsafe fn test_mm512_mask_unpackhi_epi32() {
54988        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54989        let b = _mm512_set_epi32(
54990            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
54991        );
54992        let r = _mm512_mask_unpackhi_epi32(a, 0, a, b);
54993        assert_eq_m512i(r, a);
54994        let r = _mm512_mask_unpackhi_epi32(a, 0b11111111_11111111, a, b);
54995        let e = _mm512_set_epi32(17, 1, 18, 2, 21, 5, 22, 6, 25, 9, 26, 10, 29, 13, 30, 14);
54996        assert_eq_m512i(r, e);
54997    }
54998
54999    #[simd_test(enable = "avx512f")]
55000    unsafe fn test_mm512_maskz_unpackhi_epi32() {
55001        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
55002        let b = _mm512_set_epi32(
55003            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
55004        );
55005        let r = _mm512_maskz_unpackhi_epi32(0, a, b);
55006        assert_eq_m512i(r, _mm512_setzero_si512());
55007        let r = _mm512_maskz_unpackhi_epi32(0b00000000_11111111, a, b);
55008        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 25, 9, 26, 10, 29, 13, 30, 14);
55009        assert_eq_m512i(r, e);
55010    }
55011
55012    #[simd_test(enable = "avx512f,avx512vl")]
55013    unsafe fn test_mm256_mask_unpackhi_epi32() {
55014        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
55015        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
55016        let r = _mm256_mask_unpackhi_epi32(a, 0, a, b);
55017        assert_eq_m256i(r, a);
55018        let r = _mm256_mask_unpackhi_epi32(a, 0b11111111, a, b);
55019        let e = _mm256_set_epi32(17, 1, 18, 2, 21, 5, 22, 6);
55020        assert_eq_m256i(r, e);
55021    }
55022
55023    #[simd_test(enable = "avx512f,avx512vl")]
55024    unsafe fn test_mm256_maskz_unpackhi_epi32() {
55025        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
55026        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
55027        let r = _mm256_maskz_unpackhi_epi32(0, a, b);
55028        assert_eq_m256i(r, _mm256_setzero_si256());
55029        let r = _mm256_maskz_unpackhi_epi32(0b11111111, a, b);
55030        let e = _mm256_set_epi32(17, 1, 18, 2, 21, 5, 22, 6);
55031        assert_eq_m256i(r, e);
55032    }
55033
55034    #[simd_test(enable = "avx512f,avx512vl")]
55035    unsafe fn test_mm_mask_unpackhi_epi32() {
55036        let a = _mm_set_epi32(1, 2, 3, 4);
55037        let b = _mm_set_epi32(17, 18, 19, 20);
55038        let r = _mm_mask_unpackhi_epi32(a, 0, a, b);
55039        assert_eq_m128i(r, a);
55040        let r = _mm_mask_unpackhi_epi32(a, 0b00001111, a, b);
55041        let e = _mm_set_epi32(17, 1, 18, 2);
55042        assert_eq_m128i(r, e);
55043    }
55044
55045    #[simd_test(enable = "avx512f,avx512vl")]
55046    unsafe fn test_mm_maskz_unpackhi_epi32() {
55047        let a = _mm_set_epi32(1, 2, 3, 4);
55048        let b = _mm_set_epi32(17, 18, 19, 20);
55049        let r = _mm_maskz_unpackhi_epi32(0, a, b);
55050        assert_eq_m128i(r, _mm_setzero_si128());
55051        let r = _mm_maskz_unpackhi_epi32(0b00001111, a, b);
55052        let e = _mm_set_epi32(17, 1, 18, 2);
55053        assert_eq_m128i(r, e);
55054    }
55055
55056    #[simd_test(enable = "avx512f")]
55057    unsafe fn test_mm512_unpackhi_ps() {
55058        let a = _mm512_set_ps(
55059            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55060        );
55061        let b = _mm512_set_ps(
55062            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55063        );
55064        let r = _mm512_unpackhi_ps(a, b);
55065        let e = _mm512_set_ps(
55066            17., 1., 18., 2., 21., 5., 22., 6., 25., 9., 26., 10., 29., 13., 30., 14.,
55067        );
55068        assert_eq_m512(r, e);
55069    }
55070
55071    #[simd_test(enable = "avx512f")]
55072    unsafe fn test_mm512_mask_unpackhi_ps() {
55073        let a = _mm512_set_ps(
55074            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55075        );
55076        let b = _mm512_set_ps(
55077            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55078        );
55079        let r = _mm512_mask_unpackhi_ps(a, 0, a, b);
55080        assert_eq_m512(r, a);
55081        let r = _mm512_mask_unpackhi_ps(a, 0b11111111_11111111, a, b);
55082        let e = _mm512_set_ps(
55083            17., 1., 18., 2., 21., 5., 22., 6., 25., 9., 26., 10., 29., 13., 30., 14.,
55084        );
55085        assert_eq_m512(r, e);
55086    }
55087
55088    #[simd_test(enable = "avx512f")]
55089    unsafe fn test_mm512_maskz_unpackhi_ps() {
55090        let a = _mm512_set_ps(
55091            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55092        );
55093        let b = _mm512_set_ps(
55094            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55095        );
55096        let r = _mm512_maskz_unpackhi_ps(0, a, b);
55097        assert_eq_m512(r, _mm512_setzero_ps());
55098        let r = _mm512_maskz_unpackhi_ps(0b00000000_11111111, a, b);
55099        let e = _mm512_set_ps(
55100            0., 0., 0., 0., 0., 0., 0., 0., 25., 9., 26., 10., 29., 13., 30., 14.,
55101        );
55102        assert_eq_m512(r, e);
55103    }
55104
55105    #[simd_test(enable = "avx512f,avx512vl")]
55106    unsafe fn test_mm256_mask_unpackhi_ps() {
55107        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
55108        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
55109        let r = _mm256_mask_unpackhi_ps(a, 0, a, b);
55110        assert_eq_m256(r, a);
55111        let r = _mm256_mask_unpackhi_ps(a, 0b11111111, a, b);
55112        let e = _mm256_set_ps(17., 1., 18., 2., 21., 5., 22., 6.);
55113        assert_eq_m256(r, e);
55114    }
55115
55116    #[simd_test(enable = "avx512f,avx512vl")]
55117    unsafe fn test_mm256_maskz_unpackhi_ps() {
55118        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
55119        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
55120        let r = _mm256_maskz_unpackhi_ps(0, a, b);
55121        assert_eq_m256(r, _mm256_setzero_ps());
55122        let r = _mm256_maskz_unpackhi_ps(0b11111111, a, b);
55123        let e = _mm256_set_ps(17., 1., 18., 2., 21., 5., 22., 6.);
55124        assert_eq_m256(r, e);
55125    }
55126
55127    #[simd_test(enable = "avx512f,avx512vl")]
55128    unsafe fn test_mm_mask_unpackhi_ps() {
55129        let a = _mm_set_ps(1., 2., 3., 4.);
55130        let b = _mm_set_ps(17., 18., 19., 20.);
55131        let r = _mm_mask_unpackhi_ps(a, 0, a, b);
55132        assert_eq_m128(r, a);
55133        let r = _mm_mask_unpackhi_ps(a, 0b00001111, a, b);
55134        let e = _mm_set_ps(17., 1., 18., 2.);
55135        assert_eq_m128(r, e);
55136    }
55137
55138    #[simd_test(enable = "avx512f,avx512vl")]
55139    unsafe fn test_mm_maskz_unpackhi_ps() {
55140        let a = _mm_set_ps(1., 2., 3., 4.);
55141        let b = _mm_set_ps(17., 18., 19., 20.);
55142        let r = _mm_maskz_unpackhi_ps(0, a, b);
55143        assert_eq_m128(r, _mm_setzero_ps());
55144        let r = _mm_maskz_unpackhi_ps(0b00001111, a, b);
55145        let e = _mm_set_ps(17., 1., 18., 2.);
55146        assert_eq_m128(r, e);
55147    }
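
    // Editor's note (sketch, not part of the original suite): `unpackhi_epi32` and
    // `unpackhi_ps` work per 128-bit lane, interleaving the upper two elements of
    // `a` and `b` within each lane (values in memory order):
    #[allow(dead_code)]
    fn unpackhi_model(a: &[i32; 16], b: &[i32; 16]) -> [i32; 16] {
        let mut dst = [0i32; 16];
        for lane in 0..4 {
            let base = lane * 4;
            dst[base] = a[base + 2];
            dst[base + 1] = b[base + 2];
            dst[base + 2] = a[base + 3];
            dst[base + 3] = b[base + 3];
        }
        dst
    }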
55148
55149    #[simd_test(enable = "avx512f")]
55150    unsafe fn test_mm512_unpacklo_epi32() {
55151        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
55152        let b = _mm512_set_epi32(
55153            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
55154        );
55155        let r = _mm512_unpacklo_epi32(a, b);
55156        let e = _mm512_set_epi32(19, 3, 20, 4, 23, 7, 24, 8, 27, 11, 28, 12, 31, 15, 32, 16);
55157        assert_eq_m512i(r, e);
55158    }
55159
55160    #[simd_test(enable = "avx512f")]
55161    unsafe fn test_mm512_mask_unpacklo_epi32() {
55162        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
55163        let b = _mm512_set_epi32(
55164            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
55165        );
55166        let r = _mm512_mask_unpacklo_epi32(a, 0, a, b);
55167        assert_eq_m512i(r, a);
55168        let r = _mm512_mask_unpacklo_epi32(a, 0b11111111_11111111, a, b);
55169        let e = _mm512_set_epi32(19, 3, 20, 4, 23, 7, 24, 8, 27, 11, 28, 12, 31, 15, 32, 16);
55170        assert_eq_m512i(r, e);
55171    }
55172
55173    #[simd_test(enable = "avx512f")]
55174    unsafe fn test_mm512_maskz_unpacklo_epi32() {
55175        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
55176        let b = _mm512_set_epi32(
55177            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
55178        );
55179        let r = _mm512_maskz_unpacklo_epi32(0, a, b);
55180        assert_eq_m512i(r, _mm512_setzero_si512());
55181        let r = _mm512_maskz_unpacklo_epi32(0b00000000_11111111, a, b);
55182        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 27, 11, 28, 12, 31, 15, 32, 16);
55183        assert_eq_m512i(r, e);
55184    }
55185
55186    #[simd_test(enable = "avx512f,avx512vl")]
55187    unsafe fn test_mm256_mask_unpacklo_epi32() {
55188        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
55189        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
55190        let r = _mm256_mask_unpacklo_epi32(a, 0, a, b);
55191        assert_eq_m256i(r, a);
55192        let r = _mm256_mask_unpacklo_epi32(a, 0b11111111, a, b);
55193        let e = _mm256_set_epi32(19, 3, 20, 4, 23, 7, 24, 8);
55194        assert_eq_m256i(r, e);
55195    }
55196
55197    #[simd_test(enable = "avx512f,avx512vl")]
55198    unsafe fn test_mm256_maskz_unpacklo_epi32() {
55199        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
55200        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
55201        let r = _mm256_maskz_unpacklo_epi32(0, a, b);
55202        assert_eq_m256i(r, _mm256_setzero_si256());
55203        let r = _mm256_maskz_unpacklo_epi32(0b11111111, a, b);
55204        let e = _mm256_set_epi32(19, 3, 20, 4, 23, 7, 24, 8);
55205        assert_eq_m256i(r, e);
55206    }
55207
55208    #[simd_test(enable = "avx512f,avx512vl")]
55209    unsafe fn test_mm_mask_unpacklo_epi32() {
55210        let a = _mm_set_epi32(1, 2, 3, 4);
55211        let b = _mm_set_epi32(17, 18, 19, 20);
55212        let r = _mm_mask_unpacklo_epi32(a, 0, a, b);
55213        assert_eq_m128i(r, a);
55214        let r = _mm_mask_unpacklo_epi32(a, 0b00001111, a, b);
55215        let e = _mm_set_epi32(19, 3, 20, 4);
55216        assert_eq_m128i(r, e);
55217    }
55218
55219    #[simd_test(enable = "avx512f,avx512vl")]
55220    unsafe fn test_mm_maskz_unpacklo_epi32() {
55221        let a = _mm_set_epi32(1, 2, 3, 4);
55222        let b = _mm_set_epi32(17, 18, 19, 20);
55223        let r = _mm_maskz_unpacklo_epi32(0, a, b);
55224        assert_eq_m128i(r, _mm_setzero_si128());
55225        let r = _mm_maskz_unpacklo_epi32(0b00001111, a, b);
55226        let e = _mm_set_epi32(19, 3, 20, 4);
55227        assert_eq_m128i(r, e);
55228    }
55229
55230    #[simd_test(enable = "avx512f")]
55231    unsafe fn test_mm512_unpacklo_ps() {
55232        let a = _mm512_set_ps(
55233            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55234        );
55235        let b = _mm512_set_ps(
55236            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55237        );
55238        let r = _mm512_unpacklo_ps(a, b);
55239        let e = _mm512_set_ps(
55240            19., 3., 20., 4., 23., 7., 24., 8., 27., 11., 28., 12., 31., 15., 32., 16.,
55241        );
55242        assert_eq_m512(r, e);
55243    }
55244
55245    #[simd_test(enable = "avx512f")]
55246    unsafe fn test_mm512_mask_unpacklo_ps() {
55247        let a = _mm512_set_ps(
55248            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55249        );
55250        let b = _mm512_set_ps(
55251            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55252        );
55253        let r = _mm512_mask_unpacklo_ps(a, 0, a, b);
55254        assert_eq_m512(r, a);
55255        let r = _mm512_mask_unpacklo_ps(a, 0b11111111_11111111, a, b);
55256        let e = _mm512_set_ps(
55257            19., 3., 20., 4., 23., 7., 24., 8., 27., 11., 28., 12., 31., 15., 32., 16.,
55258        );
55259        assert_eq_m512(r, e);
55260    }
55261
55262    #[simd_test(enable = "avx512f")]
55263    unsafe fn test_mm512_maskz_unpacklo_ps() {
55264        let a = _mm512_set_ps(
55265            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55266        );
55267        let b = _mm512_set_ps(
55268            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55269        );
55270        let r = _mm512_maskz_unpacklo_ps(0, a, b);
55271        assert_eq_m512(r, _mm512_setzero_ps());
55272        let r = _mm512_maskz_unpacklo_ps(0b00000000_11111111, a, b);
55273        let e = _mm512_set_ps(
55274            0., 0., 0., 0., 0., 0., 0., 0., 27., 11., 28., 12., 31., 15., 32., 16.,
55275        );
55276        assert_eq_m512(r, e);
55277    }
55278
55279    #[simd_test(enable = "avx512f,avx512vl")]
55280    unsafe fn test_mm256_mask_unpacklo_ps() {
55281        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
55282        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
55283        let r = _mm256_mask_unpacklo_ps(a, 0, a, b);
55284        assert_eq_m256(r, a);
55285        let r = _mm256_mask_unpacklo_ps(a, 0b11111111, a, b);
55286        let e = _mm256_set_ps(19., 3., 20., 4., 23., 7., 24., 8.);
55287        assert_eq_m256(r, e);
55288    }
55289
55290    #[simd_test(enable = "avx512f,avx512vl")]
55291    unsafe fn test_mm256_maskz_unpacklo_ps() {
55292        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
55293        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
55294        let r = _mm256_maskz_unpacklo_ps(0, a, b);
55295        assert_eq_m256(r, _mm256_setzero_ps());
55296        let r = _mm256_maskz_unpacklo_ps(0b11111111, a, b);
55297        let e = _mm256_set_ps(19., 3., 20., 4., 23., 7., 24., 8.);
55298        assert_eq_m256(r, e);
55299    }
55300
55301    #[simd_test(enable = "avx512f,avx512vl")]
55302    unsafe fn test_mm_mask_unpacklo_ps() {
55303        let a = _mm_set_ps(1., 2., 3., 4.);
55304        let b = _mm_set_ps(17., 18., 19., 20.);
55305        let r = _mm_mask_unpacklo_ps(a, 0, a, b);
55306        assert_eq_m128(r, a);
55307        let r = _mm_mask_unpacklo_ps(a, 0b00001111, a, b);
55308        let e = _mm_set_ps(19., 3., 20., 4.);
55309        assert_eq_m128(r, e);
55310    }
55311
55312    #[simd_test(enable = "avx512f,avx512vl")]
55313    unsafe fn test_mm_maskz_unpacklo_ps() {
55314        let a = _mm_set_ps(1., 2., 3., 4.);
55315        let b = _mm_set_ps(17., 18., 19., 20.);
55316        let r = _mm_maskz_unpacklo_ps(0, a, b);
55317        assert_eq_m128(r, _mm_setzero_ps());
55318        let r = _mm_maskz_unpacklo_ps(0b00001111, a, b);
55319        let e = _mm_set_ps(19., 3., 20., 4.);
55320        assert_eq_m128(r, e);
55321    }
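
    // Editor's note (sketch, not part of the original suite): `unpacklo` mirrors
    // `unpackhi`, interleaving the lower two elements of each 128-bit lane instead.
    #[allow(dead_code)]
    fn unpacklo_model(a: &[i32; 16], b: &[i32; 16]) -> [i32; 16] {
        let mut dst = [0i32; 16];
        for lane in 0..4 {
            let base = lane * 4;
            dst[base] = a[base];
            dst[base + 1] = b[base];
            dst[base + 2] = a[base + 1];
            dst[base + 3] = b[base + 1];
        }
        dst
    }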
55322
55323    #[simd_test(enable = "avx512f")]
55324    unsafe fn test_mm512_alignr_epi32() {
55325        let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
55326        let b = _mm512_set_epi32(
55327            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
55328        );
55329        let r = _mm512_alignr_epi32::<0>(a, b);
55330        assert_eq_m512i(r, b);
55331        let r = _mm512_alignr_epi32::<16>(a, b);
55332        assert_eq_m512i(r, b);
55333        let r = _mm512_alignr_epi32::<1>(a, b);
55334        let e = _mm512_set_epi32(
55335            1, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
55336        );
55337        assert_eq_m512i(r, e);
55338    }
55339
55340    #[simd_test(enable = "avx512f")]
55341    unsafe fn test_mm512_mask_alignr_epi32() {
55342        let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
55343        let b = _mm512_set_epi32(
55344            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
55345        );
55346        let r = _mm512_mask_alignr_epi32::<1>(a, 0, a, b);
55347        assert_eq_m512i(r, a);
55348        let r = _mm512_mask_alignr_epi32::<1>(a, 0b11111111_11111111, a, b);
55349        let e = _mm512_set_epi32(
55350            1, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
55351        );
55352        assert_eq_m512i(r, e);
55353    }
55354
55355    #[simd_test(enable = "avx512f")]
55356    unsafe fn test_mm512_maskz_alignr_epi32() {
55357        let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
55358        let b = _mm512_set_epi32(
55359            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
55360        );
55361        let r = _mm512_maskz_alignr_epi32::<1>(0, a, b);
55362        assert_eq_m512i(r, _mm512_setzero_si512());
55363        let r = _mm512_maskz_alignr_epi32::<1>(0b00000000_11111111, a, b);
55364        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 25, 24, 23, 22, 21, 20, 19, 18);
55365        assert_eq_m512i(r, e);
55366    }
55367
55368    #[simd_test(enable = "avx512f,avx512vl")]
55369    unsafe fn test_mm256_alignr_epi32() {
55370        let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
55371        let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
55372        let r = _mm256_alignr_epi32::<0>(a, b);
55373        assert_eq_m256i(r, b);
55374        let r = _mm256_alignr_epi32::<1>(a, b);
55375        let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
55376        assert_eq_m256i(r, e);
55377    }
55378
55379    #[simd_test(enable = "avx512f,avx512vl")]
55380    unsafe fn test_mm256_mask_alignr_epi32() {
55381        let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
55382        let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
55383        let r = _mm256_mask_alignr_epi32::<1>(a, 0, a, b);
55384        assert_eq_m256i(r, a);
55385        let r = _mm256_mask_alignr_epi32::<1>(a, 0b11111111, a, b);
55386        let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
55387        assert_eq_m256i(r, e);
55388    }
55389
55390    #[simd_test(enable = "avx512f,avx512vl")]
55391    unsafe fn test_mm256_maskz_alignr_epi32() {
55392        let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
55393        let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
55394        let r = _mm256_maskz_alignr_epi32::<1>(0, a, b);
55395        assert_eq_m256i(r, _mm256_setzero_si256());
55396        let r = _mm256_maskz_alignr_epi32::<1>(0b11111111, a, b);
55397        let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
55398        assert_eq_m256i(r, e);
55399    }
55400
55401    #[simd_test(enable = "avx512f,avx512vl")]
55402    unsafe fn test_mm_alignr_epi32() {
55403        let a = _mm_set_epi32(4, 3, 2, 1);
55404        let b = _mm_set_epi32(8, 7, 6, 5);
55405        let r = _mm_alignr_epi32::<0>(a, b);
55406        assert_eq_m128i(r, b);
55407        let r = _mm_alignr_epi32::<1>(a, b);
55408        let e = _mm_set_epi32(1, 8, 7, 6);
55409        assert_eq_m128i(r, e);
55410    }
55411
55412    #[simd_test(enable = "avx512f,avx512vl")]
55413    unsafe fn test_mm_mask_alignr_epi32() {
55414        let a = _mm_set_epi32(4, 3, 2, 1);
55415        let b = _mm_set_epi32(8, 7, 6, 5);
55416        let r = _mm_mask_alignr_epi32::<1>(a, 0, a, b);
55417        assert_eq_m128i(r, a);
55418        let r = _mm_mask_alignr_epi32::<1>(a, 0b00001111, a, b);
55419        let e = _mm_set_epi32(1, 8, 7, 6);
55420        assert_eq_m128i(r, e);
55421    }
55422
55423    #[simd_test(enable = "avx512f,avx512vl")]
55424    unsafe fn test_mm_maskz_alignr_epi32() {
55425        let a = _mm_set_epi32(4, 3, 2, 1);
55426        let b = _mm_set_epi32(8, 7, 6, 5);
55427        let r = _mm_maskz_alignr_epi32::<1>(0, a, b);
55428        assert_eq_m128i(r, _mm_setzero_si128());
55429        let r = _mm_maskz_alignr_epi32::<1>(0b00001111, a, b);
55430        let e = _mm_set_epi32(1, 8, 7, 6);
55431        assert_eq_m128i(r, e);
55432    }
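
    // Editor's note (sketch, not part of the original suite): `valignd` concatenates
    // the two operands with `b` in the low half, shifts right by IMM8 32-bit
    // elements, and keeps the low half. The immediate is taken modulo the element
    // count, which is why `_mm512_alignr_epi32::<16>` above returns `b` unchanged.
    #[allow(dead_code)]
    fn alignr_epi32_model(a: &[i32; 16], b: &[i32; 16], imm: usize) -> [i32; 16] {
        let imm = imm % 16;
        let mut concat = [0i32; 32];
        concat[..16].copy_from_slice(b);
        concat[16..].copy_from_slice(a);
        let mut dst = [0i32; 16];
        dst.copy_from_slice(&concat[imm..imm + 16]);
        dst
    }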
55433
55434    #[simd_test(enable = "avx512f")]
55435    unsafe fn test_mm512_and_epi32() {
55436        #[rustfmt::skip]
55437        let a = _mm512_set_epi32(
55438            1 << 1 | 1 << 2, 0, 0, 0,
55439            0, 0, 0, 0,
55440            0, 0, 0, 0,
55441            0, 0, 0, 1 << 1 | 1 << 3,
55442        );
55443        #[rustfmt::skip]
55444        let b = _mm512_set_epi32(
55445            1 << 1, 0, 0, 0,
55446            0, 0, 0, 0,
55447            0, 0, 0, 0,
55448            0, 0, 0, 1 << 3 | 1 << 4,
55449        );
55450        let r = _mm512_and_epi32(a, b);
55451        let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
55452        assert_eq_m512i(r, e);
55453    }
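
    // Editor's note (worked example, not part of the original suite): the non-zero
    // lanes of the expected value follow from scalar bit arithmetic.
    #[allow(dead_code)]
    fn and_lane_expectations() {
        assert_eq!((1 << 1 | 1 << 2) & (1 << 1), 1 << 1);
        assert_eq!((1 << 1 | 1 << 3) & (1 << 3 | 1 << 4), 1 << 3);
    }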
55454
55455    #[simd_test(enable = "avx512f")]
55456    unsafe fn test_mm512_mask_and_epi32() {
55457        #[rustfmt::skip]
55458        let a = _mm512_set_epi32(
55459            1 << 1 | 1 << 2, 0, 0, 0,
55460            0, 0, 0, 0,
55461            0, 0, 0, 0,
55462            0, 0, 0, 1 << 1 | 1 << 3,
55463        );
55464        #[rustfmt::skip]
55465        let b = _mm512_set_epi32(
55466            1 << 1, 0, 0, 0,
55467            0, 0, 0, 0,
55468            0, 0, 0, 0,
55469            0, 0, 0, 1 << 3 | 1 << 4,
55470        );
55471        let r = _mm512_mask_and_epi32(a, 0, a, b);
55472        assert_eq_m512i(r, a);
55473        let r = _mm512_mask_and_epi32(a, 0b01111111_11111111, a, b);
55474        #[rustfmt::skip]
55475        let e = _mm512_set_epi32(
55476            1 << 1 | 1 << 2, 0, 0, 0,
55477            0, 0, 0, 0,
55478            0, 0, 0, 0,
55479            0, 0, 0, 1 << 3,
55480        );
55481        assert_eq_m512i(r, e);
55482    }
55483
55484    #[simd_test(enable = "avx512f")]
55485    unsafe fn test_mm512_maskz_and_epi32() {
55486        #[rustfmt::skip]
55487        let a = _mm512_set_epi32(
55488            1 << 1 | 1 << 2, 0, 0, 0,
55489            0, 0, 0, 0,
55490            0, 0, 0, 0,
55491            0, 0, 0, 1 << 1 | 1 << 3,
55492        );
55493        #[rustfmt::skip]
55494        let b = _mm512_set_epi32(
55495            1 << 1, 0, 0, 0,
55496            0, 0, 0, 0,
55497            0, 0, 0, 0,
55498            0, 0, 0, 1 << 3 | 1 << 4,
55499        );
55500        let r = _mm512_maskz_and_epi32(0, a, b);
55501        assert_eq_m512i(r, _mm512_setzero_si512());
55502        let r = _mm512_maskz_and_epi32(0b00000000_11111111, a, b);
55503        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
55504        assert_eq_m512i(r, e);
55505    }
55506
55507    #[simd_test(enable = "avx512f,avx512vl")]
55508    unsafe fn test_mm256_mask_and_epi32() {
55509        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55510        let b = _mm256_set1_epi32(1 << 1);
55511        let r = _mm256_mask_and_epi32(a, 0, a, b);
55512        assert_eq_m256i(r, a);
55513        let r = _mm256_mask_and_epi32(a, 0b11111111, a, b);
55514        let e = _mm256_set1_epi32(1 << 1);
55515        assert_eq_m256i(r, e);
55516    }
55517
55518    #[simd_test(enable = "avx512f,avx512vl")]
55519    unsafe fn test_mm256_maskz_and_epi32() {
55520        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55521        let b = _mm256_set1_epi32(1 << 1);
55522        let r = _mm256_maskz_and_epi32(0, a, b);
55523        assert_eq_m256i(r, _mm256_setzero_si256());
55524        let r = _mm256_maskz_and_epi32(0b11111111, a, b);
55525        let e = _mm256_set1_epi32(1 << 1);
55526        assert_eq_m256i(r, e);
55527    }
55528
55529    #[simd_test(enable = "avx512f,avx512vl")]
55530    unsafe fn test_mm_mask_and_epi32() {
55531        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55532        let b = _mm_set1_epi32(1 << 1);
55533        let r = _mm_mask_and_epi32(a, 0, a, b);
55534        assert_eq_m128i(r, a);
55535        let r = _mm_mask_and_epi32(a, 0b00001111, a, b);
55536        let e = _mm_set1_epi32(1 << 1);
55537        assert_eq_m128i(r, e);
55538    }
55539
55540    #[simd_test(enable = "avx512f,avx512vl")]
55541    unsafe fn test_mm_maskz_and_epi32() {
55542        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55543        let b = _mm_set1_epi32(1 << 1);
55544        let r = _mm_maskz_and_epi32(0, a, b);
55545        assert_eq_m128i(r, _mm_setzero_si128());
55546        let r = _mm_maskz_and_epi32(0b00001111, a, b);
55547        let e = _mm_set1_epi32(1 << 1);
55548        assert_eq_m128i(r, e);
55549    }
55550
55551    #[simd_test(enable = "avx512f")]
55552    unsafe fn test_mm512_and_si512() {
55553        #[rustfmt::skip]
55554        let a = _mm512_set_epi32(
55555            1 << 1 | 1 << 2, 0, 0, 0,
55556            0, 0, 0, 0,
55557            0, 0, 0, 0,
55558            0, 0, 0, 1 << 1 | 1 << 3,
55559        );
55560        #[rustfmt::skip]
55561        let b = _mm512_set_epi32(
55562            1 << 1, 0, 0, 0,
55563            0, 0, 0, 0,
55564            0, 0, 0, 0,
55565            0, 0, 0, 1 << 3 | 1 << 4,
55566        );
55567        let r = _mm512_and_si512(a, b);
55568        let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
55569        assert_eq_m512i(r, e);
55570    }
55571
55572    #[simd_test(enable = "avx512f")]
55573    unsafe fn test_mm512_or_epi32() {
55574        #[rustfmt::skip]
55575        let a = _mm512_set_epi32(
55576            1 << 1 | 1 << 2, 0, 0, 0,
55577            0, 0, 0, 0,
55578            0, 0, 0, 0,
55579            0, 0, 0, 1 << 1 | 1 << 3,
55580        );
55581        #[rustfmt::skip]
55582        let b = _mm512_set_epi32(
55583            1 << 1, 0, 0, 0,
55584            0, 0, 0, 0,
55585            0, 0, 0, 0,
55586            0, 0, 0, 1 << 3 | 1 << 4,
55587        );
55588        let r = _mm512_or_epi32(a, b);
55589        #[rustfmt::skip]
55590        let e = _mm512_set_epi32(
55591            1 << 1 | 1 << 2, 0, 0, 0,
55592            0, 0, 0, 0,
55593            0, 0, 0, 0,
55594            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
55595        );
55596        assert_eq_m512i(r, e);
55597    }
55598
55599    #[simd_test(enable = "avx512f")]
55600    unsafe fn test_mm512_mask_or_epi32() {
55601        #[rustfmt::skip]
55602        let a = _mm512_set_epi32(
55603            1 << 1 | 1 << 2, 0, 0, 0,
55604            0, 0, 0, 0,
55605            0, 0, 0, 0,
55606            0, 0, 0, 1 << 1 | 1 << 3,
55607        );
55608        #[rustfmt::skip]
55609        let b = _mm512_set_epi32(
55610            1 << 1, 0, 0, 0,
55611            0, 0, 0, 0,
55612            0, 0, 0, 0,
55613            0, 0, 0, 1 << 3 | 1 << 4,
55614        );
55615        let r = _mm512_mask_or_epi32(a, 0, a, b);
55616        assert_eq_m512i(r, a);
55617        let r = _mm512_mask_or_epi32(a, 0b11111111_11111111, a, b);
55618        #[rustfmt::skip]
55619        let e = _mm512_set_epi32(
55620            1 << 1 | 1 << 2, 0, 0, 0,
55621            0, 0, 0, 0,
55622            0, 0, 0, 0,
55623            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
55624        );
55625        assert_eq_m512i(r, e);
55626    }
55627
55628    #[simd_test(enable = "avx512f")]
55629    unsafe fn test_mm512_maskz_or_epi32() {
55630        #[rustfmt::skip]
55631        let a = _mm512_set_epi32(
55632            1 << 1 | 1 << 2, 0, 0, 0,
55633            0, 0, 0, 0,
55634            0, 0, 0, 0,
55635            0, 0, 0, 1 << 1 | 1 << 3,
55636        );
55637        #[rustfmt::skip]
55638        let b = _mm512_set_epi32(
55639            1 << 1, 0, 0, 0,
55640            0, 0, 0, 0,
55641            0, 0, 0, 0,
55642            0, 0, 0, 1 << 3 | 1 << 4,
55643        );
55644        let r = _mm512_maskz_or_epi32(0, a, b);
55645        assert_eq_m512i(r, _mm512_setzero_si512());
55646        let r = _mm512_maskz_or_epi32(0b00000000_11111111, a, b);
55647        #[rustfmt::skip]
55648        let e = _mm512_set_epi32(
55649            0, 0, 0, 0,
55650            0, 0, 0, 0,
55651            0, 0, 0, 0,
55652            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
55653        );
55654        assert_eq_m512i(r, e);
55655    }
55656
55657    #[simd_test(enable = "avx512f,avx512vl")]
55658    unsafe fn test_mm256_or_epi32() {
55659        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55660        let b = _mm256_set1_epi32(1 << 1);
55661        let r = _mm256_or_epi32(a, b);
55662        let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
55663        assert_eq_m256i(r, e);
55664    }
55665
55666    #[simd_test(enable = "avx512f,avx512vl")]
55667    unsafe fn test_mm256_mask_or_epi32() {
55668        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55669        let b = _mm256_set1_epi32(1 << 1);
55670        let r = _mm256_mask_or_epi32(a, 0, a, b);
55671        assert_eq_m256i(r, a);
55672        let r = _mm256_mask_or_epi32(a, 0b11111111, a, b);
55673        let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
55674        assert_eq_m256i(r, e);
55675    }
55676
55677    #[simd_test(enable = "avx512f,avx512vl")]
55678    unsafe fn test_mm256_maskz_or_epi32() {
55679        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55680        let b = _mm256_set1_epi32(1 << 1);
55681        let r = _mm256_maskz_or_epi32(0, a, b);
55682        assert_eq_m256i(r, _mm256_setzero_si256());
55683        let r = _mm256_maskz_or_epi32(0b11111111, a, b);
55684        let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
55685        assert_eq_m256i(r, e);
55686    }
55687
55688    #[simd_test(enable = "avx512f,avx512vl")]
55689    unsafe fn test_mm_or_epi32() {
55690        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55691        let b = _mm_set1_epi32(1 << 1);
55692        let r = _mm_or_epi32(a, b);
55693        let e = _mm_set1_epi32(1 << 1 | 1 << 2);
55694        assert_eq_m128i(r, e);
55695    }
55696
55697    #[simd_test(enable = "avx512f,avx512vl")]
55698    unsafe fn test_mm_mask_or_epi32() {
55699        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55700        let b = _mm_set1_epi32(1 << 1);
55701        let r = _mm_mask_or_epi32(a, 0, a, b);
55702        assert_eq_m128i(r, a);
55703        let r = _mm_mask_or_epi32(a, 0b00001111, a, b);
55704        let e = _mm_set1_epi32(1 << 1 | 1 << 2);
55705        assert_eq_m128i(r, e);
55706    }
55707
55708    #[simd_test(enable = "avx512f,avx512vl")]
55709    unsafe fn test_mm_maskz_or_epi32() {
55710        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55711        let b = _mm_set1_epi32(1 << 1);
55712        let r = _mm_maskz_or_epi32(0, a, b);
55713        assert_eq_m128i(r, _mm_setzero_si128());
55714        let r = _mm_maskz_or_epi32(0b00001111, a, b);
55715        let e = _mm_set1_epi32(1 << 1 | 1 << 2);
55716        assert_eq_m128i(r, e);
55717    }
55718
55719    #[simd_test(enable = "avx512f")]
55720    unsafe fn test_mm512_or_si512() {
55721        #[rustfmt::skip]
55722        let a = _mm512_set_epi32(
55723            1 << 1 | 1 << 2, 0, 0, 0,
55724            0, 0, 0, 0,
55725            0, 0, 0, 0,
55726            0, 0, 0, 1 << 1 | 1 << 3,
55727        );
55728        #[rustfmt::skip]
55729        let b = _mm512_set_epi32(
55730            1 << 1, 0, 0, 0,
55731            0, 0, 0, 0,
55732            0, 0, 0, 0,
55733            0, 0, 0, 1 << 3 | 1 << 4,
55734        );
55735        let r = _mm512_or_si512(a, b);
55736        #[rustfmt::skip]
55737        let e = _mm512_set_epi32(
55738            1 << 1 | 1 << 2, 0, 0, 0,
55739            0, 0, 0, 0,
55740            0, 0, 0, 0,
55741            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
55742        );
55743        assert_eq_m512i(r, e);
55744    }
55745
55746    #[simd_test(enable = "avx512f")]
55747    unsafe fn test_mm512_xor_epi32() {
55748        #[rustfmt::skip]
55749        let a = _mm512_set_epi32(
55750            1 << 1 | 1 << 2, 0, 0, 0,
55751            0, 0, 0, 0,
55752            0, 0, 0, 0,
55753            0, 0, 0, 1 << 1 | 1 << 3,
55754        );
55755        #[rustfmt::skip]
55756        let b = _mm512_set_epi32(
55757            1 << 1, 0, 0, 0,
55758            0, 0, 0, 0,
55759            0, 0, 0, 0,
55760            0, 0, 0, 1 << 3 | 1 << 4,
55761        );
55762        let r = _mm512_xor_epi32(a, b);
55763        #[rustfmt::skip]
55764        let e = _mm512_set_epi32(
55765            1 << 2, 0, 0, 0,
55766            0, 0, 0, 0,
55767            0, 0, 0, 0,
55768            0, 0, 0, 1 << 1 | 1 << 4,
55769        );
55770        assert_eq_m512i(r, e);
55771    }
55772
55773    #[simd_test(enable = "avx512f")]
55774    unsafe fn test_mm512_mask_xor_epi32() {
55775        #[rustfmt::skip]
55776        let a = _mm512_set_epi32(
55777            1 << 1 | 1 << 2, 0, 0, 0,
55778            0, 0, 0, 0,
55779            0, 0, 0, 0,
55780            0, 0, 0, 1 << 1 | 1 << 3,
55781        );
55782        #[rustfmt::skip]
55783        let b = _mm512_set_epi32(
55784            1 << 1, 0, 0, 0,
55785            0, 0, 0, 0,
55786            0, 0, 0, 0,
55787            0, 0, 0, 1 << 3 | 1 << 4,
55788        );
55789        let r = _mm512_mask_xor_epi32(a, 0, a, b);
55790        assert_eq_m512i(r, a);
55791        let r = _mm512_mask_xor_epi32(a, 0b01111111_11111111, a, b);
55792        #[rustfmt::skip]
55793        let e = _mm512_set_epi32(
55794            1 << 1 | 1 << 2, 0, 0, 0,
55795            0, 0, 0, 0,
55796            0, 0, 0, 0,
55797            0, 0, 0, 1 << 1 | 1 << 4,
55798        );
55799        assert_eq_m512i(r, e);
55800    }
55801
55802    #[simd_test(enable = "avx512f")]
55803    unsafe fn test_mm512_maskz_xor_epi32() {
55804        #[rustfmt::skip]
55805        let a = _mm512_set_epi32(
55806            1 << 1 | 1 << 2, 0, 0, 0,
55807            0, 0, 0, 0,
55808            0, 0, 0, 0,
55809            0, 0, 0, 1 << 1 | 1 << 3,
55810        );
55811        #[rustfmt::skip]
55812        let b = _mm512_set_epi32(
55813            1 << 1, 0, 0, 0,
55814            0, 0, 0, 0,
55815            0, 0, 0, 0,
55816            0, 0, 0, 1 << 3 | 1 << 4,
55817        );
55818        let r = _mm512_maskz_xor_epi32(0, a, b);
55819        assert_eq_m512i(r, _mm512_setzero_si512());
55820        let r = _mm512_maskz_xor_epi32(0b00000000_11111111, a, b);
55821        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 4);
55822        assert_eq_m512i(r, e);
55823    }
55824
55825    #[simd_test(enable = "avx512f,avx512vl")]
55826    unsafe fn test_mm256_xor_epi32() {
55827        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55828        let b = _mm256_set1_epi32(1 << 1);
55829        let r = _mm256_xor_epi32(a, b);
55830        let e = _mm256_set1_epi32(1 << 2);
55831        assert_eq_m256i(r, e);
55832    }
55833
55834    #[simd_test(enable = "avx512f,avx512vl")]
55835    unsafe fn test_mm256_mask_xor_epi32() {
55836        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55837        let b = _mm256_set1_epi32(1 << 1);
55838        let r = _mm256_mask_xor_epi32(a, 0, a, b);
55839        assert_eq_m256i(r, a);
55840        let r = _mm256_mask_xor_epi32(a, 0b11111111, a, b);
55841        let e = _mm256_set1_epi32(1 << 2);
55842        assert_eq_m256i(r, e);
55843    }
55844
55845    #[simd_test(enable = "avx512f,avx512vl")]
55846    unsafe fn test_mm256_maskz_xor_epi32() {
55847        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55848        let b = _mm256_set1_epi32(1 << 1);
55849        let r = _mm256_maskz_xor_epi32(0, a, b);
55850        assert_eq_m256i(r, _mm256_setzero_si256());
55851        let r = _mm256_maskz_xor_epi32(0b11111111, a, b);
55852        let e = _mm256_set1_epi32(1 << 2);
55853        assert_eq_m256i(r, e);
55854    }
55855
55856    #[simd_test(enable = "avx512f,avx512vl")]
55857    unsafe fn test_mm_xor_epi32() {
55858        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55859        let b = _mm_set1_epi32(1 << 1);
55860        let r = _mm_xor_epi32(a, b);
55861        let e = _mm_set1_epi32(1 << 2);
55862        assert_eq_m128i(r, e);
55863    }
55864
55865    #[simd_test(enable = "avx512f,avx512vl")]
55866    unsafe fn test_mm_mask_xor_epi32() {
55867        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55868        let b = _mm_set1_epi32(1 << 1);
55869        let r = _mm_mask_xor_epi32(a, 0, a, b);
55870        assert_eq_m128i(r, a);
55871        let r = _mm_mask_xor_epi32(a, 0b00001111, a, b);
55872        let e = _mm_set1_epi32(1 << 2);
55873        assert_eq_m128i(r, e);
55874    }
55875
55876    #[simd_test(enable = "avx512f,avx512vl")]
55877    unsafe fn test_mm_maskz_xor_epi32() {
55878        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55879        let b = _mm_set1_epi32(1 << 1);
55880        let r = _mm_maskz_xor_epi32(0, a, b);
55881        assert_eq_m128i(r, _mm_setzero_si128());
55882        let r = _mm_maskz_xor_epi32(0b00001111, a, b);
55883        let e = _mm_set1_epi32(1 << 2);
55884        assert_eq_m128i(r, e);
55885    }
55886
55887    #[simd_test(enable = "avx512f")]
55888    unsafe fn test_mm512_xor_si512() {
55889        #[rustfmt::skip]
55890        let a = _mm512_set_epi32(
55891            1 << 1 | 1 << 2, 0, 0, 0,
55892            0, 0, 0, 0,
55893            0, 0, 0, 0,
55894            0, 0, 0, 1 << 1 | 1 << 3,
55895        );
55896        #[rustfmt::skip]
55897        let b = _mm512_set_epi32(
55898            1 << 1, 0, 0, 0,
55899            0, 0, 0, 0,
55900            0, 0, 0, 0,
55901            0, 0, 0, 1 << 3 | 1 << 4,
55902        );
55903        let r = _mm512_xor_si512(a, b);
55904        #[rustfmt::skip]
55905        let e = _mm512_set_epi32(
55906            1 << 2, 0, 0, 0,
55907            0, 0, 0, 0,
55908            0, 0, 0, 0,
55909            0, 0, 0, 1 << 1 | 1 << 4,
55910        );
55911        assert_eq_m512i(r, e);
55912    }
55913
55914    #[simd_test(enable = "avx512f")]
55915    unsafe fn test_mm512_andnot_epi32() {
55916        let a = _mm512_set1_epi32(0);
55917        let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
55918        let r = _mm512_andnot_epi32(a, b);
55919        let e = _mm512_set1_epi32(1 << 3 | 1 << 4);
55920        assert_eq_m512i(r, e);
55921    }
55922
55923    #[simd_test(enable = "avx512f")]
55924    unsafe fn test_mm512_mask_andnot_epi32() {
55925        let a = _mm512_set1_epi32(1 << 1 | 1 << 2);
55926        let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
55927        let r = _mm512_mask_andnot_epi32(a, 0, a, b);
55928        assert_eq_m512i(r, a);
55929        let r = _mm512_mask_andnot_epi32(a, 0b11111111_11111111, a, b);
55930        let e = _mm512_set1_epi32(1 << 3 | 1 << 4);
55931        assert_eq_m512i(r, e);
55932    }
55933
55934    #[simd_test(enable = "avx512f")]
55935    unsafe fn test_mm512_maskz_andnot_epi32() {
55936        let a = _mm512_set1_epi32(1 << 1 | 1 << 2);
55937        let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
55938        let r = _mm512_maskz_andnot_epi32(0, a, b);
55939        assert_eq_m512i(r, _mm512_setzero_si512());
55940        let r = _mm512_maskz_andnot_epi32(0b00000000_11111111, a, b);
55941        #[rustfmt::skip]
55942        let e = _mm512_set_epi32(
55943            0, 0, 0, 0,
55944            0, 0, 0, 0,
55945            1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4,
55946            1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4,
55947        );
55948        assert_eq_m512i(r, e);
55949    }
55950
55951    #[simd_test(enable = "avx512f,avx512vl")]
55952    unsafe fn test_mm256_mask_andnot_epi32() {
55953        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55954        let b = _mm256_set1_epi32(1 << 3 | 1 << 4);
55955        let r = _mm256_mask_andnot_epi32(a, 0, a, b);
55956        assert_eq_m256i(r, a);
55957        let r = _mm256_mask_andnot_epi32(a, 0b11111111, a, b);
55958        let e = _mm256_set1_epi32(1 << 3 | 1 << 4);
55959        assert_eq_m256i(r, e);
55960    }
55961
55962    #[simd_test(enable = "avx512f,avx512vl")]
55963    unsafe fn test_mm256_maskz_andnot_epi32() {
55964        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55965        let b = _mm256_set1_epi32(1 << 3 | 1 << 4);
55966        let r = _mm256_maskz_andnot_epi32(0, a, b);
55967        assert_eq_m256i(r, _mm256_setzero_si256());
55968        let r = _mm256_maskz_andnot_epi32(0b11111111, a, b);
55969        let e = _mm256_set1_epi32(1 << 3 | 1 << 4);
55970        assert_eq_m256i(r, e);
55971    }
55972
55973    #[simd_test(enable = "avx512f,avx512vl")]
55974    unsafe fn test_mm_mask_andnot_epi32() {
55975        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55976        let b = _mm_set1_epi32(1 << 3 | 1 << 4);
55977        let r = _mm_mask_andnot_epi32(a, 0, a, b);
55978        assert_eq_m128i(r, a);
55979        let r = _mm_mask_andnot_epi32(a, 0b00001111, a, b);
55980        let e = _mm_set1_epi32(1 << 3 | 1 << 4);
55981        assert_eq_m128i(r, e);
55982    }
55983
55984    #[simd_test(enable = "avx512f,avx512vl")]
55985    unsafe fn test_mm_maskz_andnot_epi32() {
55986        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55987        let b = _mm_set1_epi32(1 << 3 | 1 << 4);
55988        let r = _mm_maskz_andnot_epi32(0, a, b);
55989        assert_eq_m128i(r, _mm_setzero_si128());
55990        let r = _mm_maskz_andnot_epi32(0b00001111, a, b);
55991        let e = _mm_set1_epi32(1 << 3 | 1 << 4);
55992        assert_eq_m128i(r, e);
55993    }
55994
55995    #[simd_test(enable = "avx512f")]
55996    unsafe fn test_cvtmask16_u32() {
55997        let a: __mmask16 = 0b11001100_00110011;
55998        let r = _cvtmask16_u32(a);
55999        let e: u32 = 0b11001100_00110011;
56000        assert_eq!(r, e);
56001    }
56002
56003    #[simd_test(enable = "avx512f")]
56004    unsafe fn test_cvtu32_mask16() {
56005        let a: u32 = 0b11001100_00110011;
56006        let r = _cvtu32_mask16(a);
56007        let e: __mmask16 = 0b11001100_00110011;
56008        assert_eq!(r, e);
56009    }
56010
56011    #[simd_test(enable = "avx512f")]
56012    unsafe fn test_mm512_kand() {
56013        let a: u16 = 0b11001100_00110011;
56014        let b: u16 = 0b11001100_00110011;
56015        let r = _mm512_kand(a, b);
56016        let e: u16 = 0b11001100_00110011;
56017        assert_eq!(r, e);
56018    }
56019
56020    #[simd_test(enable = "avx512f")]
56021    unsafe fn test_kand_mask16() {
56022        let a: u16 = 0b11001100_00110011;
56023        let b: u16 = 0b11001100_00110011;
56024        let r = _kand_mask16(a, b);
56025        let e: u16 = 0b11001100_00110011;
56026        assert_eq!(r, e);
56027    }
56028
56029    #[simd_test(enable = "avx512f")]
56030    unsafe fn test_mm512_kor() {
56031        let a: u16 = 0b11001100_00110011;
56032        let b: u16 = 0b00101110_00001011;
56033        let r = _mm512_kor(a, b);
56034        let e: u16 = 0b11101110_00111011;
56035        assert_eq!(r, e);
56036    }
56037
56038    #[simd_test(enable = "avx512f")]
56039    unsafe fn test_kor_mask16() {
56040        let a: u16 = 0b11001100_00110011;
56041        let b: u16 = 0b00101110_00001011;
56042        let r = _kor_mask16(a, b);
56043        let e: u16 = 0b11101110_00111011;
56044        assert_eq!(r, e);
56045    }
56046
56047    #[simd_test(enable = "avx512f")]
56048    unsafe fn test_mm512_kxor() {
56049        let a: u16 = 0b11001100_00110011;
56050        let b: u16 = 0b00101110_00001011;
56051        let r = _mm512_kxor(a, b);
56052        let e: u16 = 0b11100010_00111000;
56053        assert_eq!(r, e);
56054    }
56055
56056    #[simd_test(enable = "avx512f")]
56057    unsafe fn test_kxor_mask16() {
56058        let a: u16 = 0b11001100_00110011;
56059        let b: u16 = 0b00101110_00001011;
56060        let r = _kxor_mask16(a, b);
56061        let e: u16 = 0b11100010_00111000;
56062        assert_eq!(r, e);
56063    }
56064
56065    #[simd_test(enable = "avx512f")]
56066    unsafe fn test_mm512_knot() {
56067        let a: u16 = 0b11001100_00110011;
56068        let r = _mm512_knot(a);
56069        let e: u16 = 0b00110011_11001100;
56070        assert_eq!(r, e);
56071    }
56072
56073    #[simd_test(enable = "avx512f")]
56074    unsafe fn test_knot_mask16() {
56075        let a: u16 = 0b11001100_00110011;
56076        let r = _knot_mask16(a);
56077        let e: u16 = 0b00110011_11001100;
56078        assert_eq!(r, e);
56079    }
56080
56081    #[simd_test(enable = "avx512f")]
56082    unsafe fn test_mm512_kandn() {
56083        let a: u16 = 0b11001100_00110011;
56084        let b: u16 = 0b00101110_00001011;
56085        let r = _mm512_kandn(a, b);
56086        let e: u16 = 0b00100010_00001000;
56087        assert_eq!(r, e);
56088    }
56089
56090    #[simd_test(enable = "avx512f")]
56091    unsafe fn test_kandn_mask16() {
56092        let a: u16 = 0b11001100_00110011;
56093        let b: u16 = 0b00101110_00001011;
56094        let r = _kandn_mask16(a, b);
56095        let e: u16 = 0b00100010_00001000;
56096        assert_eq!(r, e);
56097    }
56098
56099    #[simd_test(enable = "avx512f")]
56100    unsafe fn test_mm512_kxnor() {
56101        let a: u16 = 0b11001100_00110011;
56102        let b: u16 = 0b00101110_00001011;
56103        let r = _mm512_kxnor(a, b);
56104        let e: u16 = 0b00011101_11000111;
56105        assert_eq!(r, e);
56106    }
56107
56108    #[simd_test(enable = "avx512f")]
56109    unsafe fn test_kxnor_mask16() {
56110        let a: u16 = 0b11001100_00110011;
56111        let b: u16 = 0b00101110_00001011;
56112        let r = _kxnor_mask16(a, b);
56113        let e: u16 = 0b00011101_11000111;
56114        assert_eq!(r, e);
56115    }
56116
56117    #[simd_test(enable = "avx512dq")]
56118    unsafe fn test_kortest_mask16_u8() {
56119        let a: __mmask16 = 0b0110100101101001;
56120        let b: __mmask16 = 0b1011011010110110;
56121        let mut all_ones: u8 = 0;
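        // `a` and `b` are bitwise complements, so `a | b` is all ones: the returned
        // "all zeros" flag is 0 and the "all ones" flag written through the pointer is 1.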
56122        let r = _kortest_mask16_u8(a, b, &mut all_ones);
56123        assert_eq!(r, 0);
56124        assert_eq!(all_ones, 1);
56125    }
56126
56127    #[simd_test(enable = "avx512dq")]
56128    unsafe fn test_kortestc_mask16_u8() {
56129        let a: __mmask16 = 0b0110100101101001;
56130        let b: __mmask16 = 0b1011011010110110;
56131        let r = _kortestc_mask16_u8(a, b);
56132        assert_eq!(r, 1);
56133    }
56134
56135    #[simd_test(enable = "avx512dq")]
56136    unsafe fn test_kortestz_mask16_u8() {
56137        let a: __mmask16 = 0b0110100101101001;
56138        let b: __mmask16 = 0b1011011010110110;
56139        let r = _kortestz_mask16_u8(a, b);
56140        assert_eq!(r, 0);
56141    }
56142
56143    #[simd_test(enable = "avx512dq")]
56144    unsafe fn test_kshiftli_mask16() {
56145        let a: __mmask16 = 0b1001011011000011;
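        // Bits shifted out past bit 15 are discarded, so the top three bits of `a` are lost.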
56146        let r = _kshiftli_mask16::<3>(a);
56147        let e: __mmask16 = 0b1011011000011000;
56148        assert_eq!(r, e);
56149    }
56150
56151    #[simd_test(enable = "avx512dq")]
56152    unsafe fn test_kshiftri_mask16() {
56153        let a: __mmask16 = 0b0110100100111100;
56154        let r = _kshiftri_mask16::<3>(a);
56155        let e: __mmask16 = 0b0000110100100111;
56156        assert_eq!(r, e);
56157    }
56158
56159    #[simd_test(enable = "avx512f")]
56160    unsafe fn test_load_mask16() {
56161        let a: __mmask16 = 0b1001011011000011;
56162        let r = _load_mask16(&a);
56163        let e: __mmask16 = 0b1001011011000011;
56164        assert_eq!(r, e);
56165    }
56166
56167    #[simd_test(enable = "avx512f")]
56168    unsafe fn test_store_mask16() {
56169        let a: __mmask16 = 0b0110100100111100;
56170        let mut r = 0;
56171        _store_mask16(&mut r, a);
56172        let e: __mmask16 = 0b0110100100111100;
56173        assert_eq!(r, e);
56174    }
56175
56176    #[simd_test(enable = "avx512f")]
56177    unsafe fn test_mm512_kmov() {
56178        let a: u16 = 0b11001100_00110011;
56179        let r = _mm512_kmov(a);
56180        let e: u16 = 0b11001100_00110011;
56181        assert_eq!(r, e);
56182    }
56183
56184    #[simd_test(enable = "avx512f")]
56185    unsafe fn test_mm512_int2mask() {
56186        let a: i32 = 0b11001100_00110011;
56187        let r = _mm512_int2mask(a);
56188        let e: u16 = 0b11001100_00110011;
56189        assert_eq!(r, e);
56190    }
56191
56192    #[simd_test(enable = "avx512f")]
56193    unsafe fn test_mm512_mask2int() {
56194        let k1: __mmask16 = 0b11001100_00110011;
56195        let r = _mm512_mask2int(k1);
56196        let e: i32 = 0b11001100_00110011;
56197        assert_eq!(r, e);
56198    }
56199
56200    #[simd_test(enable = "avx512f")]
56201    unsafe fn test_mm512_kunpackb() {
56202        let a: u16 = 0b11001100_00110011;
56203        let b: u16 = 0b00101110_00001011;
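        // `_mm512_kunpackb` concatenates the low bytes of the masks: the low byte of `a`
        // forms the high byte of the result and the low byte of `b` forms the low byte.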
56204        let r = _mm512_kunpackb(a, b);
56205        let e: u16 = 0b00110011_00001011;
56206        assert_eq!(r, e);
56207    }
56208
56209    #[simd_test(enable = "avx512f")]
56210    unsafe fn test_mm512_kortestc() {
56211        let a: u16 = 0b11001100_00110011;
56212        let b: u16 = 0b00101110_00001011;
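        // `kortestc` returns 1 only when `a | b` is all ones (the carry flag of KORTESTW).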
56213        let r = _mm512_kortestc(a, b);
56214        assert_eq!(r, 0);
56215        let b: u16 = 0b11111111_11111111;
56216        let r = _mm512_kortestc(a, b);
56217        assert_eq!(r, 1);
56218    }
56219
56220    #[simd_test(enable = "avx512f")]
56221    unsafe fn test_mm512_kortestz() {
56222        let a: u16 = 0b11001100_00110011;
56223        let b: u16 = 0b00101110_00001011;
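        // `kortestz` returns 1 only when `a | b` is all zeros (the zero flag of KORTESTW).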
56224        let r = _mm512_kortestz(a, b);
56225        assert_eq!(r, 0);
56226        let r = _mm512_kortestz(0, 0);
56227        assert_eq!(r, 1);
56228    }
56229
56230    #[simd_test(enable = "avx512f")]
56231    unsafe fn test_mm512_test_epi32_mask() {
56232        let a = _mm512_set1_epi32(1 << 0);
56233        let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
56234        let r = _mm512_test_epi32_mask(a, b);
56235        let e: __mmask16 = 0b11111111_11111111;
56236        assert_eq!(r, e);
56237    }
56238
56239    #[simd_test(enable = "avx512f")]
56240    unsafe fn test_mm512_mask_test_epi32_mask() {
56241        let a = _mm512_set1_epi32(1 << 0);
56242        let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
56243        let r = _mm512_mask_test_epi32_mask(0, a, b);
56244        assert_eq!(r, 0);
56245        let r = _mm512_mask_test_epi32_mask(0b11111111_11111111, a, b);
56246        let e: __mmask16 = 0b11111111_11111111;
56247        assert_eq!(r, e);
56248    }
56249
56250    #[simd_test(enable = "avx512f,avx512vl")]
56251    unsafe fn test_mm256_test_epi32_mask() {
56252        let a = _mm256_set1_epi32(1 << 0);
56253        let b = _mm256_set1_epi32(1 << 0 | 1 << 1);
56254        let r = _mm256_test_epi32_mask(a, b);
56255        let e: __mmask8 = 0b11111111;
56256        assert_eq!(r, e);
56257    }
56258
56259    #[simd_test(enable = "avx512f,avx512vl")]
56260    unsafe fn test_mm256_mask_test_epi32_mask() {
56261        let a = _mm256_set1_epi32(1 << 0);
56262        let b = _mm256_set1_epi32(1 << 0 | 1 << 1);
56263        let r = _mm256_mask_test_epi32_mask(0, a, b);
56264        assert_eq!(r, 0);
56265        let r = _mm256_mask_test_epi32_mask(0b11111111, a, b);
56266        let e: __mmask8 = 0b11111111;
56267        assert_eq!(r, e);
56268    }
56269
56270    #[simd_test(enable = "avx512f,avx512vl")]
56271    unsafe fn test_mm_test_epi32_mask() {
56272        let a = _mm_set1_epi32(1 << 0);
56273        let b = _mm_set1_epi32(1 << 0 | 1 << 1);
56274        let r = _mm_test_epi32_mask(a, b);
56275        let e: __mmask8 = 0b00001111;
56276        assert_eq!(r, e);
56277    }
56278
56279    #[simd_test(enable = "avx512f,avx512vl")]
56280    unsafe fn test_mm_mask_test_epi32_mask() {
56281        let a = _mm_set1_epi32(1 << 0);
56282        let b = _mm_set1_epi32(1 << 0 | 1 << 1);
56283        let r = _mm_mask_test_epi32_mask(0, a, b);
56284        assert_eq!(r, 0);
56285        let r = _mm_mask_test_epi32_mask(0b11111111, a, b);
56286        let e: __mmask8 = 0b00001111;
56287        assert_eq!(r, e);
56288    }
56289
56290    #[simd_test(enable = "avx512f")]
56291    unsafe fn test_mm512_testn_epi32_mask() {
56292        let a = _mm512_set1_epi32(1 << 0);
56293        let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
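        // `testn` sets a mask bit only where `a & b` is zero; bit 0 is shared in every lane,
        // so the resulting mask is empty.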
56294        let r = _mm512_testn_epi32_mask(a, b);
56295        let e: __mmask16 = 0b00000000_00000000;
56296        assert_eq!(r, e);
56297    }
56298
56299    #[simd_test(enable = "avx512f")]
56300    unsafe fn test_mm512_mask_testn_epi32_mask() {
56301        let a = _mm512_set1_epi32(1 << 0);
56302        let b = _mm512_set1_epi32(1 << 1);
56303        let r = _mm512_mask_testn_epi32_mask(0, a, b);
56304        assert_eq!(r, 0);
56305        let r = _mm512_mask_testn_epi32_mask(0b11111111_11111111, a, b);
56306        let e: __mmask16 = 0b11111111_11111111;
56307        assert_eq!(r, e);
56308    }
56309
56310    #[simd_test(enable = "avx512f,avx512vl")]
56311    unsafe fn test_mm256_testn_epi32_mask() {
56312        let a = _mm256_set1_epi32(1 << 0);
56313        let b = _mm256_set1_epi32(1 << 1);
56314        let r = _mm256_testn_epi32_mask(a, b);
56315        let e: __mmask8 = 0b11111111;
56316        assert_eq!(r, e);
56317    }
56318
56319    #[simd_test(enable = "avx512f,avx512vl")]
56320    unsafe fn test_mm256_mask_testn_epi32_mask() {
56321        let a = _mm256_set1_epi32(1 << 0);
56322        let b = _mm256_set1_epi32(1 << 1);
56323        let r = _mm256_mask_testn_epi32_mask(0, a, b);
56324        assert_eq!(r, 0);
56325        let r = _mm256_mask_testn_epi32_mask(0b11111111, a, b);
56326        let e: __mmask8 = 0b11111111;
56327        assert_eq!(r, e);
56328    }
56329
56330    #[simd_test(enable = "avx512f,avx512vl")]
56331    unsafe fn test_mm_testn_epi32_mask() {
56332        let a = _mm_set1_epi32(1 << 0);
56333        let b = _mm_set1_epi32(1 << 1);
56334        let r = _mm_testn_epi32_mask(a, b);
56335        let e: __mmask8 = 0b00001111;
56336        assert_eq!(r, e);
56337    }
56338
56339    #[simd_test(enable = "avx512f,avx512vl")]
56340    unsafe fn test_mm_mask_testn_epi32_mask() {
56341        let a = _mm_set1_epi32(1 << 0);
56342        let b = _mm_set1_epi32(1 << 1);
56343        let r = _mm_mask_testn_epi32_mask(0, a, b);
56344        assert_eq!(r, 0);
56345        let r = _mm_mask_testn_epi32_mask(0b11111111, a, b);
56346        let e: __mmask8 = 0b00001111;
56347        assert_eq!(r, e);
56348    }
56349
56350    #[simd_test(enable = "avx512f")]
56351    #[cfg_attr(miri, ignore)]
56352    unsafe fn test_mm512_stream_ps() {
56353        #[repr(align(64))]
56354        struct Memory {
56355            pub data: [f32; 16], // 64 bytes
56356        }
56357        let a = _mm512_set1_ps(7.0);
56358        let mut mem = Memory { data: [-1.0; 16] };
56359
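        // Non-temporal stores require a 64-byte-aligned destination, which the
        // `#[repr(align(64))]` wrapper above guarantees.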
56360        _mm512_stream_ps(&mut mem.data[0] as *mut f32, a);
56361        for i in 0..16 {
56362            assert_eq!(mem.data[i], get_m512(a, i));
56363        }
56364    }
56365
56366    #[simd_test(enable = "avx512f")]
56367    #[cfg_attr(miri, ignore)]
56368    unsafe fn test_mm512_stream_pd() {
56369        #[repr(align(64))]
56370        struct Memory {
56371            pub data: [f64; 8],
56372        }
56373        let a = _mm512_set1_pd(7.0);
56374        let mut mem = Memory { data: [-1.0; 8] };
56375
56376        _mm512_stream_pd(&mut mem.data[0] as *mut f64, a);
56377        for i in 0..8 {
56378            assert_eq!(mem.data[i], get_m512d(a, i));
56379        }
56380    }
56381
56382    #[simd_test(enable = "avx512f")]
56383    #[cfg_attr(miri, ignore)]
56384    unsafe fn test_mm512_stream_si512() {
56385        #[repr(align(64))]
56386        struct Memory {
56387            pub data: [i64; 8],
56388        }
56389        let a = _mm512_set1_epi32(7);
56390        let mut mem = Memory { data: [-1; 8] };
56391
56392        _mm512_stream_si512(mem.data.as_mut_ptr().cast(), a);
56393        for i in 0..8 {
56394            assert_eq!(mem.data[i], get_m512i(a, i));
56395        }
56396    }
56397
56398    #[simd_test(enable = "avx512f")]
56399    unsafe fn test_mm512_stream_load_si512() {
56400        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
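        // `_mm512_stream_load_si512` also needs a 64-byte-aligned source; `__m512i` itself
        // carries that alignment, so taking the address of `a` is sufficient here.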
56401        let r = _mm512_stream_load_si512(core::ptr::addr_of!(a) as *const _);
56402        assert_eq_m512i(a, r);
56403    }
56404
56405    #[simd_test(enable = "avx512f")]
56406    unsafe fn test_mm512_reduce_add_epi32() {
56407        let a = _mm512_set1_epi32(1);
56408        let e: i32 = _mm512_reduce_add_epi32(a);
56409        assert_eq!(16, e);
56410    }
56411
56412    #[simd_test(enable = "avx512f")]
56413    unsafe fn test_mm512_mask_reduce_add_epi32() {
56414        let a = _mm512_set1_epi32(1);
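        // The high mask byte selects elements 8..=15, so only eight of the sixteen ones are summed.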
56415        let e: i32 = _mm512_mask_reduce_add_epi32(0b11111111_00000000, a);
56416        assert_eq!(8, e);
56417    }
56418
56419    #[simd_test(enable = "avx512f")]
56420    unsafe fn test_mm512_reduce_add_ps() {
56421        let a = _mm512_set1_ps(1.);
56422        let e: f32 = _mm512_reduce_add_ps(a);
56423        assert_eq!(16., e);
56424    }
56425
56426    #[simd_test(enable = "avx512f")]
56427    unsafe fn test_mm512_mask_reduce_add_ps() {
56428        let a = _mm512_set1_ps(1.);
56429        let e: f32 = _mm512_mask_reduce_add_ps(0b11111111_00000000, a);
56430        assert_eq!(8., e);
56431    }
56432
56433    #[simd_test(enable = "avx512f")]
56434    unsafe fn test_mm512_reduce_mul_epi32() {
56435        let a = _mm512_set1_epi32(2);
56436        let e: i32 = _mm512_reduce_mul_epi32(a);
56437        assert_eq!(65536, e);
56438    }
56439
56440    #[simd_test(enable = "avx512f")]
56441    unsafe fn test_mm512_mask_reduce_mul_epi32() {
56442        let a = _mm512_set1_epi32(2);
56443        let e: i32 = _mm512_mask_reduce_mul_epi32(0b11111111_00000000, a);
56444        assert_eq!(256, e);
56445    }
56446
56447    #[simd_test(enable = "avx512f")]
56448    unsafe fn test_mm512_reduce_mul_ps() {
56449        let a = _mm512_set1_ps(2.);
56450        let e: f32 = _mm512_reduce_mul_ps(a);
56451        assert_eq!(65536., e);
56452    }
56453
56454    #[simd_test(enable = "avx512f")]
56455    unsafe fn test_mm512_mask_reduce_mul_ps() {
56456        let a = _mm512_set1_ps(2.);
56457        let e: f32 = _mm512_mask_reduce_mul_ps(0b11111111_00000000, a);
56458        assert_eq!(256., e);
56459    }
56460
56461    #[simd_test(enable = "avx512f")]
56462    unsafe fn test_mm512_reduce_max_epi32() {
56463        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56464        let e: i32 = _mm512_reduce_max_epi32(a);
56465        assert_eq!(15, e);
56466    }
56467
56468    #[simd_test(enable = "avx512f")]
56469    unsafe fn test_mm512_mask_reduce_max_epi32() {
56470        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
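        // Elements 8..=15 hold the values 0 through 7 (arguments are listed from element 15
        // down), so the masked maximum is 7 rather than 15.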
56471        let e: i32 = _mm512_mask_reduce_max_epi32(0b11111111_00000000, a);
56472        assert_eq!(7, e);
56473    }
56474
56475    #[simd_test(enable = "avx512f")]
56476    unsafe fn test_mm512_reduce_max_epu32() {
56477        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56478        let e: u32 = _mm512_reduce_max_epu32(a);
56479        assert_eq!(15, e);
56480    }
56481
56482    #[simd_test(enable = "avx512f")]
56483    unsafe fn test_mm512_mask_reduce_max_epu32() {
56484        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56485        let e: u32 = _mm512_mask_reduce_max_epu32(0b11111111_00000000, a);
56486        assert_eq!(7, e);
56487    }
56488
56489    #[simd_test(enable = "avx512f")]
56490    unsafe fn test_mm512_reduce_max_ps() {
56491        let a = _mm512_set_ps(
56492            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56493        );
56494        let e: f32 = _mm512_reduce_max_ps(a);
56495        assert_eq!(15., e);
56496    }
56497
56498    #[simd_test(enable = "avx512f")]
56499    unsafe fn test_mm512_mask_reduce_max_ps() {
56500        let a = _mm512_set_ps(
56501            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56502        );
56503        let e: f32 = _mm512_mask_reduce_max_ps(0b11111111_00000000, a);
56504        assert_eq!(7., e);
56505    }
56506
56507    #[simd_test(enable = "avx512f")]
56508    unsafe fn test_mm512_reduce_min_epi32() {
56509        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56510        let e: i32 = _mm512_reduce_min_epi32(a);
56511        assert_eq!(0, e);
56512    }
56513
56514    #[simd_test(enable = "avx512f")]
56515    unsafe fn test_mm512_mask_reduce_min_epi32() {
56516        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56517        let e: i32 = _mm512_mask_reduce_min_epi32(0b11111111_00000000, a);
56518        assert_eq!(0, e);
56519    }
56520
56521    #[simd_test(enable = "avx512f")]
56522    unsafe fn test_mm512_reduce_min_epu32() {
56523        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56524        let e: u32 = _mm512_reduce_min_epu32(a);
56525        assert_eq!(0, e);
56526    }
56527
56528    #[simd_test(enable = "avx512f")]
56529    unsafe fn test_mm512_mask_reduce_min_epu32() {
56530        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56531        let e: u32 = _mm512_mask_reduce_min_epu32(0b11111111_00000000, a);
56532        assert_eq!(0, e);
56533    }
56534
56535    #[simd_test(enable = "avx512f")]
56536    unsafe fn test_mm512_reduce_min_ps() {
56537        let a = _mm512_set_ps(
56538            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56539        );
56540        let e: f32 = _mm512_reduce_min_ps(a);
56541        assert_eq!(0., e);
56542    }
56543
56544    #[simd_test(enable = "avx512f")]
56545    unsafe fn test_mm512_mask_reduce_min_ps() {
56546        let a = _mm512_set_ps(
56547            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56548        );
56549        let e: f32 = _mm512_mask_reduce_min_ps(0b11111111_00000000, a);
56550        assert_eq!(0., e);
56551    }
56552
56553    #[simd_test(enable = "avx512f")]
56554    unsafe fn test_mm512_reduce_and_epi32() {
56555        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
56556        let e: i32 = _mm512_reduce_and_epi32(a);
56557        assert_eq!(0, e);
56558    }
56559
56560    #[simd_test(enable = "avx512f")]
56561    unsafe fn test_mm512_mask_reduce_and_epi32() {
56562        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
56563        let e: i32 = _mm512_mask_reduce_and_epi32(0b11111111_00000000, a);
56564        assert_eq!(1, e);
56565    }
56566
56567    #[simd_test(enable = "avx512f")]
56568    unsafe fn test_mm512_reduce_or_epi32() {
56569        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
56570        let e: i32 = _mm512_reduce_or_epi32(a);
56571        assert_eq!(3, e);
56572    }
56573
56574    #[simd_test(enable = "avx512f")]
56575    unsafe fn test_mm512_mask_reduce_or_epi32() {
56576        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
56577        let e: i32 = _mm512_mask_reduce_or_epi32(0b11111111_00000000, a);
56578        assert_eq!(1, e);
56579    }
56580
56581    #[simd_test(enable = "avx512f")]
56582    unsafe fn test_mm512_mask_compress_epi32() {
56583        let src = _mm512_set1_epi32(200);
56584        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56585        let r = _mm512_mask_compress_epi32(src, 0, a);
56586        assert_eq_m512i(r, src);
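        // Compression packs the selected elements (the even indices, holding the odd values
        // 15, 13, ..., 1) into the low lanes in index order; the remaining lanes come from `src`.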
56587        let r = _mm512_mask_compress_epi32(src, 0b01010101_01010101, a);
56588        let e = _mm512_set_epi32(
56589            200, 200, 200, 200, 200, 200, 200, 200, 1, 3, 5, 7, 9, 11, 13, 15,
56590        );
56591        assert_eq_m512i(r, e);
56592    }
56593
56594    #[simd_test(enable = "avx512f")]
56595    unsafe fn test_mm512_maskz_compress_epi32() {
56596        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56597        let r = _mm512_maskz_compress_epi32(0, a);
56598        assert_eq_m512i(r, _mm512_setzero_si512());
56599        let r = _mm512_maskz_compress_epi32(0b01010101_01010101, a);
56600        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15);
56601        assert_eq_m512i(r, e);
56602    }
56603
56604    #[simd_test(enable = "avx512f,avx512vl")]
56605    unsafe fn test_mm256_mask_compress_epi32() {
56606        let src = _mm256_set1_epi32(200);
56607        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
56608        let r = _mm256_mask_compress_epi32(src, 0, a);
56609        assert_eq_m256i(r, src);
56610        let r = _mm256_mask_compress_epi32(src, 0b01010101, a);
56611        let e = _mm256_set_epi32(200, 200, 200, 200, 1, 3, 5, 7);
56612        assert_eq_m256i(r, e);
56613    }
56614
56615    #[simd_test(enable = "avx512f,avx512vl")]
56616    unsafe fn test_mm256_maskz_compress_epi32() {
56617        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
56618        let r = _mm256_maskz_compress_epi32(0, a);
56619        assert_eq_m256i(r, _mm256_setzero_si256());
56620        let r = _mm256_maskz_compress_epi32(0b01010101, a);
56621        let e = _mm256_set_epi32(0, 0, 0, 0, 1, 3, 5, 7);
56622        assert_eq_m256i(r, e);
56623    }
56624
56625    #[simd_test(enable = "avx512f,avx512vl")]
56626    unsafe fn test_mm_mask_compress_epi32() {
56627        let src = _mm_set1_epi32(200);
56628        let a = _mm_set_epi32(0, 1, 2, 3);
56629        let r = _mm_mask_compress_epi32(src, 0, a);
56630        assert_eq_m128i(r, src);
56631        let r = _mm_mask_compress_epi32(src, 0b00000101, a);
56632        let e = _mm_set_epi32(200, 200, 1, 3);
56633        assert_eq_m128i(r, e);
56634    }
56635
56636    #[simd_test(enable = "avx512f,avx512vl")]
56637    unsafe fn test_mm_maskz_compress_epi32() {
56638        let a = _mm_set_epi32(0, 1, 2, 3);
56639        let r = _mm_maskz_compress_epi32(0, a);
56640        assert_eq_m128i(r, _mm_setzero_si128());
56641        let r = _mm_maskz_compress_epi32(0b00000101, a);
56642        let e = _mm_set_epi32(0, 0, 1, 3);
56643        assert_eq_m128i(r, e);
56644    }
56645
56646    #[simd_test(enable = "avx512f")]
56647    unsafe fn test_mm512_mask_compress_ps() {
56648        let src = _mm512_set1_ps(200.);
56649        let a = _mm512_set_ps(
56650            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56651        );
56652        let r = _mm512_mask_compress_ps(src, 0, a);
56653        assert_eq_m512(r, src);
56654        let r = _mm512_mask_compress_ps(src, 0b01010101_01010101, a);
56655        let e = _mm512_set_ps(
56656            200., 200., 200., 200., 200., 200., 200., 200., 1., 3., 5., 7., 9., 11., 13., 15.,
56657        );
56658        assert_eq_m512(r, e);
56659    }
56660
56661    #[simd_test(enable = "avx512f")]
56662    unsafe fn test_mm512_maskz_compress_ps() {
56663        let a = _mm512_set_ps(
56664            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56665        );
56666        let r = _mm512_maskz_compress_ps(0, a);
56667        assert_eq_m512(r, _mm512_setzero_ps());
56668        let r = _mm512_maskz_compress_ps(0b01010101_01010101, a);
56669        let e = _mm512_set_ps(
56670            0., 0., 0., 0., 0., 0., 0., 0., 1., 3., 5., 7., 9., 11., 13., 15.,
56671        );
56672        assert_eq_m512(r, e);
56673    }
56674
56675    #[simd_test(enable = "avx512f,avx512vl")]
56676    unsafe fn test_mm256_mask_compress_ps() {
56677        let src = _mm256_set1_ps(200.);
56678        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
56679        let r = _mm256_mask_compress_ps(src, 0, a);
56680        assert_eq_m256(r, src);
56681        let r = _mm256_mask_compress_ps(src, 0b01010101, a);
56682        let e = _mm256_set_ps(200., 200., 200., 200., 1., 3., 5., 7.);
56683        assert_eq_m256(r, e);
56684    }
56685
56686    #[simd_test(enable = "avx512f,avx512vl")]
56687    unsafe fn test_mm256_maskz_compress_ps() {
56688        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
56689        let r = _mm256_maskz_compress_ps(0, a);
56690        assert_eq_m256(r, _mm256_setzero_ps());
56691        let r = _mm256_maskz_compress_ps(0b01010101, a);
56692        let e = _mm256_set_ps(0., 0., 0., 0., 1., 3., 5., 7.);
56693        assert_eq_m256(r, e);
56694    }
56695
56696    #[simd_test(enable = "avx512f,avx512vl")]
56697    unsafe fn test_mm_mask_compress_ps() {
56698        let src = _mm_set1_ps(200.);
56699        let a = _mm_set_ps(0., 1., 2., 3.);
56700        let r = _mm_mask_compress_ps(src, 0, a);
56701        assert_eq_m128(r, src);
56702        let r = _mm_mask_compress_ps(src, 0b00000101, a);
56703        let e = _mm_set_ps(200., 200., 1., 3.);
56704        assert_eq_m128(r, e);
56705    }
56706
56707    #[simd_test(enable = "avx512f,avx512vl")]
56708    unsafe fn test_mm_maskz_compress_ps() {
56709        let a = _mm_set_ps(0., 1., 2., 3.);
56710        let r = _mm_maskz_compress_ps(0, a);
56711        assert_eq_m128(r, _mm_setzero_ps());
56712        let r = _mm_maskz_compress_ps(0b00000101, a);
56713        let e = _mm_set_ps(0., 0., 1., 3.);
56714        assert_eq_m128(r, e);
56715    }
56716
56717    #[simd_test(enable = "avx512f")]
56718    unsafe fn test_mm512_mask_compressstoreu_epi32() {
56719        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
56720        let mut r = [0_i32; 16];
56721        _mm512_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0, a);
56722        assert_eq!(&r, &[0_i32; 16]);
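        // With a non-trivial mask only the selected elements are written, packed contiguously
        // at the start of the buffer; the tail of `r` keeps its previous zeros.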
56723        _mm512_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0b1111000011001010, a);
56724        assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]);
56725    }
56726
56727    #[simd_test(enable = "avx512f,avx512vl")]
56728    unsafe fn test_mm256_mask_compressstoreu_epi32() {
56729        let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
56730        let mut r = [0_i32; 8];
56731        _mm256_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0, a);
56732        assert_eq!(&r, &[0_i32; 8]);
56733        _mm256_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0b11001010, a);
56734        assert_eq!(&r, &[2, 4, 7, 8, 0, 0, 0, 0]);
56735    }
56736
56737    #[simd_test(enable = "avx512f,avx512vl")]
56738    unsafe fn test_mm_mask_compressstoreu_epi32() {
56739        let a = _mm_setr_epi32(1, 2, 3, 4);
56740        let mut r = [0_i32; 4];
56741        _mm_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0, a);
56742        assert_eq!(&r, &[0_i32; 4]);
56743        _mm_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0b1011, a);
56744        assert_eq!(&r, &[1, 2, 4, 0]);
56745    }
56746
56747    #[simd_test(enable = "avx512f")]
56748    unsafe fn test_mm512_mask_compressstoreu_epi64() {
56749        let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
56750        let mut r = [0_i64; 8];
56751        _mm512_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0, a);
56752        assert_eq!(&r, &[0_i64; 8]);
56753        _mm512_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0b11001010, a);
56754        assert_eq!(&r, &[2, 4, 7, 8, 0, 0, 0, 0]);
56755    }
56756
56757    #[simd_test(enable = "avx512f,avx512vl")]
56758    unsafe fn test_mm256_mask_compressstoreu_epi64() {
56759        let a = _mm256_setr_epi64x(1, 2, 3, 4);
56760        let mut r = [0_i64; 4];
56761        _mm256_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0, a);
56762        assert_eq!(&r, &[0_i64; 4]);
56763        _mm256_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0b1011, a);
56764        assert_eq!(&r, &[1, 2, 4, 0]);
56765    }
56766
56767    #[simd_test(enable = "avx512f,avx512vl")]
56768    unsafe fn test_mm_mask_compressstoreu_epi64() {
56769        let a = _mm_setr_epi64x(1, 2);
56770        let mut r = [0_i64; 2];
56771        _mm_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0, a);
56772        assert_eq!(&r, &[0_i64; 2]);
56773        _mm_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0b10, a);
56774        assert_eq!(&r, &[2, 0]);
56775    }
56776
56777    #[simd_test(enable = "avx512f")]
56778    unsafe fn test_mm512_mask_compressstoreu_ps() {
56779        let a = _mm512_setr_ps(
56780            1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32, 9_f32, 10_f32, 11_f32, 12_f32,
56781            13_f32, 14_f32, 15_f32, 16_f32,
56782        );
56783        let mut r = [0_f32; 16];
56784        _mm512_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0, a);
56785        assert_eq!(&r, &[0_f32; 16]);
56786        _mm512_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0b1111000011001010, a);
56787        assert_eq!(
56788            &r,
56789            &[
56790                2_f32, 4_f32, 7_f32, 8_f32, 13_f32, 14_f32, 15_f32, 16_f32, 0_f32, 0_f32, 0_f32,
56791                0_f32, 0_f32, 0_f32, 0_f32, 0_f32
56792            ]
56793        );
56794    }
56795
56796    #[simd_test(enable = "avx512f,avx512vl")]
56797    unsafe fn test_mm256_mask_compressstoreu_ps() {
56798        let a = _mm256_setr_ps(1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32);
56799        let mut r = [0_f32; 8];
56800        _mm256_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0, a);
56801        assert_eq!(&r, &[0_f32; 8]);
56802        _mm256_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0b11001010, a);
56803        assert_eq!(
56804            &r,
56805            &[2_f32, 4_f32, 7_f32, 8_f32, 0_f32, 0_f32, 0_f32, 0_f32]
56806        );
56807    }
56808
56809    #[simd_test(enable = "avx512f,avx512vl")]
56810    unsafe fn test_mm_mask_compressstoreu_ps() {
56811        let a = _mm_setr_ps(1_f32, 2_f32, 3_f32, 4_f32);
56812        let mut r = [0.; 4];
56813        _mm_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0, a);
56814        assert_eq!(&r, &[0.; 4]);
56815        _mm_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0b1011, a);
56816        assert_eq!(&r, &[1_f32, 2_f32, 4_f32, 0_f32]);
56817    }
56818
56819    #[simd_test(enable = "avx512f")]
56820    unsafe fn test_mm512_mask_compressstoreu_pd() {
56821        let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
56822        let mut r = [0.; 8];
56823        _mm512_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0, a);
56824        assert_eq!(&r, &[0.; 8]);
56825        _mm512_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0b11001010, a);
56826        assert_eq!(&r, &[2., 4., 7., 8., 0., 0., 0., 0.]);
56827    }
56828
56829    #[simd_test(enable = "avx512f,avx512vl")]
56830    unsafe fn test_mm256_mask_compressstoreu_pd() {
56831        let a = _mm256_setr_pd(1., 2., 3., 4.);
56832        let mut r = [0.; 4];
56833        _mm256_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0, a);
56834        assert_eq!(&r, &[0.; 4]);
56835        _mm256_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0b1011, a);
56836        assert_eq!(&r, &[1., 2., 4., 0.]);
56837    }
56838
56839    #[simd_test(enable = "avx512f,avx512vl")]
56840    unsafe fn test_mm_mask_compressstoreu_pd() {
56841        let a = _mm_setr_pd(1., 2.);
56842        let mut r = [0.; 2];
56843        _mm_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0, a);
56844        assert_eq!(&r, &[0.; 2]);
56845        _mm_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0b10, a);
56846        assert_eq!(&r, &[2., 0.]);
56847    }
56848
56849    #[simd_test(enable = "avx512f")]
56850    unsafe fn test_mm512_mask_expand_epi32() {
56851        let src = _mm512_set1_epi32(200);
56852        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56853        let r = _mm512_mask_expand_epi32(src, 0, a);
56854        assert_eq_m512i(r, src);
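        // Expansion is the inverse of compression: consecutive low elements of `a` (15, 14, ...)
        // are scattered to the lanes whose mask bit is set; unselected lanes keep `src`.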
56855        let r = _mm512_mask_expand_epi32(src, 0b01010101_01010101, a);
56856        let e = _mm512_set_epi32(
56857            200, 8, 200, 9, 200, 10, 200, 11, 200, 12, 200, 13, 200, 14, 200, 15,
56858        );
56859        assert_eq_m512i(r, e);
56860    }
56861
56862    #[simd_test(enable = "avx512f")]
56863    unsafe fn test_mm512_maskz_expand_epi32() {
56864        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56865        let r = _mm512_maskz_expand_epi32(0, a);
56866        assert_eq_m512i(r, _mm512_setzero_si512());
56867        let r = _mm512_maskz_expand_epi32(0b01010101_01010101, a);
56868        let e = _mm512_set_epi32(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15);
56869        assert_eq_m512i(r, e);
56870    }
56871
56872    #[simd_test(enable = "avx512f,avx512vl")]
56873    unsafe fn test_mm256_mask_expand_epi32() {
56874        let src = _mm256_set1_epi32(200);
56875        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
56876        let r = _mm256_mask_expand_epi32(src, 0, a);
56877        assert_eq_m256i(r, src);
56878        let r = _mm256_mask_expand_epi32(src, 0b01010101, a);
56879        let e = _mm256_set_epi32(200, 4, 200, 5, 200, 6, 200, 7);
56880        assert_eq_m256i(r, e);
56881    }
56882
56883    #[simd_test(enable = "avx512f,avx512vl")]
56884    unsafe fn test_mm256_maskz_expand_epi32() {
56885        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
56886        let r = _mm256_maskz_expand_epi32(0, a);
56887        assert_eq_m256i(r, _mm256_setzero_si256());
56888        let r = _mm256_maskz_expand_epi32(0b01010101, a);
56889        let e = _mm256_set_epi32(0, 4, 0, 5, 0, 6, 0, 7);
56890        assert_eq_m256i(r, e);
56891    }
56892
56893    #[simd_test(enable = "avx512f,avx512vl")]
56894    unsafe fn test_mm_mask_expand_epi32() {
56895        let src = _mm_set1_epi32(200);
56896        let a = _mm_set_epi32(0, 1, 2, 3);
56897        let r = _mm_mask_expand_epi32(src, 0, a);
56898        assert_eq_m128i(r, src);
56899        let r = _mm_mask_expand_epi32(src, 0b00000101, a);
56900        let e = _mm_set_epi32(200, 2, 200, 3);
56901        assert_eq_m128i(r, e);
56902    }
56903
56904    #[simd_test(enable = "avx512f,avx512vl")]
56905    unsafe fn test_mm_maskz_expand_epi32() {
56906        let a = _mm_set_epi32(0, 1, 2, 3);
56907        let r = _mm_maskz_expand_epi32(0, a);
56908        assert_eq_m128i(r, _mm_setzero_si128());
56909        let r = _mm_maskz_expand_epi32(0b00000101, a);
56910        let e = _mm_set_epi32(0, 2, 0, 3);
56911        assert_eq_m128i(r, e);
56912    }
56913
56914    #[simd_test(enable = "avx512f")]
56915    unsafe fn test_mm512_mask_expand_ps() {
56916        let src = _mm512_set1_ps(200.);
56917        let a = _mm512_set_ps(
56918            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56919        );
56920        let r = _mm512_mask_expand_ps(src, 0, a);
56921        assert_eq_m512(r, src);
56922        let r = _mm512_mask_expand_ps(src, 0b01010101_01010101, a);
56923        let e = _mm512_set_ps(
56924            200., 8., 200., 9., 200., 10., 200., 11., 200., 12., 200., 13., 200., 14., 200., 15.,
56925        );
56926        assert_eq_m512(r, e);
56927    }
56928
56929    #[simd_test(enable = "avx512f")]
56930    unsafe fn test_mm512_maskz_expand_ps() {
56931        let a = _mm512_set_ps(
56932            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56933        );
56934        let r = _mm512_maskz_expand_ps(0, a);
56935        assert_eq_m512(r, _mm512_setzero_ps());
56936        let r = _mm512_maskz_expand_ps(0b01010101_01010101, a);
56937        let e = _mm512_set_ps(
56938            0., 8., 0., 9., 0., 10., 0., 11., 0., 12., 0., 13., 0., 14., 0., 15.,
56939        );
56940        assert_eq_m512(r, e);
56941    }
56942
56943    #[simd_test(enable = "avx512f,avx512vl")]
56944    unsafe fn test_mm256_mask_expand_ps() {
56945        let src = _mm256_set1_ps(200.);
56946        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
56947        let r = _mm256_mask_expand_ps(src, 0, a);
56948        assert_eq_m256(r, src);
56949        let r = _mm256_mask_expand_ps(src, 0b01010101, a);
56950        let e = _mm256_set_ps(200., 4., 200., 5., 200., 6., 200., 7.);
56951        assert_eq_m256(r, e);
56952    }
56953
56954    #[simd_test(enable = "avx512f,avx512vl")]
56955    unsafe fn test_mm256_maskz_expand_ps() {
56956        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
56957        let r = _mm256_maskz_expand_ps(0, a);
56958        assert_eq_m256(r, _mm256_setzero_ps());
56959        let r = _mm256_maskz_expand_ps(0b01010101, a);
56960        let e = _mm256_set_ps(0., 4., 0., 5., 0., 6., 0., 7.);
56961        assert_eq_m256(r, e);
56962    }
56963
56964    #[simd_test(enable = "avx512f,avx512vl")]
56965    unsafe fn test_mm_mask_expand_ps() {
56966        let src = _mm_set1_ps(200.);
56967        let a = _mm_set_ps(0., 1., 2., 3.);
56968        let r = _mm_mask_expand_ps(src, 0, a);
56969        assert_eq_m128(r, src);
56970        let r = _mm_mask_expand_ps(src, 0b00000101, a);
56971        let e = _mm_set_ps(200., 2., 200., 3.);
56972        assert_eq_m128(r, e);
56973    }
56974
56975    #[simd_test(enable = "avx512f,avx512vl")]
56976    unsafe fn test_mm_maskz_expand_ps() {
56977        let a = _mm_set_ps(0., 1., 2., 3.);
56978        let r = _mm_maskz_expand_ps(0, a);
56979        assert_eq_m128(r, _mm_setzero_ps());
56980        let r = _mm_maskz_expand_ps(0b00000101, a);
56981        let e = _mm_set_ps(0., 2., 0., 3.);
56982        assert_eq_m128(r, e);
56983    }
56984
56985    #[simd_test(enable = "avx512f")]
56986    unsafe fn test_mm512_loadu_epi32() {
56987        let a = &[4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50];
56988        let p = a.as_ptr();
56989        let r = _mm512_loadu_epi32(black_box(p));
56990        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
56991        assert_eq_m512i(r, e);
56992    }
56993
56994    #[simd_test(enable = "avx512f,avx512vl")]
56995    unsafe fn test_mm256_loadu_epi32() {
56996        let a = &[4, 3, 2, 5, 8, 9, 64, 50];
56997        let p = a.as_ptr();
56998        let r = _mm256_loadu_epi32(black_box(p));
56999        let e = _mm256_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50);
57000        assert_eq_m256i(r, e);
57001    }
57002
57003    #[simd_test(enable = "avx512f,avx512vl")]
57004    unsafe fn test_mm_loadu_epi32() {
57005        let a = &[4, 3, 2, 5];
57006        let p = a.as_ptr();
57007        let r = _mm_loadu_epi32(black_box(p));
57008        let e = _mm_setr_epi32(4, 3, 2, 5);
57009        assert_eq_m128i(r, e);
57010    }
57011
57012    #[simd_test(enable = "avx512f")]
57013    unsafe fn test_mm512_mask_cvtepi32_storeu_epi16() {
57014        let a = _mm512_set1_epi32(9);
57015        let mut r = _mm256_undefined_si256();
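        // All sixteen mask bits are set, so the sixteen narrowed i16 values overwrite the whole
        // 256-bit buffer and the undefined initial contents never reach the comparison.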
57016        _mm512_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
57017        let e = _mm256_set1_epi16(9);
57018        assert_eq_m256i(r, e);
57019    }
57020
57021    #[simd_test(enable = "avx512f,avx512vl")]
57022    unsafe fn test_mm256_mask_cvtepi32_storeu_epi16() {
57023        let a = _mm256_set1_epi32(9);
57024        let mut r = _mm_undefined_si128();
57025        _mm256_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
57026        let e = _mm_set1_epi16(9);
57027        assert_eq_m128i(r, e);
57028    }
57029
57030    #[simd_test(enable = "avx512f,avx512vl")]
57031    unsafe fn test_mm_mask_cvtepi32_storeu_epi16() {
57032        let a = _mm_set1_epi32(9);
57033        let mut r = _mm_set1_epi8(0);
57034        _mm_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
57035        let e = _mm_set_epi16(0, 0, 0, 0, 9, 9, 9, 9);
57036        assert_eq_m128i(r, e);
57037    }
57038
57039    #[simd_test(enable = "avx512f")]
57040    unsafe fn test_mm512_mask_cvtsepi32_storeu_epi16() {
57041        let a = _mm512_set1_epi32(i32::MAX);
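        // Signed saturation clamps i32::MAX to i16::MAX before the narrowing store.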
57042        let mut r = _mm256_undefined_si256();
57043        _mm512_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
57044        let e = _mm256_set1_epi16(i16::MAX);
57045        assert_eq_m256i(r, e);
57046    }
57047
57048    #[simd_test(enable = "avx512f,avx512vl")]
57049    unsafe fn test_mm256_mask_cvtsepi32_storeu_epi16() {
57050        let a = _mm256_set1_epi32(i32::MAX);
57051        let mut r = _mm_undefined_si128();
57052        _mm256_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
57053        let e = _mm_set1_epi16(i16::MAX);
57054        assert_eq_m128i(r, e);
57055    }
57056
57057    #[simd_test(enable = "avx512f,avx512vl")]
57058    unsafe fn test_mm_mask_cvtsepi32_storeu_epi16() {
57059        let a = _mm_set1_epi32(i32::MAX);
57060        let mut r = _mm_set1_epi8(0);
57061        _mm_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
57062        let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
57063        assert_eq_m128i(r, e);
57064    }
57065
57066    #[simd_test(enable = "avx512f")]
57067    unsafe fn test_mm512_mask_cvtusepi32_storeu_epi16() {
57068        let a = _mm512_set1_epi32(i32::MAX);
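        // Unsigned saturation clamps i32::MAX to u16::MAX before the narrowing store.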
57069        let mut r = _mm256_undefined_si256();
57070        _mm512_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
57071        let e = _mm256_set1_epi16(u16::MAX as i16);
57072        assert_eq_m256i(r, e);
57073    }
57074
57075    #[simd_test(enable = "avx512f,avx512vl")]
57076    unsafe fn test_mm256_mask_cvtusepi32_storeu_epi16() {
57077        let a = _mm256_set1_epi32(i32::MAX);
57078        let mut r = _mm_undefined_si128();
57079        _mm256_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
57080        let e = _mm_set1_epi16(u16::MAX as i16);
57081        assert_eq_m128i(r, e);
57082    }
57083
57084    #[simd_test(enable = "avx512f,avx512vl")]
57085    unsafe fn test_mm_mask_cvtusepi32_storeu_epi16() {
57086        let a = _mm_set1_epi32(i32::MAX);
57087        let mut r = _mm_set1_epi8(0);
57088        _mm_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
57089        let e = _mm_set_epi16(
57090            0,
57091            0,
57092            0,
57093            0,
57094            u16::MAX as i16,
57095            u16::MAX as i16,
57096            u16::MAX as i16,
57097            u16::MAX as i16,
57098        );
57099        assert_eq_m128i(r, e);
57100    }
57101
57102    #[simd_test(enable = "avx512f")]
57103    unsafe fn test_mm512_mask_cvtepi32_storeu_epi8() {
57104        let a = _mm512_set1_epi32(9);
57105        let mut r = _mm_undefined_si128();
57106        _mm512_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
57107        let e = _mm_set1_epi8(9);
57108        assert_eq_m128i(r, e);
57109    }
57110
57111    #[simd_test(enable = "avx512f,avx512vl")]
57112    unsafe fn test_mm256_mask_cvtepi32_storeu_epi8() {
57113        let a = _mm256_set1_epi32(9);
57114        let mut r = _mm_set1_epi8(0);
57115        _mm256_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57116        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9);
57117        assert_eq_m128i(r, e);
57118    }
57119
57120    #[simd_test(enable = "avx512f,avx512vl")]
57121    unsafe fn test_mm_mask_cvtepi32_storeu_epi8() {
57122        let a = _mm_set1_epi32(9);
57123        let mut r = _mm_set1_epi8(0);
57124        _mm_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57125        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9);
57126        assert_eq_m128i(r, e);
57127    }
57128
57129    #[simd_test(enable = "avx512f")]
57130    unsafe fn test_mm512_mask_cvtsepi32_storeu_epi8() {
57131        let a = _mm512_set1_epi32(i32::MAX);
57132        let mut r = _mm_undefined_si128();
57133        _mm512_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
57134        let e = _mm_set1_epi8(i8::MAX);
57135        assert_eq_m128i(r, e);
57136    }
57137
57138    #[simd_test(enable = "avx512f,avx512vl")]
57139    unsafe fn test_mm256_mask_cvtsepi32_storeu_epi8() {
57140        let a = _mm256_set1_epi32(i32::MAX);
57141        let mut r = _mm_set1_epi8(0);
57142        _mm256_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57143        #[rustfmt::skip]
57144        let e = _mm_set_epi8(
57145            0, 0, 0, 0,
57146            0, 0, 0, 0,
57147            i8::MAX, i8::MAX, i8::MAX, i8::MAX,
57148            i8::MAX, i8::MAX, i8::MAX, i8::MAX,
57149        );
57150        assert_eq_m128i(r, e);
57151    }
57152
57153    #[simd_test(enable = "avx512f,avx512vl")]
57154    unsafe fn test_mm_mask_cvtsepi32_storeu_epi8() {
57155        let a = _mm_set1_epi32(i32::MAX);
57156        let mut r = _mm_set1_epi8(0);
57157        _mm_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57158        #[rustfmt::skip]
57159        let e = _mm_set_epi8(
57160            0, 0, 0, 0,
57161            0, 0, 0, 0,
57162            0, 0, 0, 0,
57163            i8::MAX, i8::MAX, i8::MAX, i8::MAX,
57164        );
57165        assert_eq_m128i(r, e);
57166    }
57167
57168    #[simd_test(enable = "avx512f")]
57169    unsafe fn test_mm512_mask_cvtusepi32_storeu_epi8() {
57170        let a = _mm512_set1_epi32(i32::MAX);
57171        let mut r = _mm_undefined_si128();
57172        _mm512_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
57173        let e = _mm_set1_epi8(u8::MAX as i8);
57174        assert_eq_m128i(r, e);
57175    }
57176
57177    #[simd_test(enable = "avx512f,avx512vl")]
57178    unsafe fn test_mm256_mask_cvtusepi32_storeu_epi8() {
57179        let a = _mm256_set1_epi32(i32::MAX);
57180        let mut r = _mm_set1_epi8(0);
57181        _mm256_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57182        #[rustfmt::skip]
57183        let e = _mm_set_epi8(
57184            0, 0, 0, 0,
57185            0, 0, 0, 0,
57186            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
57187            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
57188        );
57189        assert_eq_m128i(r, e);
57190    }
57191
57192    #[simd_test(enable = "avx512f,avx512vl")]
57193    unsafe fn test_mm_mask_cvtusepi32_storeu_epi8() {
57194        let a = _mm_set1_epi32(i32::MAX);
57195        let mut r = _mm_set1_epi8(0);
57196        _mm_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57197        #[rustfmt::skip]
57198        let e = _mm_set_epi8(
57199            0, 0, 0, 0,
57200            0, 0, 0, 0,
57201            0, 0, 0, 0,
57202            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
57203        );
57204        assert_eq_m128i(r, e);
57205    }
57206
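    // Unaligned store tests: write through a raw pointer into a local vector and read
    // the value back; the *_storeu_* intrinsics impose no alignment requirement.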
57207    #[simd_test(enable = "avx512f")]
57208    unsafe fn test_mm512_storeu_epi32() {
57209        let a = _mm512_set1_epi32(9);
57210        let mut r = _mm512_undefined_epi32();
57211        _mm512_storeu_epi32(&mut r as *mut _ as *mut i32, a);
57212        assert_eq_m512i(r, a);
57213    }
57214
57215    #[simd_test(enable = "avx512f,avx512vl")]
57216    unsafe fn test_mm256_storeu_epi32() {
57217        let a = _mm256_set1_epi32(9);
57218        let mut r = _mm256_undefined_si256();
57219        _mm256_storeu_epi32(&mut r as *mut _ as *mut i32, a);
57220        assert_eq_m256i(r, a);
57221    }
57222
57223    #[simd_test(enable = "avx512f,avx512vl")]
57224    unsafe fn test_mm_storeu_epi32() {
57225        let a = _mm_set1_epi32(9);
57226        let mut r = _mm_undefined_si128();
57227        _mm_storeu_epi32(&mut r as *mut _ as *mut i32, a);
57228        assert_eq_m128i(r, a);
57229    }
57230
57231    #[simd_test(enable = "avx512f")]
57232    unsafe fn test_mm512_loadu_si512() {
57233        let a = &[4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50];
57234        let p = a.as_ptr();
57235        let r = _mm512_loadu_si512(black_box(p));
57236        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
57237        assert_eq_m512i(r, e);
57238    }
57239
57240    #[simd_test(enable = "avx512f")]
57241    unsafe fn test_mm512_storeu_si512() {
57242        let a = _mm512_set1_epi32(9);
57243        let mut r = _mm512_undefined_epi32();
57244        _mm512_storeu_si512(&mut r as *mut _, a);
57245        assert_eq_m512i(r, a);
57246    }
57247
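    // Aligned load tests wrap the data in a #[repr(align(64))] struct so the pointer
    // meets (or exceeds) the alignment each aligned load/store intrinsic requires.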
57248    #[simd_test(enable = "avx512f")]
57249    unsafe fn test_mm512_load_si512() {
57250        #[repr(align(64))]
57251        struct Align {
57252            data: [i32; 16], // 64 bytes
57253        }
57254        let a = Align {
57255            data: [4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50],
57256        };
57257        let p = (a.data).as_ptr();
57258        let r = _mm512_load_si512(black_box(p));
57259        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
57260        assert_eq_m512i(r, e);
57261    }
57262
57263    #[simd_test(enable = "avx512f")]
57264    unsafe fn test_mm512_store_si512() {
57265        let a = _mm512_set1_epi32(9);
57266        let mut r = _mm512_undefined_epi32();
57267        _mm512_store_si512(&mut r as *mut _, a);
57268        assert_eq_m512i(r, a);
57269    }
57270
57271    #[simd_test(enable = "avx512f")]
57272    unsafe fn test_mm512_load_epi32() {
57273        #[repr(align(64))]
57274        struct Align {
57275            data: [i32; 16], // 64 bytes
57276        }
57277        let a = Align {
57278            data: [4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50],
57279        };
57280        let p = (a.data).as_ptr();
57281        let r = _mm512_load_epi32(black_box(p));
57282        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
57283        assert_eq_m512i(r, e);
57284    }
57285
57286    #[simd_test(enable = "avx512f,avx512vl")]
57287    unsafe fn test_mm256_load_epi32() {
57288        #[repr(align(64))]
57289        struct Align {
57290            data: [i32; 8],
57291        }
57292        let a = Align {
57293            data: [4, 3, 2, 5, 8, 9, 64, 50],
57294        };
57295        let p = (a.data).as_ptr();
57296        let r = _mm256_load_epi32(black_box(p));
57297        let e = _mm256_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50);
57298        assert_eq_m256i(r, e);
57299    }
57300
57301    #[simd_test(enable = "avx512f,avx512vl")]
57302    unsafe fn test_mm_load_epi32() {
57303        #[repr(align(64))]
57304        struct Align {
57305            data: [i32; 4],
57306        }
57307        let a = Align { data: [4, 3, 2, 5] };
57308        let p = (a.data).as_ptr();
57309        let r = _mm_load_epi32(black_box(p));
57310        let e = _mm_setr_epi32(4, 3, 2, 5);
57311        assert_eq_m128i(r, e);
57312    }
57313
57314    #[simd_test(enable = "avx512f")]
57315    unsafe fn test_mm512_store_epi32() {
57316        let a = _mm512_set1_epi32(9);
57317        let mut r = _mm512_undefined_epi32();
57318        _mm512_store_epi32(&mut r as *mut _ as *mut i32, a);
57319        assert_eq_m512i(r, a);
57320    }
57321
57322    #[simd_test(enable = "avx512f,avx512vl")]
57323    unsafe fn test_mm256_store_epi32() {
57324        let a = _mm256_set1_epi32(9);
57325        let mut r = _mm256_undefined_si256();
57326        _mm256_store_epi32(&mut r as *mut _ as *mut i32, a);
57327        assert_eq_m256i(r, a);
57328    }
57329
57330    #[simd_test(enable = "avx512f,avx512vl")]
57331    unsafe fn test_mm_store_epi32() {
57332        let a = _mm_set1_epi32(9);
57333        let mut r = _mm_undefined_si128();
57334        _mm_store_epi32(&mut r as *mut _ as *mut i32, a);
57335        assert_eq_m128i(r, a);
57336    }
57337
57338    #[simd_test(enable = "avx512f")]
57339    unsafe fn test_mm512_load_ps() {
57340        #[repr(align(64))]
57341        struct Align {
57342            data: [f32; 16], // 64 bytes
57343        }
57344        let a = Align {
57345            data: [
57346                4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
57347            ],
57348        };
57349        let p = (a.data).as_ptr();
57350        let r = _mm512_load_ps(black_box(p));
57351        let e = _mm512_setr_ps(
57352            4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
57353        );
57354        assert_eq_m512(r, e);
57355    }
57356
57357    #[simd_test(enable = "avx512f")]
57358    unsafe fn test_mm512_store_ps() {
57359        let a = _mm512_set1_ps(9.);
57360        let mut r = _mm512_undefined_ps();
57361        _mm512_store_ps(&mut r as *mut _ as *mut f32, a);
57362        assert_eq_m512(r, a);
57363    }
57364
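    // Masked broadcast tests: a zero mask keeps src (writemask) or zeroes the lanes
    // (zeromask); an all-ones mask broadcasts the scalar into every lane.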
57365    #[simd_test(enable = "avx512f")]
57366    unsafe fn test_mm512_mask_set1_epi32() {
57367        let src = _mm512_set1_epi32(2);
57368        let a: i32 = 11;
57369        let r = _mm512_mask_set1_epi32(src, 0, a);
57370        assert_eq_m512i(r, src);
57371        let r = _mm512_mask_set1_epi32(src, 0b11111111_11111111, a);
57372        let e = _mm512_set1_epi32(11);
57373        assert_eq_m512i(r, e);
57374    }
57375
57376    #[simd_test(enable = "avx512f")]
57377    unsafe fn test_mm512_maskz_set1_epi32() {
57378        let a: i32 = 11;
57379        let r = _mm512_maskz_set1_epi32(0, a);
57380        assert_eq_m512i(r, _mm512_setzero_si512());
57381        let r = _mm512_maskz_set1_epi32(0b11111111_11111111, a);
57382        let e = _mm512_set1_epi32(11);
57383        assert_eq_m512i(r, e);
57384    }
57385
57386    #[simd_test(enable = "avx512f,avx512vl")]
57387    unsafe fn test_mm256_mask_set1_epi32() {
57388        let src = _mm256_set1_epi32(2);
57389        let a: i32 = 11;
57390        let r = _mm256_mask_set1_epi32(src, 0, a);
57391        assert_eq_m256i(r, src);
57392        let r = _mm256_mask_set1_epi32(src, 0b11111111, a);
57393        let e = _mm256_set1_epi32(11);
57394        assert_eq_m256i(r, e);
57395    }
57396
57397    #[simd_test(enable = "avx512f,avx512vl")]
57398    unsafe fn test_mm256_maskz_set1_epi32() {
57399        let a: i32 = 11;
57400        let r = _mm256_maskz_set1_epi32(0, a);
57401        assert_eq_m256i(r, _mm256_setzero_si256());
57402        let r = _mm256_maskz_set1_epi32(0b11111111, a);
57403        let e = _mm256_set1_epi32(11);
57404        assert_eq_m256i(r, e);
57405    }
57406
57407    #[simd_test(enable = "avx512f,avx512vl")]
57408    unsafe fn test_mm_mask_set1_epi32() {
57409        let src = _mm_set1_epi32(2);
57410        let a: i32 = 11;
57411        let r = _mm_mask_set1_epi32(src, 0, a);
57412        assert_eq_m128i(r, src);
57413        let r = _mm_mask_set1_epi32(src, 0b00001111, a);
57414        let e = _mm_set1_epi32(11);
57415        assert_eq_m128i(r, e);
57416    }
57417
57418    #[simd_test(enable = "avx512f,avx512vl")]
57419    unsafe fn test_mm_maskz_set1_epi32() {
57420        let a: i32 = 11;
57421        let r = _mm_maskz_set1_epi32(0, a);
57422        assert_eq_m128i(r, _mm_setzero_si128());
57423        let r = _mm_maskz_set1_epi32(0b00001111, a);
57424        let e = _mm_set1_epi32(11);
57425        assert_eq_m128i(r, e);
57426    }
57427
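    // The scalar (ss/sd) masked tests below only exercise element 0: only bit 0 of
    // the mask is consulted, so a mask of 0b11111111 behaves the same as 0b1, and
    // the upper elements are simply carried over from one of the source vectors.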
57428    #[simd_test(enable = "avx512f")]
57429    unsafe fn test_mm_mask_move_ss() {
57430        let src = _mm_set_ps(10., 11., 100., 110.);
57431        let a = _mm_set_ps(1., 2., 10., 20.);
57432        let b = _mm_set_ps(3., 4., 30., 40.);
57433        let r = _mm_mask_move_ss(src, 0, a, b);
57434        let e = _mm_set_ps(1., 2., 10., 110.);
57435        assert_eq_m128(r, e);
57436        let r = _mm_mask_move_ss(src, 0b11111111, a, b);
57437        let e = _mm_set_ps(1., 2., 10., 40.);
57438        assert_eq_m128(r, e);
57439    }
57440
57441    #[simd_test(enable = "avx512f")]
57442    unsafe fn test_mm_maskz_move_ss() {
57443        let a = _mm_set_ps(1., 2., 10., 20.);
57444        let b = _mm_set_ps(3., 4., 30., 40.);
57445        let r = _mm_maskz_move_ss(0, a, b);
57446        let e = _mm_set_ps(1., 2., 10., 0.);
57447        assert_eq_m128(r, e);
57448        let r = _mm_maskz_move_ss(0b11111111, a, b);
57449        let e = _mm_set_ps(1., 2., 10., 40.);
57450        assert_eq_m128(r, e);
57451    }
57452
57453    #[simd_test(enable = "avx512f")]
57454    unsafe fn test_mm_mask_move_sd() {
57455        let src = _mm_set_pd(10., 11.);
57456        let a = _mm_set_pd(1., 2.);
57457        let b = _mm_set_pd(3., 4.);
57458        let r = _mm_mask_move_sd(src, 0, a, b);
57459        let e = _mm_set_pd(1., 11.);
57460        assert_eq_m128d(r, e);
57461        let r = _mm_mask_move_sd(src, 0b11111111, a, b);
57462        let e = _mm_set_pd(1., 4.);
57463        assert_eq_m128d(r, e);
57464    }
57465
57466    #[simd_test(enable = "avx512f")]
57467    unsafe fn test_mm_maskz_move_sd() {
57468        let a = _mm_set_pd(1., 2.);
57469        let b = _mm_set_pd(3., 4.);
57470        let r = _mm_maskz_move_sd(0, a, b);
57471        let e = _mm_set_pd(1., 0.);
57472        assert_eq_m128d(r, e);
57473        let r = _mm_maskz_move_sd(0b11111111, a, b);
57474        let e = _mm_set_pd(1., 4.);
57475        assert_eq_m128d(r, e);
57476    }
57477
57478    #[simd_test(enable = "avx512f")]
57479    unsafe fn test_mm_mask_add_ss() {
57480        let src = _mm_set_ps(10., 11., 100., 110.);
57481        let a = _mm_set_ps(1., 2., 10., 20.);
57482        let b = _mm_set_ps(3., 4., 30., 40.);
57483        let r = _mm_mask_add_ss(src, 0, a, b);
57484        let e = _mm_set_ps(1., 2., 10., 110.);
57485        assert_eq_m128(r, e);
57486        let r = _mm_mask_add_ss(src, 0b11111111, a, b);
57487        let e = _mm_set_ps(1., 2., 10., 60.);
57488        assert_eq_m128(r, e);
57489    }
57490
57491    #[simd_test(enable = "avx512f")]
57492    unsafe fn test_mm_maskz_add_ss() {
57493        let a = _mm_set_ps(1., 2., 10., 20.);
57494        let b = _mm_set_ps(3., 4., 30., 40.);
57495        let r = _mm_maskz_add_ss(0, a, b);
57496        let e = _mm_set_ps(1., 2., 10., 0.);
57497        assert_eq_m128(r, e);
57498        let r = _mm_maskz_add_ss(0b11111111, a, b);
57499        let e = _mm_set_ps(1., 2., 10., 60.);
57500        assert_eq_m128(r, e);
57501    }
57502
57503    #[simd_test(enable = "avx512f")]
57504    unsafe fn test_mm_mask_add_sd() {
57505        let src = _mm_set_pd(10., 11.);
57506        let a = _mm_set_pd(1., 2.);
57507        let b = _mm_set_pd(3., 4.);
57508        let r = _mm_mask_add_sd(src, 0, a, b);
57509        let e = _mm_set_pd(1., 11.);
57510        assert_eq_m128d(r, e);
57511        let r = _mm_mask_add_sd(src, 0b11111111, a, b);
57512        let e = _mm_set_pd(1., 6.);
57513        assert_eq_m128d(r, e);
57514    }
57515
57516    #[simd_test(enable = "avx512f")]
57517    unsafe fn test_mm_maskz_add_sd() {
57518        let a = _mm_set_pd(1., 2.);
57519        let b = _mm_set_pd(3., 4.);
57520        let r = _mm_maskz_add_sd(0, a, b);
57521        let e = _mm_set_pd(1., 0.);
57522        assert_eq_m128d(r, e);
57523        let r = _mm_maskz_add_sd(0b11111111, a, b);
57524        let e = _mm_set_pd(1., 6.);
57525        assert_eq_m128d(r, e);
57526    }
57527
57528    #[simd_test(enable = "avx512f")]
57529    unsafe fn test_mm_mask_sub_ss() {
57530        let src = _mm_set_ps(10., 11., 100., 110.);
57531        let a = _mm_set_ps(1., 2., 10., 20.);
57532        let b = _mm_set_ps(3., 4., 30., 40.);
57533        let r = _mm_mask_sub_ss(src, 0, a, b);
57534        let e = _mm_set_ps(1., 2., 10., 110.);
57535        assert_eq_m128(r, e);
57536        let r = _mm_mask_sub_ss(src, 0b11111111, a, b);
57537        let e = _mm_set_ps(1., 2., 10., -20.);
57538        assert_eq_m128(r, e);
57539    }
57540
57541    #[simd_test(enable = "avx512f")]
57542    unsafe fn test_mm_maskz_sub_ss() {
57543        let a = _mm_set_ps(1., 2., 10., 20.);
57544        let b = _mm_set_ps(3., 4., 30., 40.);
57545        let r = _mm_maskz_sub_ss(0, a, b);
57546        let e = _mm_set_ps(1., 2., 10., 0.);
57547        assert_eq_m128(r, e);
57548        let r = _mm_maskz_sub_ss(0b11111111, a, b);
57549        let e = _mm_set_ps(1., 2., 10., -20.);
57550        assert_eq_m128(r, e);
57551    }
57552
57553    #[simd_test(enable = "avx512f")]
57554    unsafe fn test_mm_mask_sub_sd() {
57555        let src = _mm_set_pd(10., 11.);
57556        let a = _mm_set_pd(1., 2.);
57557        let b = _mm_set_pd(3., 4.);
57558        let r = _mm_mask_sub_sd(src, 0, a, b);
57559        let e = _mm_set_pd(1., 11.);
57560        assert_eq_m128d(r, e);
57561        let r = _mm_mask_sub_sd(src, 0b11111111, a, b);
57562        let e = _mm_set_pd(1., -2.);
57563        assert_eq_m128d(r, e);
57564    }
57565
57566    #[simd_test(enable = "avx512f")]
57567    unsafe fn test_mm_maskz_sub_sd() {
57568        let a = _mm_set_pd(1., 2.);
57569        let b = _mm_set_pd(3., 4.);
57570        let r = _mm_maskz_sub_sd(0, a, b);
57571        let e = _mm_set_pd(1., 0.);
57572        assert_eq_m128d(r, e);
57573        let r = _mm_maskz_sub_sd(0b11111111, a, b);
57574        let e = _mm_set_pd(1., -2.);
57575        assert_eq_m128d(r, e);
57576    }
57577
57578    #[simd_test(enable = "avx512f")]
57579    unsafe fn test_mm_mask_mul_ss() {
57580        let src = _mm_set_ps(10., 11., 100., 110.);
57581        let a = _mm_set_ps(1., 2., 10., 20.);
57582        let b = _mm_set_ps(3., 4., 30., 40.);
57583        let r = _mm_mask_mul_ss(src, 0, a, b);
57584        let e = _mm_set_ps(1., 2., 10., 110.);
57585        assert_eq_m128(r, e);
57586        let r = _mm_mask_mul_ss(src, 0b11111111, a, b);
57587        let e = _mm_set_ps(1., 2., 10., 800.);
57588        assert_eq_m128(r, e);
57589    }
57590
57591    #[simd_test(enable = "avx512f")]
57592    unsafe fn test_mm_maskz_mul_ss() {
57593        let a = _mm_set_ps(1., 2., 10., 20.);
57594        let b = _mm_set_ps(3., 4., 30., 40.);
57595        let r = _mm_maskz_mul_ss(0, a, b);
57596        let e = _mm_set_ps(1., 2., 10., 0.);
57597        assert_eq_m128(r, e);
57598        let r = _mm_maskz_mul_ss(0b11111111, a, b);
57599        let e = _mm_set_ps(1., 2., 10., 800.);
57600        assert_eq_m128(r, e);
57601    }
57602
57603    #[simd_test(enable = "avx512f")]
57604    unsafe fn test_mm_mask_mul_sd() {
57605        let src = _mm_set_pd(10., 11.);
57606        let a = _mm_set_pd(1., 2.);
57607        let b = _mm_set_pd(3., 4.);
57608        let r = _mm_mask_mul_sd(src, 0, a, b);
57609        let e = _mm_set_pd(1., 11.);
57610        assert_eq_m128d(r, e);
57611        let r = _mm_mask_mul_sd(src, 0b11111111, a, b);
57612        let e = _mm_set_pd(1., 8.);
57613        assert_eq_m128d(r, e);
57614    }
57615
57616    #[simd_test(enable = "avx512f")]
57617    unsafe fn test_mm_maskz_mul_sd() {
57618        let a = _mm_set_pd(1., 2.);
57619        let b = _mm_set_pd(3., 4.);
57620        let r = _mm_maskz_mul_sd(0, a, b);
57621        let e = _mm_set_pd(1., 0.);
57622        assert_eq_m128d(r, e);
57623        let r = _mm_maskz_mul_sd(0b11111111, a, b);
57624        let e = _mm_set_pd(1., 8.);
57625        assert_eq_m128d(r, e);
57626    }
57627
57628    #[simd_test(enable = "avx512f")]
57629    unsafe fn test_mm_mask_div_ss() {
57630        let src = _mm_set_ps(10., 11., 100., 110.);
57631        let a = _mm_set_ps(1., 2., 10., 20.);
57632        let b = _mm_set_ps(3., 4., 30., 40.);
57633        let r = _mm_mask_div_ss(src, 0, a, b);
57634        let e = _mm_set_ps(1., 2., 10., 110.);
57635        assert_eq_m128(r, e);
57636        let r = _mm_mask_div_ss(src, 0b11111111, a, b);
57637        let e = _mm_set_ps(1., 2., 10., 0.5);
57638        assert_eq_m128(r, e);
57639    }
57640
57641    #[simd_test(enable = "avx512f")]
57642    unsafe fn test_mm_maskz_div_ss() {
57643        let a = _mm_set_ps(1., 2., 10., 20.);
57644        let b = _mm_set_ps(3., 4., 30., 40.);
57645        let r = _mm_maskz_div_ss(0, a, b);
57646        let e = _mm_set_ps(1., 2., 10., 0.);
57647        assert_eq_m128(r, e);
57648        let r = _mm_maskz_div_ss(0b11111111, a, b);
57649        let e = _mm_set_ps(1., 2., 10., 0.5);
57650        assert_eq_m128(r, e);
57651    }
57652
57653    #[simd_test(enable = "avx512f")]
57654    unsafe fn test_mm_mask_div_sd() {
57655        let src = _mm_set_pd(10., 11.);
57656        let a = _mm_set_pd(1., 2.);
57657        let b = _mm_set_pd(3., 4.);
57658        let r = _mm_mask_div_sd(src, 0, a, b);
57659        let e = _mm_set_pd(1., 11.);
57660        assert_eq_m128d(r, e);
57661        let r = _mm_mask_div_sd(src, 0b11111111, a, b);
57662        let e = _mm_set_pd(1., 0.5);
57663        assert_eq_m128d(r, e);
57664    }
57665
57666    #[simd_test(enable = "avx512f")]
57667    unsafe fn test_mm_maskz_div_sd() {
57668        let a = _mm_set_pd(1., 2.);
57669        let b = _mm_set_pd(3., 4.);
57670        let r = _mm_maskz_div_sd(0, a, b);
57671        let e = _mm_set_pd(1., 0.);
57672        assert_eq_m128d(r, e);
57673        let r = _mm_maskz_div_sd(0b11111111, a, b);
57674        let e = _mm_set_pd(1., 0.5);
57675        assert_eq_m128d(r, e);
57676    }
57677
57678    #[simd_test(enable = "avx512f")]
57679    unsafe fn test_mm_mask_max_ss() {
57680        let a = _mm_set_ps(0., 1., 2., 3.);
57681        let b = _mm_set_ps(4., 5., 6., 7.);
57682        let r = _mm_mask_max_ss(a, 0, a, b);
57683        let e = _mm_set_ps(0., 1., 2., 3.);
57684        assert_eq_m128(r, e);
57685        let r = _mm_mask_max_ss(a, 0b11111111, a, b);
57686        let e = _mm_set_ps(0., 1., 2., 7.);
57687        assert_eq_m128(r, e);
57688    }
57689
57690    #[simd_test(enable = "avx512f")]
57691    unsafe fn test_mm_maskz_max_ss() {
57692        let a = _mm_set_ps(0., 1., 2., 3.);
57693        let b = _mm_set_ps(4., 5., 6., 7.);
57694        let r = _mm_maskz_max_ss(0, a, b);
57695        let e = _mm_set_ps(0., 1., 2., 0.);
57696        assert_eq_m128(r, e);
57697        let r = _mm_maskz_max_ss(0b11111111, a, b);
57698        let e = _mm_set_ps(0., 1., 2., 7.);
57699        assert_eq_m128(r, e);
57700    }
57701
57702    #[simd_test(enable = "avx512f")]
57703    unsafe fn test_mm_mask_max_sd() {
57704        let a = _mm_set_pd(0., 1.);
57705        let b = _mm_set_pd(2., 3.);
57706        let r = _mm_mask_max_sd(a, 0, a, b);
57707        let e = _mm_set_pd(0., 1.);
57708        assert_eq_m128d(r, e);
57709        let r = _mm_mask_max_sd(a, 0b11111111, a, b);
57710        let e = _mm_set_pd(0., 3.);
57711        assert_eq_m128d(r, e);
57712    }
57713
57714    #[simd_test(enable = "avx512f")]
57715    unsafe fn test_mm_maskz_max_sd() {
57716        let a = _mm_set_pd(0., 1.);
57717        let b = _mm_set_pd(2., 3.);
57718        let r = _mm_maskz_max_sd(0, a, b);
57719        let e = _mm_set_pd(0., 0.);
57720        assert_eq_m128d(r, e);
57721        let r = _mm_maskz_max_sd(0b11111111, a, b);
57722        let e = _mm_set_pd(0., 3.);
57723        assert_eq_m128d(r, e);
57724    }
57725
57726    #[simd_test(enable = "avx512f")]
57727    unsafe fn test_mm_mask_min_ss() {
57728        let a = _mm_set_ps(0., 1., 2., 3.);
57729        let b = _mm_set_ps(4., 5., 6., 7.);
57730        let r = _mm_mask_min_ss(a, 0, a, b);
57731        let e = _mm_set_ps(0., 1., 2., 3.);
57732        assert_eq_m128(r, e);
57733        let r = _mm_mask_min_ss(a, 0b11111111, a, b);
57734        let e = _mm_set_ps(0., 1., 2., 3.);
57735        assert_eq_m128(r, e);
57736    }
57737
57738    #[simd_test(enable = "avx512f")]
57739    unsafe fn test_mm_maskz_min_ss() {
57740        let a = _mm_set_ps(0., 1., 2., 3.);
57741        let b = _mm_set_ps(4., 5., 6., 7.);
57742        let r = _mm_maskz_min_ss(0, a, b);
57743        let e = _mm_set_ps(0., 1., 2., 0.);
57744        assert_eq_m128(r, e);
57745        let r = _mm_maskz_min_ss(0b11111111, a, b);
57746        let e = _mm_set_ps(0., 1., 2., 3.);
57747        assert_eq_m128(r, e);
57748    }
57749
57750    #[simd_test(enable = "avx512f")]
57751    unsafe fn test_mm_mask_min_sd() {
57752        let a = _mm_set_pd(0., 1.);
57753        let b = _mm_set_pd(2., 3.);
57754        let r = _mm_mask_min_sd(a, 0, a, b);
57755        let e = _mm_set_pd(0., 1.);
57756        assert_eq_m128d(r, e);
57757        let r = _mm_mask_min_sd(a, 0b11111111, a, b);
57758        let e = _mm_set_pd(0., 1.);
57759        assert_eq_m128d(r, e);
57760    }
57761
57762    #[simd_test(enable = "avx512f")]
57763    unsafe fn test_mm_maskz_min_sd() {
57764        let a = _mm_set_pd(0., 1.);
57765        let b = _mm_set_pd(2., 3.);
57766        let r = _mm_maskz_min_sd(0, a, b);
57767        let e = _mm_set_pd(0., 0.);
57768        assert_eq_m128d(r, e);
57769        let r = _mm_maskz_min_sd(0b11111111, a, b);
57770        let e = _mm_set_pd(0., 1.);
57771        assert_eq_m128d(r, e);
57772    }
57773
57774    #[simd_test(enable = "avx512f")]
57775    unsafe fn test_mm_mask_sqrt_ss() {
57776        let src = _mm_set_ps(10., 11., 100., 110.);
57777        let a = _mm_set_ps(1., 2., 10., 20.);
57778        let b = _mm_set_ps(3., 4., 30., 4.);
57779        let r = _mm_mask_sqrt_ss(src, 0, a, b);
57780        let e = _mm_set_ps(1., 2., 10., 110.);
57781        assert_eq_m128(r, e);
57782        let r = _mm_mask_sqrt_ss(src, 0b11111111, a, b);
57783        let e = _mm_set_ps(1., 2., 10., 2.);
57784        assert_eq_m128(r, e);
57785    }
57786
57787    #[simd_test(enable = "avx512f")]
57788    unsafe fn test_mm_maskz_sqrt_ss() {
57789        let a = _mm_set_ps(1., 2., 10., 20.);
57790        let b = _mm_set_ps(3., 4., 30., 4.);
57791        let r = _mm_maskz_sqrt_ss(0, a, b);
57792        let e = _mm_set_ps(1., 2., 10., 0.);
57793        assert_eq_m128(r, e);
57794        let r = _mm_maskz_sqrt_ss(0b11111111, a, b);
57795        let e = _mm_set_ps(1., 2., 10., 2.);
57796        assert_eq_m128(r, e);
57797    }
57798
57799    #[simd_test(enable = "avx512f")]
57800    unsafe fn test_mm_mask_sqrt_sd() {
57801        let src = _mm_set_pd(10., 11.);
57802        let a = _mm_set_pd(1., 2.);
57803        let b = _mm_set_pd(3., 4.);
57804        let r = _mm_mask_sqrt_sd(src, 0, a, b);
57805        let e = _mm_set_pd(1., 11.);
57806        assert_eq_m128d(r, e);
57807        let r = _mm_mask_sqrt_sd(src, 0b11111111, a, b);
57808        let e = _mm_set_pd(1., 2.);
57809        assert_eq_m128d(r, e);
57810    }
57811
57812    #[simd_test(enable = "avx512f")]
57813    unsafe fn test_mm_maskz_sqrt_sd() {
57814        let a = _mm_set_pd(1., 2.);
57815        let b = _mm_set_pd(3., 4.);
57816        let r = _mm_maskz_sqrt_sd(0, a, b);
57817        let e = _mm_set_pd(1., 0.);
57818        assert_eq_m128d(r, e);
57819        let r = _mm_maskz_sqrt_sd(0b11111111, a, b);
57820        let e = _mm_set_pd(1., 2.);
57821        assert_eq_m128d(r, e);
57822    }
57823
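    // rsqrt14/rcp14 are approximations with a relative error of at most 2^-14; the
    // power-of-two inputs used here yield exact results in practice, so plain
    // equality can be asserted.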
57824    #[simd_test(enable = "avx512f")]
57825    unsafe fn test_mm_rsqrt14_ss() {
57826        let a = _mm_set_ps(1., 2., 10., 20.);
57827        let b = _mm_set_ps(3., 4., 30., 4.);
57828        let r = _mm_rsqrt14_ss(a, b);
57829        let e = _mm_set_ps(1., 2., 10., 0.5);
57830        assert_eq_m128(r, e);
57831    }
57832
57833    #[simd_test(enable = "avx512f")]
57834    unsafe fn test_mm_mask_rsqrt14_ss() {
57835        let src = _mm_set_ps(10., 11., 100., 110.);
57836        let a = _mm_set_ps(1., 2., 10., 20.);
57837        let b = _mm_set_ps(3., 4., 30., 4.);
57838        let r = _mm_mask_rsqrt14_ss(src, 0, a, b);
57839        let e = _mm_set_ps(1., 2., 10., 110.);
57840        assert_eq_m128(r, e);
57841        let r = _mm_mask_rsqrt14_ss(src, 0b11111111, a, b);
57842        let e = _mm_set_ps(1., 2., 10., 0.5);
57843        assert_eq_m128(r, e);
57844    }
57845
57846    #[simd_test(enable = "avx512f")]
57847    unsafe fn test_mm_maskz_rsqrt14_ss() {
57848        let a = _mm_set_ps(1., 2., 10., 20.);
57849        let b = _mm_set_ps(3., 4., 30., 4.);
57850        let r = _mm_maskz_rsqrt14_ss(0, a, b);
57851        let e = _mm_set_ps(1., 2., 10., 0.);
57852        assert_eq_m128(r, e);
57853        let r = _mm_maskz_rsqrt14_ss(0b11111111, a, b);
57854        let e = _mm_set_ps(1., 2., 10., 0.5);
57855        assert_eq_m128(r, e);
57856    }
57857
57858    #[simd_test(enable = "avx512f")]
57859    unsafe fn test_mm_rsqrt14_sd() {
57860        let a = _mm_set_pd(1., 2.);
57861        let b = _mm_set_pd(3., 4.);
57862        let r = _mm_rsqrt14_sd(a, b);
57863        let e = _mm_set_pd(1., 0.5);
57864        assert_eq_m128d(r, e);
57865    }
57866
57867    #[simd_test(enable = "avx512f")]
57868    unsafe fn test_mm_mask_rsqrt14_sd() {
57869        let src = _mm_set_pd(10., 11.);
57870        let a = _mm_set_pd(1., 2.);
57871        let b = _mm_set_pd(3., 4.);
57872        let r = _mm_mask_rsqrt14_sd(src, 0, a, b);
57873        let e = _mm_set_pd(1., 11.);
57874        assert_eq_m128d(r, e);
57875        let r = _mm_mask_rsqrt14_sd(src, 0b11111111, a, b);
57876        let e = _mm_set_pd(1., 0.5);
57877        assert_eq_m128d(r, e);
57878    }
57879
57880    #[simd_test(enable = "avx512f")]
57881    unsafe fn test_mm_maskz_rsqrt14_sd() {
57882        let a = _mm_set_pd(1., 2.);
57883        let b = _mm_set_pd(3., 4.);
57884        let r = _mm_maskz_rsqrt14_sd(0, a, b);
57885        let e = _mm_set_pd(1., 0.);
57886        assert_eq_m128d(r, e);
57887        let r = _mm_maskz_rsqrt14_sd(0b11111111, a, b);
57888        let e = _mm_set_pd(1., 0.5);
57889        assert_eq_m128d(r, e);
57890    }
57891
57892    #[simd_test(enable = "avx512f")]
57893    unsafe fn test_mm_rcp14_ss() {
57894        let a = _mm_set_ps(1., 2., 10., 20.);
57895        let b = _mm_set_ps(3., 4., 30., 4.);
57896        let r = _mm_rcp14_ss(a, b);
57897        let e = _mm_set_ps(1., 2., 10., 0.25);
57898        assert_eq_m128(r, e);
57899    }
57900
57901    #[simd_test(enable = "avx512f")]
57902    unsafe fn test_mm_mask_rcp14_ss() {
57903        let src = _mm_set_ps(10., 11., 100., 110.);
57904        let a = _mm_set_ps(1., 2., 10., 20.);
57905        let b = _mm_set_ps(3., 4., 30., 4.);
57906        let r = _mm_mask_rcp14_ss(src, 0, a, b);
57907        let e = _mm_set_ps(1., 2., 10., 110.);
57908        assert_eq_m128(r, e);
57909        let r = _mm_mask_rcp14_ss(src, 0b11111111, a, b);
57910        let e = _mm_set_ps(1., 2., 10., 0.25);
57911        assert_eq_m128(r, e);
57912    }
57913
57914    #[simd_test(enable = "avx512f")]
57915    unsafe fn test_mm_maskz_rcp14_ss() {
57916        let a = _mm_set_ps(1., 2., 10., 20.);
57917        let b = _mm_set_ps(3., 4., 30., 4.);
57918        let r = _mm_maskz_rcp14_ss(0, a, b);
57919        let e = _mm_set_ps(1., 2., 10., 0.);
57920        assert_eq_m128(r, e);
57921        let r = _mm_maskz_rcp14_ss(0b11111111, a, b);
57922        let e = _mm_set_ps(1., 2., 10., 0.25);
57923        assert_eq_m128(r, e);
57924    }
57925
57926    #[simd_test(enable = "avx512f")]
57927    unsafe fn test_mm_rcp14_sd() {
57928        let a = _mm_set_pd(1., 2.);
57929        let b = _mm_set_pd(3., 4.);
57930        let r = _mm_rcp14_sd(a, b);
57931        let e = _mm_set_pd(1., 0.25);
57932        assert_eq_m128d(r, e);
57933    }
57934
57935    #[simd_test(enable = "avx512f")]
57936    unsafe fn test_mm_mask_rcp14_sd() {
57937        let src = _mm_set_pd(10., 11.);
57938        let a = _mm_set_pd(1., 2.);
57939        let b = _mm_set_pd(3., 4.);
57940        let r = _mm_mask_rcp14_sd(src, 0, a, b);
57941        let e = _mm_set_pd(1., 11.);
57942        assert_eq_m128d(r, e);
57943        let r = _mm_mask_rcp14_sd(src, 0b11111111, a, b);
57944        let e = _mm_set_pd(1., 0.25);
57945        assert_eq_m128d(r, e);
57946    }
57947
57948    #[simd_test(enable = "avx512f")]
57949    unsafe fn test_mm_maskz_rcp14_sd() {
57950        let a = _mm_set_pd(1., 2.);
57951        let b = _mm_set_pd(3., 4.);
57952        let r = _mm_maskz_rcp14_sd(0, a, b);
57953        let e = _mm_set_pd(1., 0.);
57954        assert_eq_m128d(r, e);
57955        let r = _mm_maskz_rcp14_sd(0b11111111, a, b);
57956        let e = _mm_set_pd(1., 0.25);
57957        assert_eq_m128d(r, e);
57958    }
57959
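    // getexp returns floor(log2(|x|)) as a float (3.0 -> 1.0); getmant with
    // _MM_MANT_NORM_1_2 normalizes the mantissa into [1, 2) (10.0 -> 1.25).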
57960    #[simd_test(enable = "avx512f")]
57961    unsafe fn test_mm_getexp_ss() {
57962        let a = _mm_set1_ps(2.);
57963        let b = _mm_set1_ps(3.);
57964        let r = _mm_getexp_ss(a, b);
57965        let e = _mm_set_ps(2., 2., 2., 1.);
57966        assert_eq_m128(r, e);
57967    }
57968
57969    #[simd_test(enable = "avx512f")]
57970    unsafe fn test_mm_mask_getexp_ss() {
57971        let a = _mm_set1_ps(2.);
57972        let b = _mm_set1_ps(3.);
57973        let r = _mm_mask_getexp_ss(a, 0, a, b);
57974        let e = _mm_set_ps(2., 2., 2., 2.);
57975        assert_eq_m128(r, e);
57976        let r = _mm_mask_getexp_ss(a, 0b11111111, a, b);
57977        let e = _mm_set_ps(2., 2., 2., 1.);
57978        assert_eq_m128(r, e);
57979    }
57980
57981    #[simd_test(enable = "avx512f")]
57982    unsafe fn test_mm_maskz_getexp_ss() {
57983        let a = _mm_set1_ps(2.);
57984        let b = _mm_set1_ps(3.);
57985        let r = _mm_maskz_getexp_ss(0, a, b);
57986        let e = _mm_set_ps(2., 2., 2., 0.);
57987        assert_eq_m128(r, e);
57988        let r = _mm_maskz_getexp_ss(0b11111111, a, b);
57989        let e = _mm_set_ps(2., 2., 2., 1.);
57990        assert_eq_m128(r, e);
57991    }
57992
57993    #[simd_test(enable = "avx512f")]
57994    unsafe fn test_mm_getexp_sd() {
57995        let a = _mm_set1_pd(2.);
57996        let b = _mm_set1_pd(3.);
57997        let r = _mm_getexp_sd(a, b);
57998        let e = _mm_set_pd(2., 1.);
57999        assert_eq_m128d(r, e);
58000    }
58001
58002    #[simd_test(enable = "avx512f")]
58003    unsafe fn test_mm_mask_getexp_sd() {
58004        let a = _mm_set1_pd(2.);
58005        let b = _mm_set1_pd(3.);
58006        let r = _mm_mask_getexp_sd(a, 0, a, b);
58007        let e = _mm_set_pd(2., 2.);
58008        assert_eq_m128d(r, e);
58009        let r = _mm_mask_getexp_sd(a, 0b11111111, a, b);
58010        let e = _mm_set_pd(2., 1.);
58011        assert_eq_m128d(r, e);
58012    }
58013
58014    #[simd_test(enable = "avx512f")]
58015    unsafe fn test_mm_maskz_getexp_sd() {
58016        let a = _mm_set1_pd(2.);
58017        let b = _mm_set1_pd(3.);
58018        let r = _mm_maskz_getexp_sd(0, a, b);
58019        let e = _mm_set_pd(2., 0.);
58020        assert_eq_m128d(r, e);
58021        let r = _mm_maskz_getexp_sd(0b11111111, a, b);
58022        let e = _mm_set_pd(2., 1.);
58023        assert_eq_m128d(r, e);
58024    }
58025
58026    #[simd_test(enable = "avx512f")]
58027    unsafe fn test_mm_getmant_ss() {
58028        let a = _mm_set1_ps(20.);
58029        let b = _mm_set1_ps(10.);
58030        let r = _mm_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, b);
58031        let e = _mm_set_ps(20., 20., 20., 1.25);
58032        assert_eq_m128(r, e);
58033    }
58034
58035    #[simd_test(enable = "avx512f")]
58036    unsafe fn test_mm_mask_getmant_ss() {
58037        let a = _mm_set1_ps(20.);
58038        let b = _mm_set1_ps(10.);
58039        let r = _mm_mask_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a, b);
58040        let e = _mm_set_ps(20., 20., 20., 20.);
58041        assert_eq_m128(r, e);
58042        let r = _mm_mask_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a, b);
58043        let e = _mm_set_ps(20., 20., 20., 1.25);
58044        assert_eq_m128(r, e);
58045    }
58046
58047    #[simd_test(enable = "avx512f")]
58048    unsafe fn test_mm_maskz_getmant_ss() {
58049        let a = _mm_set1_ps(20.);
58050        let b = _mm_set1_ps(10.);
58051        let r = _mm_maskz_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a, b);
58052        let e = _mm_set_ps(20., 20., 20., 0.);
58053        assert_eq_m128(r, e);
58054        let r = _mm_maskz_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a, b);
58055        let e = _mm_set_ps(20., 20., 20., 1.25);
58056        assert_eq_m128(r, e);
58057    }
58058
58059    #[simd_test(enable = "avx512f")]
58060    unsafe fn test_mm_getmant_sd() {
58061        let a = _mm_set1_pd(20.);
58062        let b = _mm_set1_pd(10.);
58063        let r = _mm_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, b);
58064        let e = _mm_set_pd(20., 1.25);
58065        assert_eq_m128d(r, e);
58066    }
58067
58068    #[simd_test(enable = "avx512f")]
58069    unsafe fn test_mm_mask_getmant_sd() {
58070        let a = _mm_set1_pd(20.);
58071        let b = _mm_set1_pd(10.);
58072        let r = _mm_mask_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a, b);
58073        let e = _mm_set_pd(20., 20.);
58074        assert_eq_m128d(r, e);
58075        let r = _mm_mask_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a, b);
58076        let e = _mm_set_pd(20., 1.25);
58077        assert_eq_m128d(r, e);
58078    }
58079
58080    #[simd_test(enable = "avx512f")]
58081    unsafe fn test_mm_maskz_getmant_sd() {
58082        let a = _mm_set1_pd(20.);
58083        let b = _mm_set1_pd(10.);
58084        let r = _mm_maskz_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a, b);
58085        let e = _mm_set_pd(20., 0.);
58086        assert_eq_m128d(r, e);
58087        let r = _mm_maskz_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a, b);
58088        let e = _mm_set_pd(20., 1.25);
58089        assert_eq_m128d(r, e);
58090    }
58091
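    // roundscale computes 2^-M * round(x * 2^M) with M taken from IMM8[7:4]; with
    // IMM8 = 0 this is a plain round to nearest, so 1.1 becomes 1.0.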
58092    #[simd_test(enable = "avx512f")]
58093    unsafe fn test_mm_roundscale_ss() {
58094        let a = _mm_set1_ps(2.2);
58095        let b = _mm_set1_ps(1.1);
58096        let r = _mm_roundscale_ss::<0>(a, b);
58097        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
58098        assert_eq_m128(r, e);
58099    }
58100
58101    #[simd_test(enable = "avx512f")]
58102    unsafe fn test_mm_mask_roundscale_ss() {
58103        let a = _mm_set1_ps(2.2);
58104        let b = _mm_set1_ps(1.1);
58105        let r = _mm_mask_roundscale_ss::<0>(a, 0, a, b);
58106        let e = _mm_set_ps(2.2, 2.2, 2.2, 2.2);
58107        assert_eq_m128(r, e);
58108        let r = _mm_mask_roundscale_ss::<0>(a, 0b11111111, a, b);
58109        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
58110        assert_eq_m128(r, e);
58111    }
58112
58113    #[simd_test(enable = "avx512f")]
58114    unsafe fn test_mm_maskz_roundscale_ss() {
58115        let a = _mm_set1_ps(2.2);
58116        let b = _mm_set1_ps(1.1);
58117        let r = _mm_maskz_roundscale_ss::<0>(0, a, b);
58118        let e = _mm_set_ps(2.2, 2.2, 2.2, 0.0);
58119        assert_eq_m128(r, e);
58120        let r = _mm_maskz_roundscale_ss::<0>(0b11111111, a, b);
58121        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
58122        assert_eq_m128(r, e);
58123    }
58124
58125    #[simd_test(enable = "avx512f")]
58126    unsafe fn test_mm_roundscale_sd() {
58127        let a = _mm_set1_pd(2.2);
58128        let b = _mm_set1_pd(1.1);
58129        let r = _mm_roundscale_sd::<0>(a, b);
58130        let e = _mm_set_pd(2.2, 1.0);
58131        assert_eq_m128d(r, e);
58132    }
58133
58134    #[simd_test(enable = "avx512f")]
58135    unsafe fn test_mm_mask_roundscale_sd() {
58136        let a = _mm_set1_pd(2.2);
58137        let b = _mm_set1_pd(1.1);
58138        let r = _mm_mask_roundscale_sd::<0>(a, 0, a, b);
58139        let e = _mm_set_pd(2.2, 2.2);
58140        assert_eq_m128d(r, e);
58141        let r = _mm_mask_roundscale_sd::<0>(a, 0b11111111, a, b);
58142        let e = _mm_set_pd(2.2, 1.0);
58143        assert_eq_m128d(r, e);
58144    }
58145
58146    #[simd_test(enable = "avx512f")]
58147    unsafe fn test_mm_maskz_roundscale_sd() {
58148        let a = _mm_set1_pd(2.2);
58149        let b = _mm_set1_pd(1.1);
58150        let r = _mm_maskz_roundscale_sd::<0>(0, a, b);
58151        let e = _mm_set_pd(2.2, 0.0);
58152        assert_eq_m128d(r, e);
58153        let r = _mm_maskz_roundscale_sd::<0>(0b11111111, a, b);
58154        let e = _mm_set_pd(2.2, 1.0);
58155        assert_eq_m128d(r, e);
58156    }
58157
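    // scalef computes a * 2^floor(b); here 1.0 * 2^3.0 = 8.0.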
58158    #[simd_test(enable = "avx512f")]
58159    unsafe fn test_mm_scalef_ss() {
58160        let a = _mm_set1_ps(1.);
58161        let b = _mm_set1_ps(3.);
58162        let r = _mm_scalef_ss(a, b);
58163        let e = _mm_set_ps(1., 1., 1., 8.);
58164        assert_eq_m128(r, e);
58165    }
58166
58167    #[simd_test(enable = "avx512f")]
58168    unsafe fn test_mm_mask_scalef_ss() {
58169        let a = _mm_set1_ps(1.);
58170        let b = _mm_set1_ps(3.);
58171        let r = _mm_mask_scalef_ss(a, 0, a, b);
58172        let e = _mm_set_ps(1., 1., 1., 1.);
58173        assert_eq_m128(r, e);
58174        let r = _mm_mask_scalef_ss(a, 0b11111111, a, b);
58175        let e = _mm_set_ps(1., 1., 1., 8.);
58176        assert_eq_m128(r, e);
58177    }
58178
58179    #[simd_test(enable = "avx512f")]
58180    unsafe fn test_mm_maskz_scalef_ss() {
58181        let a = _mm_set1_ps(1.);
58182        let b = _mm_set1_ps(3.);
58183        let r = _mm_maskz_scalef_ss(0, a, b);
58184        let e = _mm_set_ps(1., 1., 1., 0.);
58185        assert_eq_m128(r, e);
58186        let r = _mm_maskz_scalef_ss(0b11111111, a, b);
58187        let e = _mm_set_ps(1., 1., 1., 8.);
58188        assert_eq_m128(r, e);
58189    }
58190
58191    #[simd_test(enable = "avx512f")]
58192    unsafe fn test_mm_scalef_sd() {
58193        let a = _mm_set1_pd(1.);
58194        let b = _mm_set1_pd(3.);
58195        let r = _mm_scalef_sd(a, b);
58196        let e = _mm_set_pd(1., 8.);
58197        assert_eq_m128d(r, e);
58198    }
58199
58200    #[simd_test(enable = "avx512f")]
58201    unsafe fn test_mm_mask_scalef_sd() {
58202        let a = _mm_set1_pd(1.);
58203        let b = _mm_set1_pd(3.);
58204        let r = _mm_mask_scalef_sd(a, 0, a, b);
58205        let e = _mm_set_pd(1., 1.);
58206        assert_eq_m128d(r, e);
58207        let r = _mm_mask_scalef_sd(a, 0b11111111, a, b);
58208        let e = _mm_set_pd(1., 8.);
58209        assert_eq_m128d(r, e);
58210    }
58211
58212    #[simd_test(enable = "avx512f")]
58213    unsafe fn test_mm_maskz_scalef_sd() {
58214        let a = _mm_set1_pd(1.);
58215        let b = _mm_set1_pd(3.);
58216        let r = _mm_maskz_scalef_sd(0, a, b);
58217        let e = _mm_set_pd(1., 0.);
58218        assert_eq_m128d(r, e);
58219        let r = _mm_maskz_scalef_sd(0b11111111, a, b);
58220        let e = _mm_set_pd(1., 8.);
58221        assert_eq_m128d(r, e);
58222    }
58223
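    // Scalar FMA tests: with a = 1, b = 2, c = 3 the selected results are fmadd = 5,
    // fmsub = -1, fnmadd = 1, fnmsub = -5. The mask_, maskz_ and mask3_ forms take
    // the unselected element 0 from a, zero, or c respectively.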
58224    #[simd_test(enable = "avx512f")]
58225    unsafe fn test_mm_mask_fmadd_ss() {
58226        let a = _mm_set1_ps(1.);
58227        let b = _mm_set1_ps(2.);
58228        let c = _mm_set1_ps(3.);
58229        let r = _mm_mask_fmadd_ss(a, 0, b, c);
58230        assert_eq_m128(r, a);
58231        let r = _mm_mask_fmadd_ss(a, 0b11111111, b, c);
58232        let e = _mm_set_ps(1., 1., 1., 5.);
58233        assert_eq_m128(r, e);
58234    }
58235
58236    #[simd_test(enable = "avx512f")]
58237    unsafe fn test_mm_maskz_fmadd_ss() {
58238        let a = _mm_set1_ps(1.);
58239        let b = _mm_set1_ps(2.);
58240        let c = _mm_set1_ps(3.);
58241        let r = _mm_maskz_fmadd_ss(0, a, b, c);
58242        let e = _mm_set_ps(1., 1., 1., 0.);
58243        assert_eq_m128(r, e);
58244        let r = _mm_maskz_fmadd_ss(0b11111111, a, b, c);
58245        let e = _mm_set_ps(1., 1., 1., 5.);
58246        assert_eq_m128(r, e);
58247    }
58248
58249    #[simd_test(enable = "avx512f")]
58250    unsafe fn test_mm_mask3_fmadd_ss() {
58251        let a = _mm_set1_ps(1.);
58252        let b = _mm_set1_ps(2.);
58253        let c = _mm_set1_ps(3.);
58254        let r = _mm_mask3_fmadd_ss(a, b, c, 0);
58255        assert_eq_m128(r, c);
58256        let r = _mm_mask3_fmadd_ss(a, b, c, 0b11111111);
58257        let e = _mm_set_ps(3., 3., 3., 5.);
58258        assert_eq_m128(r, e);
58259    }
58260
58261    #[simd_test(enable = "avx512f")]
58262    unsafe fn test_mm_mask_fmadd_sd() {
58263        let a = _mm_set1_pd(1.);
58264        let b = _mm_set1_pd(2.);
58265        let c = _mm_set1_pd(3.);
58266        let r = _mm_mask_fmadd_sd(a, 0, b, c);
58267        assert_eq_m128d(r, a);
58268        let r = _mm_mask_fmadd_sd(a, 0b11111111, b, c);
58269        let e = _mm_set_pd(1., 5.);
58270        assert_eq_m128d(r, e);
58271    }
58272
58273    #[simd_test(enable = "avx512f")]
58274    unsafe fn test_mm_maskz_fmadd_sd() {
58275        let a = _mm_set1_pd(1.);
58276        let b = _mm_set1_pd(2.);
58277        let c = _mm_set1_pd(3.);
58278        let r = _mm_maskz_fmadd_sd(0, a, b, c);
58279        let e = _mm_set_pd(1., 0.);
58280        assert_eq_m128d(r, e);
58281        let r = _mm_maskz_fmadd_sd(0b11111111, a, b, c);
58282        let e = _mm_set_pd(1., 5.);
58283        assert_eq_m128d(r, e);
58284    }
58285
58286    #[simd_test(enable = "avx512f")]
58287    unsafe fn test_mm_mask3_fmadd_sd() {
58288        let a = _mm_set1_pd(1.);
58289        let b = _mm_set1_pd(2.);
58290        let c = _mm_set1_pd(3.);
58291        let r = _mm_mask3_fmadd_sd(a, b, c, 0);
58292        assert_eq_m128d(r, c);
58293        let r = _mm_mask3_fmadd_sd(a, b, c, 0b11111111);
58294        let e = _mm_set_pd(3., 5.);
58295        assert_eq_m128d(r, e);
58296    }
58297
58298    #[simd_test(enable = "avx512f")]
58299    unsafe fn test_mm_mask_fmsub_ss() {
58300        let a = _mm_set1_ps(1.);
58301        let b = _mm_set1_ps(2.);
58302        let c = _mm_set1_ps(3.);
58303        let r = _mm_mask_fmsub_ss(a, 0, b, c);
58304        assert_eq_m128(r, a);
58305        let r = _mm_mask_fmsub_ss(a, 0b11111111, b, c);
58306        let e = _mm_set_ps(1., 1., 1., -1.);
58307        assert_eq_m128(r, e);
58308    }
58309
58310    #[simd_test(enable = "avx512f")]
58311    unsafe fn test_mm_maskz_fmsub_ss() {
58312        let a = _mm_set1_ps(1.);
58313        let b = _mm_set1_ps(2.);
58314        let c = _mm_set1_ps(3.);
58315        let r = _mm_maskz_fmsub_ss(0, a, b, c);
58316        let e = _mm_set_ps(1., 1., 1., 0.);
58317        assert_eq_m128(r, e);
58318        let r = _mm_maskz_fmsub_ss(0b11111111, a, b, c);
58319        let e = _mm_set_ps(1., 1., 1., -1.);
58320        assert_eq_m128(r, e);
58321    }
58322
58323    #[simd_test(enable = "avx512f")]
58324    unsafe fn test_mm_mask3_fmsub_ss() {
58325        let a = _mm_set1_ps(1.);
58326        let b = _mm_set1_ps(2.);
58327        let c = _mm_set1_ps(3.);
58328        let r = _mm_mask3_fmsub_ss(a, b, c, 0);
58329        assert_eq_m128(r, c);
58330        let r = _mm_mask3_fmsub_ss(a, b, c, 0b11111111);
58331        let e = _mm_set_ps(3., 3., 3., -1.);
58332        assert_eq_m128(r, e);
58333    }
58334
58335    #[simd_test(enable = "avx512f")]
58336    unsafe fn test_mm_mask_fmsub_sd() {
58337        let a = _mm_set1_pd(1.);
58338        let b = _mm_set1_pd(2.);
58339        let c = _mm_set1_pd(3.);
58340        let r = _mm_mask_fmsub_sd(a, 0, b, c);
58341        assert_eq_m128d(r, a);
58342        let r = _mm_mask_fmsub_sd(a, 0b11111111, b, c);
58343        let e = _mm_set_pd(1., -1.);
58344        assert_eq_m128d(r, e);
58345    }
58346
58347    #[simd_test(enable = "avx512f")]
58348    unsafe fn test_mm_maskz_fmsub_sd() {
58349        let a = _mm_set1_pd(1.);
58350        let b = _mm_set1_pd(2.);
58351        let c = _mm_set1_pd(3.);
58352        let r = _mm_maskz_fmsub_sd(0, a, b, c);
58353        let e = _mm_set_pd(1., 0.);
58354        assert_eq_m128d(r, e);
58355        let r = _mm_maskz_fmsub_sd(0b11111111, a, b, c);
58356        let e = _mm_set_pd(1., -1.);
58357        assert_eq_m128d(r, e);
58358    }
58359
58360    #[simd_test(enable = "avx512f")]
58361    unsafe fn test_mm_mask3_fmsub_sd() {
58362        let a = _mm_set1_pd(1.);
58363        let b = _mm_set1_pd(2.);
58364        let c = _mm_set1_pd(3.);
58365        let r = _mm_mask3_fmsub_sd(a, b, c, 0);
58366        assert_eq_m128d(r, c);
58367        let r = _mm_mask3_fmsub_sd(a, b, c, 0b11111111);
58368        let e = _mm_set_pd(3., -1.);
58369        assert_eq_m128d(r, e);
58370    }
58371
58372    #[simd_test(enable = "avx512f")]
58373    unsafe fn test_mm_mask_fnmadd_ss() {
58374        let a = _mm_set1_ps(1.);
58375        let b = _mm_set1_ps(2.);
58376        let c = _mm_set1_ps(3.);
58377        let r = _mm_mask_fnmadd_ss(a, 0, b, c);
58378        assert_eq_m128(r, a);
58379        let r = _mm_mask_fnmadd_ss(a, 0b11111111, b, c);
58380        let e = _mm_set_ps(1., 1., 1., 1.);
58381        assert_eq_m128(r, e);
58382    }
58383
58384    #[simd_test(enable = "avx512f")]
58385    unsafe fn test_mm_maskz_fnmadd_ss() {
58386        let a = _mm_set1_ps(1.);
58387        let b = _mm_set1_ps(2.);
58388        let c = _mm_set1_ps(3.);
58389        let r = _mm_maskz_fnmadd_ss(0, a, b, c);
58390        let e = _mm_set_ps(1., 1., 1., 0.);
58391        assert_eq_m128(r, e);
58392        let r = _mm_maskz_fnmadd_ss(0b11111111, a, b, c);
58393        let e = _mm_set_ps(1., 1., 1., 1.);
58394        assert_eq_m128(r, e);
58395    }
58396
58397    #[simd_test(enable = "avx512f")]
58398    unsafe fn test_mm_mask3_fnmadd_ss() {
58399        let a = _mm_set1_ps(1.);
58400        let b = _mm_set1_ps(2.);
58401        let c = _mm_set1_ps(3.);
58402        let r = _mm_mask3_fnmadd_ss(a, b, c, 0);
58403        assert_eq_m128(r, c);
58404        let r = _mm_mask3_fnmadd_ss(a, b, c, 0b11111111);
58405        let e = _mm_set_ps(3., 3., 3., 1.);
58406        assert_eq_m128(r, e);
58407    }
58408
58409    #[simd_test(enable = "avx512f")]
58410    unsafe fn test_mm_mask_fnmadd_sd() {
58411        let a = _mm_set1_pd(1.);
58412        let b = _mm_set1_pd(2.);
58413        let c = _mm_set1_pd(3.);
58414        let r = _mm_mask_fnmadd_sd(a, 0, b, c);
58415        assert_eq_m128d(r, a);
58416        let r = _mm_mask_fnmadd_sd(a, 0b11111111, b, c);
58417        let e = _mm_set_pd(1., 1.);
58418        assert_eq_m128d(r, e);
58419    }
58420
58421    #[simd_test(enable = "avx512f")]
58422    unsafe fn test_mm_maskz_fnmadd_sd() {
58423        let a = _mm_set1_pd(1.);
58424        let b = _mm_set1_pd(2.);
58425        let c = _mm_set1_pd(3.);
58426        let r = _mm_maskz_fnmadd_sd(0, a, b, c);
58427        let e = _mm_set_pd(1., 0.);
58428        assert_eq_m128d(r, e);
58429        let r = _mm_maskz_fnmadd_sd(0b11111111, a, b, c);
58430        let e = _mm_set_pd(1., 1.);
58431        assert_eq_m128d(r, e);
58432    }
58433
58434    #[simd_test(enable = "avx512f")]
58435    unsafe fn test_mm_mask3_fnmadd_sd() {
58436        let a = _mm_set1_pd(1.);
58437        let b = _mm_set1_pd(2.);
58438        let c = _mm_set1_pd(3.);
58439        let r = _mm_mask3_fnmadd_sd(a, b, c, 0);
58440        assert_eq_m128d(r, c);
58441        let r = _mm_mask3_fnmadd_sd(a, b, c, 0b11111111);
58442        let e = _mm_set_pd(3., 1.);
58443        assert_eq_m128d(r, e);
58444    }
58445
58446    #[simd_test(enable = "avx512f")]
58447    unsafe fn test_mm_mask_fnmsub_ss() {
58448        let a = _mm_set1_ps(1.);
58449        let b = _mm_set1_ps(2.);
58450        let c = _mm_set1_ps(3.);
58451        let r = _mm_mask_fnmsub_ss(a, 0, b, c);
58452        assert_eq_m128(r, a);
58453        let r = _mm_mask_fnmsub_ss(a, 0b11111111, b, c);
58454        let e = _mm_set_ps(1., 1., 1., -5.);
58455        assert_eq_m128(r, e);
58456    }
58457
58458    #[simd_test(enable = "avx512f")]
58459    unsafe fn test_mm_maskz_fnmsub_ss() {
58460        let a = _mm_set1_ps(1.);
58461        let b = _mm_set1_ps(2.);
58462        let c = _mm_set1_ps(3.);
58463        let r = _mm_maskz_fnmsub_ss(0, a, b, c);
58464        let e = _mm_set_ps(1., 1., 1., 0.);
58465        assert_eq_m128(r, e);
58466        let r = _mm_maskz_fnmsub_ss(0b11111111, a, b, c);
58467        let e = _mm_set_ps(1., 1., 1., -5.);
58468        assert_eq_m128(r, e);
58469    }
58470
58471    #[simd_test(enable = "avx512f")]
58472    unsafe fn test_mm_mask3_fnmsub_ss() {
58473        let a = _mm_set1_ps(1.);
58474        let b = _mm_set1_ps(2.);
58475        let c = _mm_set1_ps(3.);
58476        let r = _mm_mask3_fnmsub_ss(a, b, c, 0);
58477        assert_eq_m128(r, c);
58478        let r = _mm_mask3_fnmsub_ss(a, b, c, 0b11111111);
58479        let e = _mm_set_ps(3., 3., 3., -5.);
58480        assert_eq_m128(r, e);
58481    }
58482
58483    #[simd_test(enable = "avx512f")]
58484    unsafe fn test_mm_mask_fnmsub_sd() {
58485        let a = _mm_set1_pd(1.);
58486        let b = _mm_set1_pd(2.);
58487        let c = _mm_set1_pd(3.);
58488        let r = _mm_mask_fnmsub_sd(a, 0, b, c);
58489        assert_eq_m128d(r, a);
58490        let r = _mm_mask_fnmsub_sd(a, 0b11111111, b, c);
58491        let e = _mm_set_pd(1., -5.);
58492        assert_eq_m128d(r, e);
58493    }
58494
58495    #[simd_test(enable = "avx512f")]
58496    unsafe fn test_mm_maskz_fnmsub_sd() {
58497        let a = _mm_set1_pd(1.);
58498        let b = _mm_set1_pd(2.);
58499        let c = _mm_set1_pd(3.);
58500        let r = _mm_maskz_fnmsub_sd(0, a, b, c);
58501        let e = _mm_set_pd(1., 0.);
58502        assert_eq_m128d(r, e);
58503        let r = _mm_maskz_fnmsub_sd(0b11111111, a, b, c);
58504        let e = _mm_set_pd(1., -5.);
58505        assert_eq_m128d(r, e);
58506    }
58507
58508    #[simd_test(enable = "avx512f")]
58509    unsafe fn test_mm_mask3_fnmsub_sd() {
58510        let a = _mm_set1_pd(1.);
58511        let b = _mm_set1_pd(2.);
58512        let c = _mm_set1_pd(3.);
58513        let r = _mm_mask3_fnmsub_sd(a, b, c, 0);
58514        assert_eq_m128d(r, c);
58515        let r = _mm_mask3_fnmsub_sd(a, b, c, 0b11111111);
58516        let e = _mm_set_pd(3., -5.);
58517        assert_eq_m128d(r, e);
58518    }
58519
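    // The *_round_* variants take the rounding control as a const generic;
    // _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC truncates with exceptions suppressed,
    // which leaves these exact results identical to the non-rounding tests above.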
58520    #[simd_test(enable = "avx512f")]
58521    unsafe fn test_mm_add_round_ss() {
58522        let a = _mm_set_ps(1., 2., 10., 20.);
58523        let b = _mm_set_ps(3., 4., 30., 40.);
58524        let r = _mm_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58525        let e = _mm_set_ps(1., 2., 10., 60.);
58526        assert_eq_m128(r, e);
58527    }
58528
58529    #[simd_test(enable = "avx512f")]
58530    unsafe fn test_mm_mask_add_round_ss() {
58531        let src = _mm_set_ps(10., 11., 100., 110.);
58532        let a = _mm_set_ps(1., 2., 10., 20.);
58533        let b = _mm_set_ps(3., 4., 30., 40.);
58534        let r = _mm_mask_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58535        let e = _mm_set_ps(1., 2., 10., 110.);
58536        assert_eq_m128(r, e);
58537        let r = _mm_mask_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58538            src, 0b11111111, a, b,
58539        );
58540        let e = _mm_set_ps(1., 2., 10., 60.);
58541        assert_eq_m128(r, e);
58542    }
58543
58544    #[simd_test(enable = "avx512f")]
58545    unsafe fn test_mm_maskz_add_round_ss() {
58546        let a = _mm_set_ps(1., 2., 10., 20.);
58547        let b = _mm_set_ps(3., 4., 30., 40.);
58548        let r = _mm_maskz_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58549        let e = _mm_set_ps(1., 2., 10., 0.);
58550        assert_eq_m128(r, e);
58551        let r =
58552            _mm_maskz_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58553        let e = _mm_set_ps(1., 2., 10., 60.);
58554        assert_eq_m128(r, e);
58555    }
58556
58557    #[simd_test(enable = "avx512f")]
58558    unsafe fn test_mm_add_round_sd() {
58559        let a = _mm_set_pd(1., 2.);
58560        let b = _mm_set_pd(3., 4.);
58561        let r = _mm_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58562        let e = _mm_set_pd(1., 6.);
58563        assert_eq_m128d(r, e);
58564    }
58565
58566    #[simd_test(enable = "avx512f")]
58567    unsafe fn test_mm_mask_add_round_sd() {
58568        let src = _mm_set_pd(10., 11.);
58569        let a = _mm_set_pd(1., 2.);
58570        let b = _mm_set_pd(3., 4.);
58571        let r = _mm_mask_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58572        let e = _mm_set_pd(1., 11.);
58573        assert_eq_m128d(r, e);
58574        let r = _mm_mask_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58575            src, 0b11111111, a, b,
58576        );
58577        let e = _mm_set_pd(1., 6.);
58578        assert_eq_m128d(r, e);
58579    }
58580
58581    #[simd_test(enable = "avx512f")]
58582    unsafe fn test_mm_maskz_add_round_sd() {
58583        let a = _mm_set_pd(1., 2.);
58584        let b = _mm_set_pd(3., 4.);
58585        let r = _mm_maskz_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58586        let e = _mm_set_pd(1., 0.);
58587        assert_eq_m128d(r, e);
58588        let r =
58589            _mm_maskz_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58590        let e = _mm_set_pd(1., 6.);
58591        assert_eq_m128d(r, e);
58592    }
58593
58594    #[simd_test(enable = "avx512f")]
58595    unsafe fn test_mm_sub_round_ss() {
58596        let a = _mm_set_ps(1., 2., 10., 20.);
58597        let b = _mm_set_ps(3., 4., 30., 40.);
58598        let r = _mm_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58599        let e = _mm_set_ps(1., 2., 10., -20.);
58600        assert_eq_m128(r, e);
58601    }
58602
58603    #[simd_test(enable = "avx512f")]
58604    unsafe fn test_mm_mask_sub_round_ss() {
58605        let src = _mm_set_ps(10., 11., 100., 110.);
58606        let a = _mm_set_ps(1., 2., 10., 20.);
58607        let b = _mm_set_ps(3., 4., 30., 40.);
58608        let r = _mm_mask_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58609        let e = _mm_set_ps(1., 2., 10., 110.);
58610        assert_eq_m128(r, e);
58611        let r = _mm_mask_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58612            src, 0b11111111, a, b,
58613        );
58614        let e = _mm_set_ps(1., 2., 10., -20.);
58615        assert_eq_m128(r, e);
58616    }
58617
58618    #[simd_test(enable = "avx512f")]
58619    unsafe fn test_mm_maskz_sub_round_ss() {
58620        let a = _mm_set_ps(1., 2., 10., 20.);
58621        let b = _mm_set_ps(3., 4., 30., 40.);
58622        let r = _mm_maskz_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58623        let e = _mm_set_ps(1., 2., 10., 0.);
58624        assert_eq_m128(r, e);
58625        let r =
58626            _mm_maskz_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58627        let e = _mm_set_ps(1., 2., 10., -20.);
58628        assert_eq_m128(r, e);
58629    }
58630
58631    #[simd_test(enable = "avx512f")]
58632    unsafe fn test_mm_sub_round_sd() {
58633        let a = _mm_set_pd(1., 2.);
58634        let b = _mm_set_pd(3., 4.);
58635        let r = _mm_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58636        let e = _mm_set_pd(1., -2.);
58637        assert_eq_m128d(r, e);
58638    }
58639
58640    #[simd_test(enable = "avx512f")]
58641    unsafe fn test_mm_mask_sub_round_sd() {
58642        let src = _mm_set_pd(10., 11.);
58643        let a = _mm_set_pd(1., 2.);
58644        let b = _mm_set_pd(3., 4.);
58645        let r = _mm_mask_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58646        let e = _mm_set_pd(1., 11.);
58647        assert_eq_m128d(r, e);
58648        let r = _mm_mask_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58649            src, 0b11111111, a, b,
58650        );
58651        let e = _mm_set_pd(1., -2.);
58652        assert_eq_m128d(r, e);
58653    }
58654
58655    #[simd_test(enable = "avx512f")]
58656    unsafe fn test_mm_maskz_sub_round_sd() {
58657        let a = _mm_set_pd(1., 2.);
58658        let b = _mm_set_pd(3., 4.);
58659        let r = _mm_maskz_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58660        let e = _mm_set_pd(1., 0.);
58661        assert_eq_m128d(r, e);
58662        let r =
58663            _mm_maskz_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58664        let e = _mm_set_pd(1., -2.);
58665        assert_eq_m128d(r, e);
58666    }
58667
58668    #[simd_test(enable = "avx512f")]
58669    unsafe fn test_mm_mul_round_ss() {
58670        let a = _mm_set_ps(1., 2., 10., 20.);
58671        let b = _mm_set_ps(3., 4., 30., 40.);
58672        let r = _mm_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58673        let e = _mm_set_ps(1., 2., 10., 800.);
58674        assert_eq_m128(r, e);
58675    }
58676
58677    #[simd_test(enable = "avx512f")]
58678    unsafe fn test_mm_mask_mul_round_ss() {
58679        let src = _mm_set_ps(10., 11., 100., 110.);
58680        let a = _mm_set_ps(1., 2., 10., 20.);
58681        let b = _mm_set_ps(3., 4., 30., 40.);
58682        let r = _mm_mask_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58683        let e = _mm_set_ps(1., 2., 10., 110.);
58684        assert_eq_m128(r, e);
58685        let r = _mm_mask_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58686            src, 0b11111111, a, b,
58687        );
58688        let e = _mm_set_ps(1., 2., 10., 800.);
58689        assert_eq_m128(r, e);
58690    }
58691
58692    #[simd_test(enable = "avx512f")]
58693    unsafe fn test_mm_maskz_mul_round_ss() {
58694        let a = _mm_set_ps(1., 2., 10., 20.);
58695        let b = _mm_set_ps(3., 4., 30., 40.);
58696        let r = _mm_maskz_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58697        let e = _mm_set_ps(1., 2., 10., 0.);
58698        assert_eq_m128(r, e);
58699        let r =
58700            _mm_maskz_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58701        let e = _mm_set_ps(1., 2., 10., 800.);
58702        assert_eq_m128(r, e);
58703    }
58704
58705    #[simd_test(enable = "avx512f")]
58706    unsafe fn test_mm_mul_round_sd() {
58707        let a = _mm_set_pd(1., 2.);
58708        let b = _mm_set_pd(3., 4.);
58709        let r = _mm_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58710        let e = _mm_set_pd(1., 8.);
58711        assert_eq_m128d(r, e);
58712    }
58713
58714    #[simd_test(enable = "avx512f")]
58715    unsafe fn test_mm_mask_mul_round_sd() {
58716        let src = _mm_set_pd(10., 11.);
58717        let a = _mm_set_pd(1., 2.);
58718        let b = _mm_set_pd(3., 4.);
58719        let r = _mm_mask_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58720        let e = _mm_set_pd(1., 11.);
58721        assert_eq_m128d(r, e);
58722        let r = _mm_mask_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58723            src, 0b11111111, a, b,
58724        );
58725        let e = _mm_set_pd(1., 8.);
58726        assert_eq_m128d(r, e);
58727    }
58728
58729    #[simd_test(enable = "avx512f")]
58730    unsafe fn test_mm_maskz_mul_round_sd() {
58731        let a = _mm_set_pd(1., 2.);
58732        let b = _mm_set_pd(3., 4.);
58733        let r = _mm_maskz_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58734        let e = _mm_set_pd(1., 0.);
58735        assert_eq_m128d(r, e);
58736        let r =
58737            _mm_maskz_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58738        let e = _mm_set_pd(1., 8.);
58739        assert_eq_m128d(r, e);
58740    }
58741
58742    #[simd_test(enable = "avx512f")]
58743    unsafe fn test_mm_div_round_ss() {
58744        let a = _mm_set_ps(1., 2., 10., 20.);
58745        let b = _mm_set_ps(3., 4., 30., 40.);
58746        let r = _mm_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58747        let e = _mm_set_ps(1., 2., 10., 0.5);
58748        assert_eq_m128(r, e);
58749    }
58750
58751    #[simd_test(enable = "avx512f")]
58752    unsafe fn test_mm_mask_div_round_ss() {
58753        let src = _mm_set_ps(10., 11., 100., 110.);
58754        let a = _mm_set_ps(1., 2., 10., 20.);
58755        let b = _mm_set_ps(3., 4., 30., 40.);
58756        let r = _mm_mask_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58757        let e = _mm_set_ps(1., 2., 10., 110.);
58758        assert_eq_m128(r, e);
58759        let r = _mm_mask_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58760            src, 0b11111111, a, b,
58761        );
58762        let e = _mm_set_ps(1., 2., 10., 0.5);
58763        assert_eq_m128(r, e);
58764    }
58765
58766    #[simd_test(enable = "avx512f")]
58767    unsafe fn test_mm_maskz_div_round_ss() {
58768        let a = _mm_set_ps(1., 2., 10., 20.);
58769        let b = _mm_set_ps(3., 4., 30., 40.);
58770        let r = _mm_maskz_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58771        let e = _mm_set_ps(1., 2., 10., 0.);
58772        assert_eq_m128(r, e);
58773        let r =
58774            _mm_maskz_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58775        let e = _mm_set_ps(1., 2., 10., 0.5);
58776        assert_eq_m128(r, e);
58777    }
58778
58779    #[simd_test(enable = "avx512f")]
58780    unsafe fn test_mm_div_round_sd() {
58781        let a = _mm_set_pd(1., 2.);
58782        let b = _mm_set_pd(3., 4.);
58783        let r = _mm_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58784        let e = _mm_set_pd(1., 0.5);
58785        assert_eq_m128d(r, e);
58786    }
58787
58788    #[simd_test(enable = "avx512f")]
58789    unsafe fn test_mm_mask_div_round_sd() {
58790        let src = _mm_set_pd(10., 11.);
58791        let a = _mm_set_pd(1., 2.);
58792        let b = _mm_set_pd(3., 4.);
58793        let r = _mm_mask_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58794        let e = _mm_set_pd(1., 11.);
58795        assert_eq_m128d(r, e);
58796        let r = _mm_mask_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58797            src, 0b11111111, a, b,
58798        );
58799        let e = _mm_set_pd(1., 0.5);
58800        assert_eq_m128d(r, e);
58801    }
58802
58803    #[simd_test(enable = "avx512f")]
58804    unsafe fn test_mm_maskz_div_round_sd() {
58805        let a = _mm_set_pd(1., 2.);
58806        let b = _mm_set_pd(3., 4.);
58807        let r = _mm_maskz_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58808        let e = _mm_set_pd(1., 0.);
58809        assert_eq_m128d(r, e);
58810        let r =
58811            _mm_maskz_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58812        let e = _mm_set_pd(1., 0.5);
58813        assert_eq_m128d(r, e);
58814    }
58815
58816    #[simd_test(enable = "avx512f")]
58817    unsafe fn test_mm_max_round_ss() {
58818        let a = _mm_set_ps(0., 1., 2., 3.);
58819        let b = _mm_set_ps(4., 5., 6., 7.);
58820        let r = _mm_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
58821        let e = _mm_set_ps(0., 1., 2., 7.);
58822        assert_eq_m128(r, e);
58823    }
58824
58825    #[simd_test(enable = "avx512f")]
58826    unsafe fn test_mm_mask_max_round_ss() {
58827        let a = _mm_set_ps(0., 1., 2., 3.);
58828        let b = _mm_set_ps(4., 5., 6., 7.);
58829        let r = _mm_mask_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
58830        let e = _mm_set_ps(0., 1., 2., 3.);
58831        assert_eq_m128(r, e);
58832        let r = _mm_mask_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
58833        let e = _mm_set_ps(0., 1., 2., 7.);
58834        assert_eq_m128(r, e);
58835    }
58836
58837    #[simd_test(enable = "avx512f")]
58838    unsafe fn test_mm_maskz_max_round_ss() {
58839        let a = _mm_set_ps(0., 1., 2., 3.);
58840        let b = _mm_set_ps(4., 5., 6., 7.);
58841        let r = _mm_maskz_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
58842        let e = _mm_set_ps(0., 1., 2., 0.);
58843        assert_eq_m128(r, e);
58844        let r = _mm_maskz_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
58845        let e = _mm_set_ps(0., 1., 2., 7.);
58846        assert_eq_m128(r, e);
58847    }
58848
58849    #[simd_test(enable = "avx512f")]
58850    unsafe fn test_mm_max_round_sd() {
58851        let a = _mm_set_pd(0., 1.);
58852        let b = _mm_set_pd(2., 3.);
58853        let r = _mm_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
58854        let e = _mm_set_pd(0., 3.);
58855        assert_eq_m128d(r, e);
58856    }
58857
58858    #[simd_test(enable = "avx512f")]
58859    unsafe fn test_mm_mask_max_round_sd() {
58860        let a = _mm_set_pd(0., 1.);
58861        let b = _mm_set_pd(2., 3.);
58862        let r = _mm_mask_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
58863        let e = _mm_set_pd(0., 1.);
58864        assert_eq_m128d(r, e);
58865        let r = _mm_mask_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
58866        let e = _mm_set_pd(0., 3.);
58867        assert_eq_m128d(r, e);
58868    }
58869
58870    #[simd_test(enable = "avx512f")]
58871    unsafe fn test_mm_maskz_max_round_sd() {
58872        let a = _mm_set_pd(0., 1.);
58873        let b = _mm_set_pd(2., 3.);
58874        let r = _mm_maskz_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
58875        let e = _mm_set_pd(0., 0.);
58876        assert_eq_m128d(r, e);
58877        let r = _mm_maskz_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
58878        let e = _mm_set_pd(0., 3.);
58879        assert_eq_m128d(r, e);
58880    }
58881
58882    #[simd_test(enable = "avx512f")]
58883    unsafe fn test_mm_min_round_ss() {
58884        let a = _mm_set_ps(0., 1., 2., 3.);
58885        let b = _mm_set_ps(4., 5., 6., 7.);
58886        let r = _mm_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
58887        let e = _mm_set_ps(0., 1., 2., 3.);
58888        assert_eq_m128(r, e);
58889    }
58890
58891    #[simd_test(enable = "avx512f")]
58892    unsafe fn test_mm_mask_min_round_ss() {
58893        let a = _mm_set_ps(0., 1., 2., 3.);
58894        let b = _mm_set_ps(4., 5., 6., 7.);
58895        let r = _mm_mask_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
58896        let e = _mm_set_ps(0., 1., 2., 3.);
58897        assert_eq_m128(r, e);
58898        let r = _mm_mask_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
58899        let e = _mm_set_ps(0., 1., 2., 3.);
58900        assert_eq_m128(r, e);
58901    }
58902
58903    #[simd_test(enable = "avx512f")]
58904    unsafe fn test_mm_maskz_min_round_ss() {
58905        let a = _mm_set_ps(0., 1., 2., 3.);
58906        let b = _mm_set_ps(4., 5., 6., 7.);
58907        let r = _mm_maskz_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
58908        let e = _mm_set_ps(0., 1., 2., 0.);
58909        assert_eq_m128(r, e);
58910        let r = _mm_maskz_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
58911        let e = _mm_set_ps(0., 1., 2., 3.);
58912        assert_eq_m128(r, e);
58913    }
58914
58915    #[simd_test(enable = "avx512f")]
58916    unsafe fn test_mm_min_round_sd() {
58917        let a = _mm_set_pd(0., 1.);
58918        let b = _mm_set_pd(2., 3.);
58919        let r = _mm_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
58920        let e = _mm_set_pd(0., 1.);
58921        assert_eq_m128d(r, e);
58922    }
58923
58924    #[simd_test(enable = "avx512f")]
58925    unsafe fn test_mm_mask_min_round_sd() {
58926        let a = _mm_set_pd(0., 1.);
58927        let b = _mm_set_pd(2., 3.);
58928        let r = _mm_mask_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
58929        let e = _mm_set_pd(0., 1.);
58930        assert_eq_m128d(r, e);
58931        let r = _mm_mask_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
58932        let e = _mm_set_pd(0., 1.);
58933        assert_eq_m128d(r, e);
58934    }
58935
58936    #[simd_test(enable = "avx512f")]
58937    unsafe fn test_mm_maskz_min_round_sd() {
58938        let a = _mm_set_pd(0., 1.);
58939        let b = _mm_set_pd(2., 3.);
58940        let r = _mm_maskz_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
58941        let e = _mm_set_pd(0., 0.);
58942        assert_eq_m128d(r, e);
58943        let r = _mm_maskz_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
58944        let e = _mm_set_pd(0., 1.);
58945        assert_eq_m128d(r, e);
58946    }
58947
58948    #[simd_test(enable = "avx512f")]
58949    unsafe fn test_mm_sqrt_round_ss() {
58950        let a = _mm_set_ps(1., 2., 10., 20.);
58951        let b = _mm_set_ps(3., 4., 30., 4.);
58952        let r = _mm_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58953        let e = _mm_set_ps(1., 2., 10., 2.);
58954        assert_eq_m128(r, e);
58955    }
58956
58957    #[simd_test(enable = "avx512f")]
58958    unsafe fn test_mm_mask_sqrt_round_ss() {
58959        let src = _mm_set_ps(10., 11., 100., 110.);
58960        let a = _mm_set_ps(1., 2., 10., 20.);
58961        let b = _mm_set_ps(3., 4., 30., 4.);
58962        let r = _mm_mask_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58963        let e = _mm_set_ps(1., 2., 10., 110.);
58964        assert_eq_m128(r, e);
58965        let r = _mm_mask_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58966            src, 0b11111111, a, b,
58967        );
58968        let e = _mm_set_ps(1., 2., 10., 2.);
58969        assert_eq_m128(r, e);
58970    }
58971
58972    #[simd_test(enable = "avx512f")]
58973    unsafe fn test_mm_maskz_sqrt_round_ss() {
58974        let a = _mm_set_ps(1., 2., 10., 20.);
58975        let b = _mm_set_ps(3., 4., 30., 4.);
58976        let r = _mm_maskz_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58977        let e = _mm_set_ps(1., 2., 10., 0.);
58978        assert_eq_m128(r, e);
58979        let r =
58980            _mm_maskz_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58981        let e = _mm_set_ps(1., 2., 10., 2.);
58982        assert_eq_m128(r, e);
58983    }
58984
58985    #[simd_test(enable = "avx512f")]
58986    unsafe fn test_mm_sqrt_round_sd() {
58987        let a = _mm_set_pd(1., 2.);
58988        let b = _mm_set_pd(3., 4.);
58989        let r = _mm_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58990        let e = _mm_set_pd(1., 2.);
58991        assert_eq_m128d(r, e);
58992    }
58993
58994    #[simd_test(enable = "avx512f")]
58995    unsafe fn test_mm_mask_sqrt_round_sd() {
58996        let src = _mm_set_pd(10., 11.);
58997        let a = _mm_set_pd(1., 2.);
58998        let b = _mm_set_pd(3., 4.);
58999        let r = _mm_mask_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
59000        let e = _mm_set_pd(1., 11.);
59001        assert_eq_m128d(r, e);
59002        let r = _mm_mask_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
59003            src, 0b11111111, a, b,
59004        );
59005        let e = _mm_set_pd(1., 2.);
59006        assert_eq_m128d(r, e);
59007    }
59008
59009    #[simd_test(enable = "avx512f")]
59010    unsafe fn test_mm_maskz_sqrt_round_sd() {
59011        let a = _mm_set_pd(1., 2.);
59012        let b = _mm_set_pd(3., 4.);
59013        let r = _mm_maskz_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
59014        let e = _mm_set_pd(1., 0.);
59015        assert_eq_m128d(r, e);
59016        let r =
59017            _mm_maskz_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
59018        let e = _mm_set_pd(1., 2.);
59019        assert_eq_m128d(r, e);
59020    }
59021
59022    #[simd_test(enable = "avx512f")]
59023    unsafe fn test_mm_getexp_round_ss() {
59024        let a = _mm_set1_ps(2.);
59025        let b = _mm_set1_ps(3.);
59026        let r = _mm_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
59027        let e = _mm_set_ps(2., 2., 2., 1.);
59028        assert_eq_m128(r, e);
59029    }
59030
59031    #[simd_test(enable = "avx512f")]
59032    unsafe fn test_mm_mask_getexp_round_ss() {
59033        let a = _mm_set1_ps(2.);
59034        let b = _mm_set1_ps(3.);
59035        let r = _mm_mask_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
59036        let e = _mm_set_ps(2., 2., 2., 2.);
59037        assert_eq_m128(r, e);
59038        let r = _mm_mask_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
59039        let e = _mm_set_ps(2., 2., 2., 1.);
59040        assert_eq_m128(r, e);
59041    }
59042
59043    #[simd_test(enable = "avx512f")]
59044    unsafe fn test_mm_maskz_getexp_round_ss() {
59045        let a = _mm_set1_ps(2.);
59046        let b = _mm_set1_ps(3.);
59047        let r = _mm_maskz_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
59048        let e = _mm_set_ps(2., 2., 2., 0.);
59049        assert_eq_m128(r, e);
59050        let r = _mm_maskz_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
59051        let e = _mm_set_ps(2., 2., 2., 1.);
59052        assert_eq_m128(r, e);
59053    }
59054
59055    #[simd_test(enable = "avx512f")]
59056    unsafe fn test_mm_getexp_round_sd() {
59057        let a = _mm_set1_pd(2.);
59058        let b = _mm_set1_pd(3.);
59059        let r = _mm_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
59060        let e = _mm_set_pd(2., 1.);
59061        assert_eq_m128d(r, e);
59062    }
59063
59064    #[simd_test(enable = "avx512f")]
59065    unsafe fn test_mm_mask_getexp_round_sd() {
59066        let a = _mm_set1_pd(2.);
59067        let b = _mm_set1_pd(3.);
59068        let r = _mm_mask_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
59069        let e = _mm_set_pd(2., 2.);
59070        assert_eq_m128d(r, e);
59071        let r = _mm_mask_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
59072        let e = _mm_set_pd(2., 1.);
59073        assert_eq_m128d(r, e);
59074    }
59075
59076    #[simd_test(enable = "avx512f")]
59077    unsafe fn test_mm_maskz_getexp_round_sd() {
59078        let a = _mm_set1_pd(2.);
59079        let b = _mm_set1_pd(3.);
59080        let r = _mm_maskz_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
59081        let e = _mm_set_pd(2., 0.);
59082        assert_eq_m128d(r, e);
59083        let r = _mm_maskz_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
59084        let e = _mm_set_pd(2., 1.);
59085        assert_eq_m128d(r, e);
59086    }
59087
59088    #[simd_test(enable = "avx512f")]
59089    unsafe fn test_mm_getmant_round_ss() {
59090        let a = _mm_set1_ps(20.);
59091        let b = _mm_set1_ps(10.);
59092        let r =
59093            _mm_getmant_round_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_CUR_DIRECTION>(
59094                a, b,
59095            );
59096        let e = _mm_set_ps(20., 20., 20., 1.25);
59097        assert_eq_m128(r, e);
59098    }
59099
59100    #[simd_test(enable = "avx512f")]
59101    unsafe fn test_mm_mask_getmant_round_ss() {
59102        let a = _mm_set1_ps(20.);
59103        let b = _mm_set1_ps(10.);
59104        let r = _mm_mask_getmant_round_ss::<
59105            _MM_MANT_NORM_1_2,
59106            _MM_MANT_SIGN_SRC,
59107            _MM_FROUND_CUR_DIRECTION,
59108        >(a, 0, a, b);
59109        let e = _mm_set_ps(20., 20., 20., 20.);
59110        assert_eq_m128(r, e);
59111        let r = _mm_mask_getmant_round_ss::<
59112            _MM_MANT_NORM_1_2,
59113            _MM_MANT_SIGN_SRC,
59114            _MM_FROUND_CUR_DIRECTION,
59115        >(a, 0b11111111, a, b);
59116        let e = _mm_set_ps(20., 20., 20., 1.25);
59117        assert_eq_m128(r, e);
59118    }
59119
59120    #[simd_test(enable = "avx512f")]
59121    unsafe fn test_mm_maskz_getmant_round_ss() {
59122        let a = _mm_set1_ps(20.);
59123        let b = _mm_set1_ps(10.);
59124        let r = _mm_maskz_getmant_round_ss::<
59125            _MM_MANT_NORM_1_2,
59126            _MM_MANT_SIGN_SRC,
59127            _MM_FROUND_CUR_DIRECTION,
59128        >(0, a, b);
59129        let e = _mm_set_ps(20., 20., 20., 0.);
59130        assert_eq_m128(r, e);
59131        let r = _mm_maskz_getmant_round_ss::<
59132            _MM_MANT_NORM_1_2,
59133            _MM_MANT_SIGN_SRC,
59134            _MM_FROUND_CUR_DIRECTION,
59135        >(0b11111111, a, b);
59136        let e = _mm_set_ps(20., 20., 20., 1.25);
59137        assert_eq_m128(r, e);
59138    }
59139
59140    #[simd_test(enable = "avx512f")]
59141    unsafe fn test_mm_getmant_round_sd() {
59142        let a = _mm_set1_pd(20.);
59143        let b = _mm_set1_pd(10.);
59144        let r =
59145            _mm_getmant_round_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_CUR_DIRECTION>(
59146                a, b,
59147            );
59148        let e = _mm_set_pd(20., 1.25);
59149        assert_eq_m128d(r, e);
59150    }
59151
59152    #[simd_test(enable = "avx512f")]
59153    unsafe fn test_mm_mask_getmant_round_sd() {
59154        let a = _mm_set1_pd(20.);
59155        let b = _mm_set1_pd(10.);
59156        let r = _mm_mask_getmant_round_sd::<
59157            _MM_MANT_NORM_1_2,
59158            _MM_MANT_SIGN_SRC,
59159            _MM_FROUND_CUR_DIRECTION,
59160        >(a, 0, a, b);
59161        let e = _mm_set_pd(20., 20.);
59162        assert_eq_m128d(r, e);
59163        let r = _mm_mask_getmant_round_sd::<
59164            _MM_MANT_NORM_1_2,
59165            _MM_MANT_SIGN_SRC,
59166            _MM_FROUND_CUR_DIRECTION,
59167        >(a, 0b11111111, a, b);
59168        let e = _mm_set_pd(20., 1.25);
59169        assert_eq_m128d(r, e);
59170    }
59171
59172    #[simd_test(enable = "avx512f")]
59173    unsafe fn test_mm_maskz_getmant_round_sd() {
59174        let a = _mm_set1_pd(20.);
59175        let b = _mm_set1_pd(10.);
59176        let r = _mm_maskz_getmant_round_sd::<
59177            _MM_MANT_NORM_1_2,
59178            _MM_MANT_SIGN_SRC,
59179            _MM_FROUND_CUR_DIRECTION,
59180        >(0, a, b);
59181        let e = _mm_set_pd(20., 0.);
59182        assert_eq_m128d(r, e);
59183        let r = _mm_maskz_getmant_round_sd::<
59184            _MM_MANT_NORM_1_2,
59185            _MM_MANT_SIGN_SRC,
59186            _MM_FROUND_CUR_DIRECTION,
59187        >(0b11111111, a, b);
59188        let e = _mm_set_pd(20., 1.25);
59189        assert_eq_m128d(r, e);
59190    }
59191
59192    #[simd_test(enable = "avx512f")]
59193    unsafe fn test_mm_roundscale_round_ss() {
59194        let a = _mm_set1_ps(2.2);
59195        let b = _mm_set1_ps(1.1);
59196        let r = _mm_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
59197        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
59198        assert_eq_m128(r, e);
59199    }
59200
59201    #[simd_test(enable = "avx512f")]
59202    unsafe fn test_mm_mask_roundscale_round_ss() {
59203        let a = _mm_set1_ps(2.2);
59204        let b = _mm_set1_ps(1.1);
59205        let r = _mm_mask_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
59206        let e = _mm_set_ps(2.2, 2.2, 2.2, 2.2);
59207        assert_eq_m128(r, e);
59208        let r = _mm_mask_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
59209        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
59210        assert_eq_m128(r, e);
59211    }
59212
59213    #[simd_test(enable = "avx512f")]
59214    unsafe fn test_mm_maskz_roundscale_round_ss() {
59215        let a = _mm_set1_ps(2.2);
59216        let b = _mm_set1_ps(1.1);
59217        let r = _mm_maskz_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(0, a, b);
59218        let e = _mm_set_ps(2.2, 2.2, 2.2, 0.0);
59219        assert_eq_m128(r, e);
59220        let r = _mm_maskz_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
59221        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
59222        assert_eq_m128(r, e);
59223    }
59224
59225    #[simd_test(enable = "avx512f")]
59226    unsafe fn test_mm_roundscale_round_sd() {
59227        let a = _mm_set1_pd(2.2);
59228        let b = _mm_set1_pd(1.1);
59229        let r = _mm_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
59230        let e = _mm_set_pd(2.2, 1.0);
59231        assert_eq_m128d(r, e);
59232    }
59233
59234    #[simd_test(enable = "avx512f")]
59235    unsafe fn test_mm_mask_roundscale_round_sd() {
59236        let a = _mm_set1_pd(2.2);
59237        let b = _mm_set1_pd(1.1);
59238        let r = _mm_mask_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
59239        let e = _mm_set_pd(2.2, 2.2);
59240        assert_eq_m128d(r, e);
59241        let r = _mm_mask_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
59242        let e = _mm_set_pd(2.2, 1.0);
59243        assert_eq_m128d(r, e);
59244    }
59245
59246    #[simd_test(enable = "avx512f")]
59247    unsafe fn test_mm_maskz_roundscale_round_sd() {
59248        let a = _mm_set1_pd(2.2);
59249        let b = _mm_set1_pd(1.1);
59250        let r = _mm_maskz_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(0, a, b);
59251        let e = _mm_set_pd(2.2, 0.0);
59252        assert_eq_m128d(r, e);
59253        let r = _mm_maskz_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
59254        let e = _mm_set_pd(2.2, 1.0);
59255        assert_eq_m128d(r, e);
59256    }
59257
59258    #[simd_test(enable = "avx512f")]
59259    unsafe fn test_mm_scalef_round_ss() {
59260        let a = _mm_set1_ps(1.);
59261        let b = _mm_set1_ps(3.);
59262        let r = _mm_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
59263        let e = _mm_set_ps(1., 1., 1., 8.);
59264        assert_eq_m128(r, e);
59265    }
59266
59267    #[simd_test(enable = "avx512f")]
59268    unsafe fn test_mm_mask_scalef_round_ss() {
59269        let a = _mm_set1_ps(1.);
59270        let b = _mm_set1_ps(3.);
59271        let r = _mm_mask_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59272            a, 0, a, b,
59273        );
59274        let e = _mm_set_ps(1., 1., 1., 1.);
59275        assert_eq_m128(r, e);
59276        let r = _mm_mask_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59277            a, 0b11111111, a, b,
59278        );
59279        let e = _mm_set_ps(1., 1., 1., 8.);
59280        assert_eq_m128(r, e);
59281    }
59282
59283    #[simd_test(enable = "avx512f")]
59284    unsafe fn test_mm_maskz_scalef_round_ss() {
59285        let a = _mm_set1_ps(1.);
59286        let b = _mm_set1_ps(3.);
59287        let r =
59288            _mm_maskz_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
59289        let e = _mm_set_ps(1., 1., 1., 0.);
59290        assert_eq_m128(r, e);
59291        let r = _mm_maskz_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59292            0b11111111, a, b,
59293        );
59294        let e = _mm_set_ps(1., 1., 1., 8.);
59295        assert_eq_m128(r, e);
59296    }
59297
59298    #[simd_test(enable = "avx512f")]
59299    unsafe fn test_mm_scalef_round_sd() {
59300        let a = _mm_set1_pd(1.);
59301        let b = _mm_set1_pd(3.);
59302        let r = _mm_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
59303        let e = _mm_set_pd(1., 8.);
59304        assert_eq_m128d(r, e);
59305    }
59306
59307    #[simd_test(enable = "avx512f")]
59308    unsafe fn test_mm_mask_scalef_round_sd() {
59309        let a = _mm_set1_pd(1.);
59310        let b = _mm_set1_pd(3.);
59311        let r = _mm_mask_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59312            a, 0, a, b,
59313        );
59314        let e = _mm_set_pd(1., 1.);
59315        assert_eq_m128d(r, e);
59316        let r = _mm_mask_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59317            a, 0b11111111, a, b,
59318        );
59319        let e = _mm_set_pd(1., 8.);
59320        assert_eq_m128d(r, e);
59321    }
59322
59323    #[simd_test(enable = "avx512f")]
59324    unsafe fn test_mm_maskz_scalef_round_sd() {
59325        let a = _mm_set1_pd(1.);
59326        let b = _mm_set1_pd(3.);
59327        let r =
59328            _mm_maskz_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
59329        let e = _mm_set_pd(1., 0.);
59330        assert_eq_m128d(r, e);
59331        let r = _mm_maskz_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59332            0b11111111, a, b,
59333        );
59334        let e = _mm_set_pd(1., 8.);
59335        assert_eq_m128d(r, e);
59336    }
59337
59338    #[simd_test(enable = "avx512f")]
59339    unsafe fn test_mm_fmadd_round_ss() {
59340        let a = _mm_set1_ps(1.);
59341        let b = _mm_set1_ps(2.);
59342        let c = _mm_set1_ps(3.);
59343        let r = _mm_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59344        let e = _mm_set_ps(1., 1., 1., 5.);
59345        assert_eq_m128(r, e);
59346    }
59347
59348    #[simd_test(enable = "avx512f")]
59349    unsafe fn test_mm_mask_fmadd_round_ss() {
59350        let a = _mm_set1_ps(1.);
59351        let b = _mm_set1_ps(2.);
59352        let c = _mm_set1_ps(3.);
59353        let r = _mm_mask_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59354            a, 0, b, c,
59355        );
59356        assert_eq_m128(r, a);
59357        let r = _mm_mask_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59358            a, 0b11111111, b, c,
59359        );
59360        let e = _mm_set_ps(1., 1., 1., 5.);
59361        assert_eq_m128(r, e);
59362    }
59363
59364    #[simd_test(enable = "avx512f")]
59365    unsafe fn test_mm_maskz_fmadd_round_ss() {
59366        let a = _mm_set1_ps(1.);
59367        let b = _mm_set1_ps(2.);
59368        let c = _mm_set1_ps(3.);
59369        let r = _mm_maskz_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59370            0, a, b, c,
59371        );
59372        let e = _mm_set_ps(1., 1., 1., 0.);
59373        assert_eq_m128(r, e);
59374        let r = _mm_maskz_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59375            0b11111111, a, b, c,
59376        );
59377        let e = _mm_set_ps(1., 1., 1., 5.);
59378        assert_eq_m128(r, e);
59379    }
59380
59381    #[simd_test(enable = "avx512f")]
59382    unsafe fn test_mm_mask3_fmadd_round_ss() {
59383        let a = _mm_set1_ps(1.);
59384        let b = _mm_set1_ps(2.);
59385        let c = _mm_set1_ps(3.);
59386        let r = _mm_mask3_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59387            a, b, c, 0,
59388        );
59389        assert_eq_m128(r, c);
59390        let r = _mm_mask3_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59391            a, b, c, 0b11111111,
59392        );
59393        let e = _mm_set_ps(3., 3., 3., 5.);
59394        assert_eq_m128(r, e);
59395    }
59396
59397    #[simd_test(enable = "avx512f")]
59398    unsafe fn test_mm_fmadd_round_sd() {
59399        let a = _mm_set1_pd(1.);
59400        let b = _mm_set1_pd(2.);
59401        let c = _mm_set1_pd(3.);
59402        let r = _mm_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59403        let e = _mm_set_pd(1., 5.);
59404        assert_eq_m128d(r, e);
59405    }
59406
59407    #[simd_test(enable = "avx512f")]
59408    unsafe fn test_mm_mask_fmadd_round_sd() {
59409        let a = _mm_set1_pd(1.);
59410        let b = _mm_set1_pd(2.);
59411        let c = _mm_set1_pd(3.);
59412        let r = _mm_mask_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59413            a, 0, b, c,
59414        );
59415        assert_eq_m128d(r, a);
59416        let r = _mm_mask_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59417            a, 0b11111111, b, c,
59418        );
59419        let e = _mm_set_pd(1., 5.);
59420        assert_eq_m128d(r, e);
59421    }
59422
59423    #[simd_test(enable = "avx512f")]
59424    unsafe fn test_mm_maskz_fmadd_round_sd() {
59425        let a = _mm_set1_pd(1.);
59426        let b = _mm_set1_pd(2.);
59427        let c = _mm_set1_pd(3.);
59428        let r = _mm_maskz_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59429            0, a, b, c,
59430        );
59431        let e = _mm_set_pd(1., 0.);
59432        assert_eq_m128d(r, e);
59433        let r = _mm_maskz_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59434            0b11111111, a, b, c,
59435        );
59436        let e = _mm_set_pd(1., 5.);
59437        assert_eq_m128d(r, e);
59438    }
59439
59440    #[simd_test(enable = "avx512f")]
59441    unsafe fn test_mm_mask3_fmadd_round_sd() {
59442        let a = _mm_set1_pd(1.);
59443        let b = _mm_set1_pd(2.);
59444        let c = _mm_set1_pd(3.);
59445        let r = _mm_mask3_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59446            a, b, c, 0,
59447        );
59448        assert_eq_m128d(r, c);
59449        let r = _mm_mask3_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59450            a, b, c, 0b11111111,
59451        );
59452        let e = _mm_set_pd(3., 5.);
59453        assert_eq_m128d(r, e);
59454    }
59455
59456    #[simd_test(enable = "avx512f")]
59457    unsafe fn test_mm_fmsub_round_ss() {
59458        let a = _mm_set1_ps(1.);
59459        let b = _mm_set1_ps(2.);
59460        let c = _mm_set1_ps(3.);
59461        let r = _mm_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59462        let e = _mm_set_ps(1., 1., 1., -1.);
59463        assert_eq_m128(r, e);
59464    }
59465
59466    #[simd_test(enable = "avx512f")]
59467    unsafe fn test_mm_mask_fmsub_round_ss() {
59468        let a = _mm_set1_ps(1.);
59469        let b = _mm_set1_ps(2.);
59470        let c = _mm_set1_ps(3.);
59471        let r = _mm_mask_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59472            a, 0, b, c,
59473        );
59474        assert_eq_m128(r, a);
59475        let r = _mm_mask_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59476            a, 0b11111111, b, c,
59477        );
59478        let e = _mm_set_ps(1., 1., 1., -1.);
59479        assert_eq_m128(r, e);
59480    }
59481
59482    #[simd_test(enable = "avx512f")]
59483    unsafe fn test_mm_maskz_fmsub_round_ss() {
59484        let a = _mm_set1_ps(1.);
59485        let b = _mm_set1_ps(2.);
59486        let c = _mm_set1_ps(3.);
59487        let r = _mm_maskz_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59488            0, a, b, c,
59489        );
59490        let e = _mm_set_ps(1., 1., 1., 0.);
59491        assert_eq_m128(r, e);
59492        let r = _mm_maskz_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59493            0b11111111, a, b, c,
59494        );
59495        let e = _mm_set_ps(1., 1., 1., -1.);
59496        assert_eq_m128(r, e);
59497    }
59498
59499    #[simd_test(enable = "avx512f")]
59500    unsafe fn test_mm_mask3_fmsub_round_ss() {
59501        let a = _mm_set1_ps(1.);
59502        let b = _mm_set1_ps(2.);
59503        let c = _mm_set1_ps(3.);
59504        let r = _mm_mask3_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59505            a, b, c, 0,
59506        );
59507        assert_eq_m128(r, c);
59508        let r = _mm_mask3_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59509            a, b, c, 0b11111111,
59510        );
59511        let e = _mm_set_ps(3., 3., 3., -1.);
59512        assert_eq_m128(r, e);
59513    }
59514
59515    #[simd_test(enable = "avx512f")]
59516    unsafe fn test_mm_fmsub_round_sd() {
59517        let a = _mm_set1_pd(1.);
59518        let b = _mm_set1_pd(2.);
59519        let c = _mm_set1_pd(3.);
59520        let r = _mm_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59521        let e = _mm_set_pd(1., -1.);
59522        assert_eq_m128d(r, e);
59523    }
59524
59525    #[simd_test(enable = "avx512f")]
59526    unsafe fn test_mm_mask_fmsub_round_sd() {
59527        let a = _mm_set1_pd(1.);
59528        let b = _mm_set1_pd(2.);
59529        let c = _mm_set1_pd(3.);
59530        let r = _mm_mask_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59531            a, 0, b, c,
59532        );
59533        assert_eq_m128d(r, a);
59534        let r = _mm_mask_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59535            a, 0b11111111, b, c,
59536        );
59537        let e = _mm_set_pd(1., -1.);
59538        assert_eq_m128d(r, e);
59539    }
59540
59541    #[simd_test(enable = "avx512f")]
59542    unsafe fn test_mm_maskz_fmsub_round_sd() {
59543        let a = _mm_set1_pd(1.);
59544        let b = _mm_set1_pd(2.);
59545        let c = _mm_set1_pd(3.);
59546        let r = _mm_maskz_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59547            0, a, b, c,
59548        );
59549        let e = _mm_set_pd(1., 0.);
59550        assert_eq_m128d(r, e);
59551        let r = _mm_maskz_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59552            0b11111111, a, b, c,
59553        );
59554        let e = _mm_set_pd(1., -1.);
59555        assert_eq_m128d(r, e);
59556    }
59557
59558    #[simd_test(enable = "avx512f")]
59559    unsafe fn test_mm_mask3_fmsub_round_sd() {
59560        let a = _mm_set1_pd(1.);
59561        let b = _mm_set1_pd(2.);
59562        let c = _mm_set1_pd(3.);
59563        let r = _mm_mask3_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59564            a, b, c, 0,
59565        );
59566        assert_eq_m128d(r, c);
59567        let r = _mm_mask3_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59568            a, b, c, 0b11111111,
59569        );
59570        let e = _mm_set_pd(3., -1.);
59571        assert_eq_m128d(r, e);
59572    }
59573
59574    #[simd_test(enable = "avx512f")]
59575    unsafe fn test_mm_fnmadd_round_ss() {
59576        let a = _mm_set1_ps(1.);
59577        let b = _mm_set1_ps(2.);
59578        let c = _mm_set1_ps(3.);
59579        let r = _mm_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59580        let e = _mm_set_ps(1., 1., 1., 1.);
59581        assert_eq_m128(r, e);
59582    }
59583
59584    #[simd_test(enable = "avx512f")]
59585    unsafe fn test_mm_mask_fnmadd_round_ss() {
59586        let a = _mm_set1_ps(1.);
59587        let b = _mm_set1_ps(2.);
59588        let c = _mm_set1_ps(3.);
59589        let r = _mm_mask_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59590            a, 0, b, c,
59591        );
59592        assert_eq_m128(r, a);
59593        let r = _mm_mask_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59594            a, 0b11111111, b, c,
59595        );
59596        let e = _mm_set_ps(1., 1., 1., 1.);
59597        assert_eq_m128(r, e);
59598    }
59599
59600    #[simd_test(enable = "avx512f")]
59601    unsafe fn test_mm_maskz_fnmadd_round_ss() {
59602        let a = _mm_set1_ps(1.);
59603        let b = _mm_set1_ps(2.);
59604        let c = _mm_set1_ps(3.);
59605        let r = _mm_maskz_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59606            0, a, b, c,
59607        );
59608        let e = _mm_set_ps(1., 1., 1., 0.);
59609        assert_eq_m128(r, e);
59610        let r = _mm_maskz_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59611            0b11111111, a, b, c,
59612        );
59613        let e = _mm_set_ps(1., 1., 1., 1.);
59614        assert_eq_m128(r, e);
59615    }
59616
59617    #[simd_test(enable = "avx512f")]
59618    unsafe fn test_mm_mask3_fnmadd_round_ss() {
59619        let a = _mm_set1_ps(1.);
59620        let b = _mm_set1_ps(2.);
59621        let c = _mm_set1_ps(3.);
59622        let r = _mm_mask3_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59623            a, b, c, 0,
59624        );
59625        assert_eq_m128(r, c);
59626        let r = _mm_mask3_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59627            a, b, c, 0b11111111,
59628        );
59629        let e = _mm_set_ps(3., 3., 3., 1.);
59630        assert_eq_m128(r, e);
59631    }
59632
59633    #[simd_test(enable = "avx512f")]
59634    unsafe fn test_mm_fnmadd_round_sd() {
59635        let a = _mm_set1_pd(1.);
59636        let b = _mm_set1_pd(2.);
59637        let c = _mm_set1_pd(3.);
59638        let r = _mm_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59639        let e = _mm_set_pd(1., 1.);
59640        assert_eq_m128d(r, e);
59641    }
59642
59643    #[simd_test(enable = "avx512f")]
59644    unsafe fn test_mm_mask_fnmadd_round_sd() {
59645        let a = _mm_set1_pd(1.);
59646        let b = _mm_set1_pd(2.);
59647        let c = _mm_set1_pd(3.);
59648        let r = _mm_mask_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59649            a, 0, b, c,
59650        );
59651        assert_eq_m128d(r, a);
59652        let r = _mm_mask_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59653            a, 0b11111111, b, c,
59654        );
59655        let e = _mm_set_pd(1., 1.);
59656        assert_eq_m128d(r, e);
59657    }
59658
59659    #[simd_test(enable = "avx512f")]
59660    unsafe fn test_mm_maskz_fnmadd_round_sd() {
59661        let a = _mm_set1_pd(1.);
59662        let b = _mm_set1_pd(2.);
59663        let c = _mm_set1_pd(3.);
59664        let r = _mm_maskz_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59665            0, a, b, c,
59666        );
59667        let e = _mm_set_pd(1., 0.);
59668        assert_eq_m128d(r, e);
59669        let r = _mm_maskz_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59670            0b11111111, a, b, c,
59671        );
59672        let e = _mm_set_pd(1., 1.);
59673        assert_eq_m128d(r, e);
59674    }
59675
59676    #[simd_test(enable = "avx512f")]
59677    unsafe fn test_mm_mask3_fnmadd_round_sd() {
59678        let a = _mm_set1_pd(1.);
59679        let b = _mm_set1_pd(2.);
59680        let c = _mm_set1_pd(3.);
59681        let r = _mm_mask3_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59682            a, b, c, 0,
59683        );
59684        assert_eq_m128d(r, c);
59685        let r = _mm_mask3_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59686            a, b, c, 0b11111111,
59687        );
59688        let e = _mm_set_pd(3., 1.);
59689        assert_eq_m128d(r, e);
59690    }
59691
59692    #[simd_test(enable = "avx512f")]
59693    unsafe fn test_mm_fnmsub_round_ss() {
59694        let a = _mm_set1_ps(1.);
59695        let b = _mm_set1_ps(2.);
59696        let c = _mm_set1_ps(3.);
59697        let r = _mm_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59698        let e = _mm_set_ps(1., 1., 1., -5.);
59699        assert_eq_m128(r, e);
59700    }
59701
59702    #[simd_test(enable = "avx512f")]
59703    unsafe fn test_mm_mask_fnmsub_round_ss() {
59704        let a = _mm_set1_ps(1.);
59705        let b = _mm_set1_ps(2.);
59706        let c = _mm_set1_ps(3.);
59707        let r = _mm_mask_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59708            a, 0, b, c,
59709        );
59710        assert_eq_m128(r, a);
59711        let r = _mm_mask_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59712            a, 0b11111111, b, c,
59713        );
59714        let e = _mm_set_ps(1., 1., 1., -5.);
59715        assert_eq_m128(r, e);
59716    }
59717
59718    #[simd_test(enable = "avx512f")]
59719    unsafe fn test_mm_maskz_fnmsub_round_ss() {
59720        let a = _mm_set1_ps(1.);
59721        let b = _mm_set1_ps(2.);
59722        let c = _mm_set1_ps(3.);
59723        let r = _mm_maskz_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59724            0, a, b, c,
59725        );
59726        let e = _mm_set_ps(1., 1., 1., 0.);
59727        assert_eq_m128(r, e);
59728        let r = _mm_maskz_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59729            0b11111111, a, b, c,
59730        );
59731        let e = _mm_set_ps(1., 1., 1., -5.);
59732        assert_eq_m128(r, e);
59733    }
59734
59735    #[simd_test(enable = "avx512f")]
59736    unsafe fn test_mm_mask3_fnmsub_round_ss() {
59737        let a = _mm_set1_ps(1.);
59738        let b = _mm_set1_ps(2.);
59739        let c = _mm_set1_ps(3.);
59740        let r = _mm_mask3_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59741            a, b, c, 0,
59742        );
59743        assert_eq_m128(r, c);
59744        let r = _mm_mask3_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59745            a, b, c, 0b11111111,
59746        );
59747        let e = _mm_set_ps(3., 3., 3., -5.);
59748        assert_eq_m128(r, e);
59749    }
59750
59751    #[simd_test(enable = "avx512f")]
59752    unsafe fn test_mm_fnmsub_round_sd() {
59753        let a = _mm_set1_pd(1.);
59754        let b = _mm_set1_pd(2.);
59755        let c = _mm_set1_pd(3.);
59756        let r = _mm_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59757        let e = _mm_set_pd(1., -5.);
59758        assert_eq_m128d(r, e);
59759    }
59760
59761    #[simd_test(enable = "avx512f")]
59762    unsafe fn test_mm_mask_fnmsub_round_sd() {
59763        let a = _mm_set1_pd(1.);
59764        let b = _mm_set1_pd(2.);
59765        let c = _mm_set1_pd(3.);
59766        let r = _mm_mask_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59767            a, 0, b, c,
59768        );
59769        assert_eq_m128d(r, a);
59770        let r = _mm_mask_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59771            a, 0b11111111, b, c,
59772        );
59773        let e = _mm_set_pd(1., -5.);
59774        assert_eq_m128d(r, e);
59775    }
59776
59777    #[simd_test(enable = "avx512f")]
59778    unsafe fn test_mm_maskz_fnmsub_round_sd() {
59779        let a = _mm_set1_pd(1.);
59780        let b = _mm_set1_pd(2.);
59781        let c = _mm_set1_pd(3.);
59782        let r = _mm_maskz_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59783            0, a, b, c,
59784        );
59785        let e = _mm_set_pd(1., 0.);
59786        assert_eq_m128d(r, e);
59787        let r = _mm_maskz_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59788            0b11111111, a, b, c,
59789        );
59790        let e = _mm_set_pd(1., -5.);
59791        assert_eq_m128d(r, e);
59792    }
59793
59794    #[simd_test(enable = "avx512f")]
59795    unsafe fn test_mm_mask3_fnmsub_round_sd() {
59796        let a = _mm_set1_pd(1.);
59797        let b = _mm_set1_pd(2.);
59798        let c = _mm_set1_pd(3.);
59799        let r = _mm_mask3_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59800            a, b, c, 0,
59801        );
59802        assert_eq_m128d(r, c);
59803        let r = _mm_mask3_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59804            a, b, c, 0b11111111,
59805        );
59806        let e = _mm_set_pd(3., -5.);
59807        assert_eq_m128d(r, e);
59808    }
59809
59810    #[simd_test(enable = "avx512f")]
59811    unsafe fn test_mm_fixupimm_ss() {
59812        let a = _mm_set_ps(0., 0., 0., f32::NAN);
59813        let b = _mm_set1_ps(f32::MAX);
59814        let c = _mm_set1_epi32(i32::MAX);
59815        let r = _mm_fixupimm_ss::<5>(a, b, c);
59816        let e = _mm_set_ps(0., 0., 0., -0.0);
59817        assert_eq_m128(r, e);
59818    }
59819
59820    #[simd_test(enable = "avx512f")]
59821    unsafe fn test_mm_mask_fixupimm_ss() {
59822        let a = _mm_set_ps(0., 0., 0., f32::NAN);
59823        let b = _mm_set1_ps(f32::MAX);
59824        let c = _mm_set1_epi32(i32::MAX);
59825        let r = _mm_mask_fixupimm_ss::<5>(a, 0b11111111, b, c);
59826        let e = _mm_set_ps(0., 0., 0., -0.0);
59827        assert_eq_m128(r, e);
59828    }
59829
59830    #[simd_test(enable = "avx512f")]
59831    unsafe fn test_mm_maskz_fixupimm_ss() {
59832        let a = _mm_set_ps(0., 0., 0., f32::NAN);
59833        let b = _mm_set1_ps(f32::MAX);
59834        let c = _mm_set1_epi32(i32::MAX);
59835        let r = _mm_maskz_fixupimm_ss::<5>(0b00000000, a, b, c);
59836        let e = _mm_set_ps(0., 0., 0., 0.0);
59837        assert_eq_m128(r, e);
59838        let r = _mm_maskz_fixupimm_ss::<5>(0b11111111, a, b, c);
59839        let e = _mm_set_ps(0., 0., 0., -0.0);
59840        assert_eq_m128(r, e);
59841    }
59842
59843    #[simd_test(enable = "avx512f")]
59844    unsafe fn test_mm_fixupimm_sd() {
59845        let a = _mm_set_pd(0., f64::NAN);
59846        let b = _mm_set1_pd(f64::MAX);
59847        let c = _mm_set1_epi64x(i32::MAX as i64);
59848        let r = _mm_fixupimm_sd::<5>(a, b, c);
59849        let e = _mm_set_pd(0., -0.0);
59850        assert_eq_m128d(r, e);
59851    }
59852
59853    #[simd_test(enable = "avx512f")]
59854    unsafe fn test_mm_mask_fixupimm_sd() {
59855        let a = _mm_set_pd(0., f64::NAN);
59856        let b = _mm_set1_pd(f64::MAX);
59857        let c = _mm_set1_epi64x(i32::MAX as i64);
59858        let r = _mm_mask_fixupimm_sd::<5>(a, 0b11111111, b, c);
59859        let e = _mm_set_pd(0., -0.0);
59860        assert_eq_m128d(r, e);
59861    }
59862
59863    #[simd_test(enable = "avx512f")]
59864    unsafe fn test_mm_maskz_fixupimm_sd() {
59865        let a = _mm_set_pd(0., f64::NAN);
59866        let b = _mm_set1_pd(f64::MAX);
59867        let c = _mm_set1_epi64x(i32::MAX as i64);
59868        let r = _mm_maskz_fixupimm_sd::<5>(0b00000000, a, b, c);
59869        let e = _mm_set_pd(0., 0.0);
59870        assert_eq_m128d(r, e);
59871        let r = _mm_maskz_fixupimm_sd::<5>(0b11111111, a, b, c);
59872        let e = _mm_set_pd(0., -0.0);
59873        assert_eq_m128d(r, e);
59874    }
59875
59876    #[simd_test(enable = "avx512f")]
59877    unsafe fn test_mm_fixupimm_round_ss() {
59878        let a = _mm_set_ps(1., 0., 0., f32::NAN);
59879        let b = _mm_set1_ps(f32::MAX);
59880        let c = _mm_set1_epi32(i32::MAX);
59881        let r = _mm_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
59882        let e = _mm_set_ps(1., 0., 0., -0.0);
59883        assert_eq_m128(r, e);
59884    }
59885
59886    #[simd_test(enable = "avx512f")]
59887    unsafe fn test_mm_mask_fixupimm_round_ss() {
59888        let a = _mm_set_ps(0., 0., 0., f32::NAN);
59889        let b = _mm_set1_ps(f32::MAX);
59890        let c = _mm_set1_epi32(i32::MAX);
59891        let r = _mm_mask_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, b, c);
59892        let e = _mm_set_ps(0., 0., 0., -0.0);
59893        assert_eq_m128(r, e);
59894    }
59895
59896    #[simd_test(enable = "avx512f")]
59897    unsafe fn test_mm_maskz_fixupimm_round_ss() {
59898        let a = _mm_set_ps(0., 0., 0., f32::NAN);
59899        let b = _mm_set1_ps(f32::MAX);
59900        let c = _mm_set1_epi32(i32::MAX);
59901        let r = _mm_maskz_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(0b00000000, a, b, c);
59902        let e = _mm_set_ps(0., 0., 0., 0.0);
59903        assert_eq_m128(r, e);
59904        let r = _mm_maskz_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b, c);
59905        let e = _mm_set_ps(0., 0., 0., -0.0);
59906        assert_eq_m128(r, e);
59907    }
59908
59909    #[simd_test(enable = "avx512f")]
59910    unsafe fn test_mm_fixupimm_round_sd() {
59911        let a = _mm_set_pd(0., f64::NAN);
59912        let b = _mm_set1_pd(f64::MAX);
59913        let c = _mm_set1_epi64x(i32::MAX as i64);
59914        let r = _mm_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
59915        let e = _mm_set_pd(0., -0.0);
59916        assert_eq_m128d(r, e);
59917    }
59918
59919    #[simd_test(enable = "avx512f")]
59920    unsafe fn test_mm_mask_fixupimm_round_sd() {
59921        let a = _mm_set_pd(0., f64::NAN);
59922        let b = _mm_set1_pd(f64::MAX);
59923        let c = _mm_set1_epi64x(i32::MAX as i64);
59924        let r = _mm_mask_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, b, c);
59925        let e = _mm_set_pd(0., -0.0);
59926        assert_eq_m128d(r, e);
59927    }
59928
59929    #[simd_test(enable = "avx512f")]
59930    unsafe fn test_mm_maskz_fixupimm_round_sd() {
59931        let a = _mm_set_pd(0., f64::NAN);
59932        let b = _mm_set1_pd(f64::MAX);
59933        let c = _mm_set1_epi64x(i32::MAX as i64);
59934        let r = _mm_maskz_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(0b00000000, a, b, c);
59935        let e = _mm_set_pd(0., 0.0);
59936        assert_eq_m128d(r, e);
59937        let r = _mm_maskz_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b, c);
59938        let e = _mm_set_pd(0., -0.0);
59939        assert_eq_m128d(r, e);
59940    }
59941
59942    #[simd_test(enable = "avx512f")]
59943    unsafe fn test_mm_mask_cvtss_sd() {
59944        let a = _mm_set_pd(6., -7.5);
59945        let b = _mm_set_ps(0., -0.5, 1., -1.5);
59946        let r = _mm_mask_cvtss_sd(a, 0, a, b);
59947        assert_eq_m128d(r, a);
59948        let r = _mm_mask_cvtss_sd(a, 0b11111111, a, b);
59949        let e = _mm_set_pd(6., -1.5);
59950        assert_eq_m128d(r, e);
59951    }
59952
59953    #[simd_test(enable = "avx512f")]
59954    unsafe fn test_mm_maskz_cvtss_sd() {
59955        let a = _mm_set_pd(6., -7.5);
59956        let b = _mm_set_ps(0., -0.5, 1., -1.5);
59957        let r = _mm_maskz_cvtss_sd(0, a, b);
59958        let e = _mm_set_pd(6., 0.);
59959        assert_eq_m128d(r, e);
59960        let r = _mm_maskz_cvtss_sd(0b11111111, a, b);
59961        let e = _mm_set_pd(6., -1.5);
59962        assert_eq_m128d(r, e);
59963    }
59964
59965    #[simd_test(enable = "avx512f")]
59966    unsafe fn test_mm_mask_cvtsd_ss() {
59967        let a = _mm_set_ps(0., -0.5, 1., -1.5);
59968        let b = _mm_set_pd(6., -7.5);
59969        let r = _mm_mask_cvtsd_ss(a, 0, a, b);
59970        assert_eq_m128(r, a);
59971        let r = _mm_mask_cvtsd_ss(a, 0b11111111, a, b);
59972        let e = _mm_set_ps(0., -0.5, 1., -7.5);
59973        assert_eq_m128(r, e);
59974    }
59975
59976    #[simd_test(enable = "avx512f")]
59977    unsafe fn test_mm_maskz_cvtsd_ss() {
59978        let a = _mm_set_ps(0., -0.5, 1., -1.5);
59979        let b = _mm_set_pd(6., -7.5);
59980        let r = _mm_maskz_cvtsd_ss(0, a, b);
59981        let e = _mm_set_ps(0., -0.5, 1., 0.);
59982        assert_eq_m128(r, e);
59983        let r = _mm_maskz_cvtsd_ss(0b11111111, a, b);
59984        let e = _mm_set_ps(0., -0.5, 1., -7.5);
59985        assert_eq_m128(r, e);
59986    }
59987
59988    #[simd_test(enable = "avx512f")]
59989    unsafe fn test_mm_cvt_roundss_sd() {
59990        let a = _mm_set_pd(6., -7.5);
59991        let b = _mm_set_ps(0., -0.5, 1., -1.5);
59992        let r = _mm_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
59993        let e = _mm_set_pd(6., -1.5);
59994        assert_eq_m128d(r, e);
59995    }
59996
59997    #[simd_test(enable = "avx512f")]
59998    unsafe fn test_mm_mask_cvt_roundss_sd() {
59999        let a = _mm_set_pd(6., -7.5);
60000        let b = _mm_set_ps(0., -0.5, 1., -1.5);
60001        let r = _mm_mask_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
60002        assert_eq_m128d(r, a);
60003        let r = _mm_mask_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
60004        let e = _mm_set_pd(6., -1.5);
60005        assert_eq_m128d(r, e);
60006    }
60007
60008    #[simd_test(enable = "avx512f")]
60009    unsafe fn test_mm_maskz_cvt_roundss_sd() {
60010        let a = _mm_set_pd(6., -7.5);
60011        let b = _mm_set_ps(0., -0.5, 1., -1.5);
60012        let r = _mm_maskz_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
60013        let e = _mm_set_pd(6., 0.);
60014        assert_eq_m128d(r, e);
60015        let r = _mm_maskz_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
60016        let e = _mm_set_pd(6., -1.5);
60017        assert_eq_m128d(r, e);
60018    }
60019
60020    #[simd_test(enable = "avx512f")]
60021    unsafe fn test_mm_cvt_roundsd_ss() {
60022        let a = _mm_set_ps(0., -0.5, 1., -1.5);
60023        let b = _mm_set_pd(6., -7.5);
60024        let r = _mm_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60025        let e = _mm_set_ps(0., -0.5, 1., -7.5);
60026        assert_eq_m128(r, e);
60027    }
60028
60029    #[simd_test(enable = "avx512f")]
60030    unsafe fn test_mm_mask_cvt_roundsd_ss() {
60031        let a = _mm_set_ps(0., -0.5, 1., -1.5);
60032        let b = _mm_set_pd(6., -7.5);
60033        let r = _mm_mask_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, 0, a, b);
60034        assert_eq_m128(r, a);
60035        let r = _mm_mask_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
60036            a, 0b11111111, a, b,
60037        );
60038        let e = _mm_set_ps(0., -0.5, 1., -7.5);
60039        assert_eq_m128(r, e);
60040    }
60041
60042    #[simd_test(enable = "avx512f")]
60043    unsafe fn test_mm_maskz_cvt_roundsd_ss() {
60044        let a = _mm_set_ps(0., -0.5, 1., -1.5);
60045        let b = _mm_set_pd(6., -7.5);
60046        let r = _mm_maskz_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
60047        let e = _mm_set_ps(0., -0.5, 1., 0.);
60048        assert_eq_m128(r, e);
60049        let r = _mm_maskz_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
60050            0b11111111, a, b,
60051        );
60052        let e = _mm_set_ps(0., -0.5, 1., -7.5);
60053        assert_eq_m128(r, e);
60054    }
60055
60056    #[simd_test(enable = "avx512f")]
60057    unsafe fn test_mm_cvt_roundss_si32() {
60058        let a = _mm_set_ps(0., -0.5, 1., -1.5);
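        // With _MM_FROUND_TO_ZERO the conversion truncates toward zero, so -1.5 becomes -1.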
60059        let r = _mm_cvt_roundss_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
60060        let e: i32 = -1;
60061        assert_eq!(r, e);
60062    }
60063
60064    #[simd_test(enable = "avx512f")]
60065    unsafe fn test_mm_cvt_roundss_i32() {
60066        let a = _mm_set_ps(0., -0.5, 1., -1.5);
60067        let r = _mm_cvt_roundss_i32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
60068        let e: i32 = -1;
60069        assert_eq!(r, e);
60070    }
60071
60072    #[simd_test(enable = "avx512f")]
60073    unsafe fn test_mm_cvt_roundss_u32() {
60074        let a = _mm_set_ps(0., -0.5, 1., -1.5);
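        // A negative result cannot be represented as u32, so the conversion returns the
        // all-ones value (u32::MAX) used for unrepresentable unsigned results.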
60075        let r = _mm_cvt_roundss_u32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
60076        let e: u32 = u32::MAX;
60077        assert_eq!(r, e);
60078    }
60079
60080    #[simd_test(enable = "avx512f")]
60081    unsafe fn test_mm_cvtss_i32() {
60082        let a = _mm_set_ps(0., -0.5, 1., -1.5);
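        // This variant uses the current MXCSR rounding mode (round-to-nearest-even by
        // default), so -1.5 rounds to -2.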
60083        let r = _mm_cvtss_i32(a);
60084        let e: i32 = -2;
60085        assert_eq!(r, e);
60086    }
60087
60088    #[simd_test(enable = "avx512f")]
60089    unsafe fn test_mm_cvtss_u32() {
60090        let a = _mm_set_ps(0., -0.5, 1., -1.5);
60091        let r = _mm_cvtss_u32(a);
60092        let e: u32 = u32::MAX;
60093        assert_eq!(r, e);
60094    }
60095
60096    #[simd_test(enable = "avx512f")]
60097    unsafe fn test_mm_cvt_roundsd_si32() {
60098        let a = _mm_set_pd(1., -1.5);
60099        let r = _mm_cvt_roundsd_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
60100        let e: i32 = -1;
60101        assert_eq!(r, e);
60102    }
60103
60104    #[simd_test(enable = "avx512f")]
60105    unsafe fn test_mm_cvt_roundsd_i32() {
60106        let a = _mm_set_pd(1., -1.5);
60107        let r = _mm_cvt_roundsd_i32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
60108        let e: i32 = -1;
60109        assert_eq!(r, e);
60110    }
60111
60112    #[simd_test(enable = "avx512f")]
60113    unsafe fn test_mm_cvt_roundsd_u32() {
60114        let a = _mm_set_pd(1., -1.5);
60115        let r = _mm_cvt_roundsd_u32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
60116        let e: u32 = u32::MAX;
60117        assert_eq!(r, e);
60118    }
60119
60120    #[simd_test(enable = "avx512f")]
60121    unsafe fn test_mm_cvtsd_i32() {
60122        let a = _mm_set_pd(1., -1.5);
60123        let r = _mm_cvtsd_i32(a);
60124        let e: i32 = -2;
60125        assert_eq!(r, e);
60126    }
60127
60128    #[simd_test(enable = "avx512f")]
60129    unsafe fn test_mm_cvtsd_u32() {
60130        let a = _mm_set_pd(1., -1.5);
60131        let r = _mm_cvtsd_u32(a);
60132        let e: u32 = u32::MAX;
60133        assert_eq!(r, e);
60134    }
60135
60136    #[simd_test(enable = "avx512f")]
60137    unsafe fn test_mm_cvt_roundi32_ss() {
60138        let a = _mm_set_ps(0., -0.5, 1., -1.5);
60139        let b: i32 = 9;
60140        let r = _mm_cvt_roundi32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60141        let e = _mm_set_ps(0., -0.5, 1., 9.);
60142        assert_eq_m128(r, e);
60143    }
60144
60145    #[simd_test(enable = "avx512f")]
60146    unsafe fn test_mm_cvt_roundsi32_ss() {
60147        let a = _mm_set_ps(0., -0.5, 1., -1.5);
60148        let b: i32 = 9;
60149        let r = _mm_cvt_roundsi32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60150        let e = _mm_set_ps(0., -0.5, 1., 9.);
60151        assert_eq_m128(r, e);
60152    }
60153
60154    #[simd_test(enable = "avx512f")]
60155    unsafe fn test_mm_cvt_roundu32_ss() {
60156        let a = _mm_set_ps(0., -0.5, 1., -1.5);
60157        let b: u32 = 9;
60158        let r = _mm_cvt_roundu32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60159        let e = _mm_set_ps(0., -0.5, 1., 9.);
60160        assert_eq_m128(r, e);
60161    }
60162
60163    #[simd_test(enable = "avx512f")]
60164    unsafe fn test_mm_cvti32_ss() {
60165        let a = _mm_set_ps(0., -0.5, 1., -1.5);
60166        let b: i32 = 9;
60167        let r = _mm_cvti32_ss(a, b);
60168        let e = _mm_set_ps(0., -0.5, 1., 9.);
60169        assert_eq_m128(r, e);
60170    }
60171
60172    #[simd_test(enable = "avx512f")]
60173    unsafe fn test_mm_cvti32_sd() {
60174        let a = _mm_set_pd(1., -1.5);
60175        let b: i32 = 9;
60176        let r = _mm_cvti32_sd(a, b);
60177        let e = _mm_set_pd(1., 9.);
60178        assert_eq_m128d(r, e);
60179    }
60180
60181    #[simd_test(enable = "avx512f")]
60182    unsafe fn test_mm_cvtt_roundss_si32() {
60183        let a = _mm_set_ps(0., -0.5, 1., -1.5);
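        // The truncating ("tt") conversions always round toward zero regardless of the
        // rounding mode, so -1.5 becomes -1.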
60184        let r = _mm_cvtt_roundss_si32::<_MM_FROUND_NO_EXC>(a);
60185        let e: i32 = -1;
60186        assert_eq!(r, e);
60187    }
60188
60189    #[simd_test(enable = "avx512f")]
60190    unsafe fn test_mm_cvtt_roundss_i32() {
60191        let a = _mm_set_ps(0., -0.5, 1., -1.5);
60192        let r = _mm_cvtt_roundss_i32::<_MM_FROUND_NO_EXC>(a);
60193        let e: i32 = -1;
60194        assert_eq!(r, e);
60195    }
60196
60197    #[simd_test(enable = "avx512f")]
60198    unsafe fn test_mm_cvtt_roundss_u32() {
60199        let a = _mm_set_ps(0., -0.5, 1., -1.5);
60200        let r = _mm_cvtt_roundss_u32::<_MM_FROUND_NO_EXC>(a);
60201        let e: u32 = u32::MAX;
60202        assert_eq!(r, e);
60203    }
60204
60205    #[simd_test(enable = "avx512f")]
60206    unsafe fn test_mm_cvttss_i32() {
60207        let a = _mm_set_ps(0., -0.5, 1., -1.5);
60208        let r = _mm_cvttss_i32(a);
60209        let e: i32 = -1;
60210        assert_eq!(r, e);
60211    }
60212
60213    #[simd_test(enable = "avx512f")]
60214    unsafe fn test_mm_cvttss_u32() {
60215        let a = _mm_set_ps(0., -0.5, 1., -1.5);
60216        let r = _mm_cvttss_u32(a);
60217        let e: u32 = u32::MAX;
60218        assert_eq!(r, e);
60219    }
60220
60221    #[simd_test(enable = "avx512f")]
60222    unsafe fn test_mm_cvtt_roundsd_si32() {
60223        let a = _mm_set_pd(1., -1.5);
60224        let r = _mm_cvtt_roundsd_si32::<_MM_FROUND_NO_EXC>(a);
60225        let e: i32 = -1;
60226        assert_eq!(r, e);
60227    }
60228
60229    #[simd_test(enable = "avx512f")]
60230    unsafe fn test_mm_cvtt_roundsd_i32() {
60231        let a = _mm_set_pd(1., -1.5);
60232        let r = _mm_cvtt_roundsd_i32::<_MM_FROUND_NO_EXC>(a);
60233        let e: i32 = -1;
60234        assert_eq!(r, e);
60235    }
60236
60237    #[simd_test(enable = "avx512f")]
60238    unsafe fn test_mm_cvtt_roundsd_u32() {
60239        let a = _mm_set_pd(1., -1.5);
60240        let r = _mm_cvtt_roundsd_u32::<_MM_FROUND_NO_EXC>(a);
60241        let e: u32 = u32::MAX;
60242        assert_eq!(r, e);
60243    }
60244
60245    #[simd_test(enable = "avx512f")]
60246    unsafe fn test_mm_cvttsd_i32() {
60247        let a = _mm_set_pd(1., -1.5);
60248        let r = _mm_cvttsd_i32(a);
60249        let e: i32 = -1;
60250        assert_eq!(r, e);
60251    }
60252
60253    #[simd_test(enable = "avx512f")]
60254    unsafe fn test_mm_cvttsd_u32() {
60255        let a = _mm_set_pd(1., -1.5);
60256        let r = _mm_cvttsd_u32(a);
60257        let e: u32 = u32::MAX;
60258        assert_eq!(r, e);
60259    }
60260
60261    #[simd_test(enable = "avx512f")]
60262    unsafe fn test_mm_cvtu32_ss() {
60263        let a = _mm_set_ps(0., -0.5, 1., -1.5);
60264        let b: u32 = 9;
60265        let r = _mm_cvtu32_ss(a, b);
60266        let e = _mm_set_ps(0., -0.5, 1., 9.);
60267        assert_eq_m128(r, e);
60268    }
60269
60270    #[simd_test(enable = "avx512f")]
60271    unsafe fn test_mm_cvtu32_sd() {
60272        let a = _mm_set_pd(1., -1.5);
60273        let b: u32 = 9;
60274        let r = _mm_cvtu32_sd(a, b);
60275        let e = _mm_set_pd(1., 9.);
60276        assert_eq_m128d(r, e);
60277    }
60278
60279    #[simd_test(enable = "avx512f")]
60280    unsafe fn test_mm_comi_round_ss() {
60281        let a = _mm_set1_ps(2.2);
60282        let b = _mm_set1_ps(1.1);
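        // Predicate 0 is _CMP_EQ_OQ; 2.2 != 1.1, so the ordered-equal comparison yields 0.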
60283        let r = _mm_comi_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
60284        let e: i32 = 0;
60285        assert_eq!(r, e);
60286    }
60287
60288    #[simd_test(enable = "avx512f")]
60289    unsafe fn test_mm_comi_round_sd() {
60290        let a = _mm_set1_pd(2.2);
60291        let b = _mm_set1_pd(1.1);
60292        let r = _mm_comi_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
60293        let e: i32 = 0;
60294        assert_eq!(r, e);
60295    }
60296
60297    #[simd_test(enable = "avx512f")]
60298    unsafe fn test_mm512_cvtsi512_si32() {
60299        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
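        // _mm512_cvtsi512_si32 extracts the lowest 32-bit element of the vector.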
60300        let r = _mm512_cvtsi512_si32(a);
60301        let e: i32 = 1;
60302        assert_eq!(r, e);
60303    }
60304
60305    #[simd_test(enable = "avx512f")]
60306    unsafe fn test_mm512_cvtss_f32() {
60307        let a = _mm512_setr_ps(
60308            312.0134, 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
60309        );
60310        assert_eq!(_mm512_cvtss_f32(a), 312.0134);
60311    }
60312
60313    #[simd_test(enable = "avx512f")]
60314    unsafe fn test_mm512_cvtsd_f64() {
60315        let r = _mm512_cvtsd_f64(_mm512_setr_pd(-1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8));
60316        assert_eq!(r, -1.1);
60317    }
60318
60319    #[simd_test(enable = "avx512f")]
60320    unsafe fn test_mm512_shuffle_pd() {
60321        let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
60322        let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
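        // Within each 128-bit lane the immediate selects one element of `a` and one of `b`
        // per result pair; with all bits set the high (odd-indexed) elements are chosen,
        // giving (a1, b1, a3, b3, ...).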
60323        let r = _mm512_shuffle_pd::<0b11_11_11_11>(a, b);
60324        let e = _mm512_setr_pd(4., 3., 8., 7., 4., 3., 8., 7.);
60325        assert_eq_m512d(r, e);
60326    }
60327
60328    #[simd_test(enable = "avx512f")]
60329    unsafe fn test_mm512_mask_shuffle_pd() {
60330        let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
60331        let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
60332        let r = _mm512_mask_shuffle_pd::<0b11_11_11_11>(a, 0, a, b);
60333        assert_eq_m512d(r, a);
60334        let r = _mm512_mask_shuffle_pd::<0b11_11_11_11>(a, 0b11111111, a, b);
60335        let e = _mm512_setr_pd(4., 3., 8., 7., 4., 3., 8., 7.);
60336        assert_eq_m512d(r, e);
60337    }
60338
60339    #[simd_test(enable = "avx512f")]
60340    unsafe fn test_mm512_maskz_shuffle_pd() {
60341        let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
60342        let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
60343        let r = _mm512_maskz_shuffle_pd::<0b11_11_11_11>(0, a, b);
60344        assert_eq_m512d(r, _mm512_setzero_pd());
60345        let r = _mm512_maskz_shuffle_pd::<0b11_11_11_11>(0b00001111, a, b);
60346        let e = _mm512_setr_pd(4., 3., 8., 7., 0., 0., 0., 0.);
60347        assert_eq_m512d(r, e);
60348    }
60349
60350    #[simd_test(enable = "avx512f")]
60351    unsafe fn test_mm512_mask_expandloadu_epi32() {
60352        let src = _mm512_set1_epi32(42);
60353        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
60354        let p = a.as_ptr();
60355        let m = 0b11101000_11001010;
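        // Expand-load reads consecutive elements from memory into the destination lanes
        // whose mask bit is set (lowest bit = lane 0); unselected lanes keep the
        // corresponding element of `src` (or are zeroed in the maskz variant below).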
60356        let r = _mm512_mask_expandloadu_epi32(src, m, black_box(p));
60357        let e = _mm512_set_epi32(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42);
60358        assert_eq_m512i(r, e);
60359    }
60360
60361    #[simd_test(enable = "avx512f")]
60362    unsafe fn test_mm512_maskz_expandloadu_epi32() {
60363        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
60364        let p = a.as_ptr();
60365        let m = 0b11101000_11001010;
60366        let r = _mm512_maskz_expandloadu_epi32(m, black_box(p));
60367        let e = _mm512_set_epi32(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0);
60368        assert_eq_m512i(r, e);
60369    }
60370
60371    #[simd_test(enable = "avx512f,avx512vl")]
60372    unsafe fn test_mm256_mask_expandloadu_epi32() {
60373        let src = _mm256_set1_epi32(42);
60374        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
60375        let p = a.as_ptr();
60376        let m = 0b11101000;
60377        let r = _mm256_mask_expandloadu_epi32(src, m, black_box(p));
60378        let e = _mm256_set_epi32(4, 3, 2, 42, 1, 42, 42, 42);
60379        assert_eq_m256i(r, e);
60380    }
60381
60382    #[simd_test(enable = "avx512f,avx512vl")]
60383    unsafe fn test_mm256_maskz_expandloadu_epi32() {
60384        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
60385        let p = a.as_ptr();
60386        let m = 0b11101000;
60387        let r = _mm256_maskz_expandloadu_epi32(m, black_box(p));
60388        let e = _mm256_set_epi32(4, 3, 2, 0, 1, 0, 0, 0);
60389        assert_eq_m256i(r, e);
60390    }
60391
60392    #[simd_test(enable = "avx512f,avx512vl")]
60393    unsafe fn test_mm_mask_expandloadu_epi32() {
60394        let src = _mm_set1_epi32(42);
60395        let a = &[1_i32, 2, 3, 4];
60396        let p = a.as_ptr();
60397        let m = 0b11111000;
60398        let r = _mm_mask_expandloadu_epi32(src, m, black_box(p));
60399        let e = _mm_set_epi32(1, 42, 42, 42);
60400        assert_eq_m128i(r, e);
60401    }
60402
60403    #[simd_test(enable = "avx512f,avx512vl")]
60404    unsafe fn test_mm_maskz_expandloadu_epi32() {
60405        let a = &[1_i32, 2, 3, 4];
60406        let p = a.as_ptr();
60407        let m = 0b11111000;
60408        let r = _mm_maskz_expandloadu_epi32(m, black_box(p));
60409        let e = _mm_set_epi32(1, 0, 0, 0);
60410        assert_eq_m128i(r, e);
60411    }
60412
60413    #[simd_test(enable = "avx512f")]
60414    unsafe fn test_mm512_mask_expandloadu_epi64() {
60415        let src = _mm512_set1_epi64(42);
60416        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
60417        let p = a.as_ptr();
60418        let m = 0b11101000;
60419        let r = _mm512_mask_expandloadu_epi64(src, m, black_box(p));
60420        let e = _mm512_set_epi64(4, 3, 2, 42, 1, 42, 42, 42);
60421        assert_eq_m512i(r, e);
60422    }
60423
60424    #[simd_test(enable = "avx512f")]
60425    unsafe fn test_mm512_maskz_expandloadu_epi64() {
60426        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
60427        let p = a.as_ptr();
60428        let m = 0b11101000;
60429        let r = _mm512_maskz_expandloadu_epi64(m, black_box(p));
60430        let e = _mm512_set_epi64(4, 3, 2, 0, 1, 0, 0, 0);
60431        assert_eq_m512i(r, e);
60432    }
60433
60434    #[simd_test(enable = "avx512f,avx512vl")]
60435    unsafe fn test_mm256_mask_expandloadu_epi64() {
60436        let src = _mm256_set1_epi64x(42);
60437        let a = &[1_i64, 2, 3, 4];
60438        let p = a.as_ptr();
60439        let m = 0b11101000;
60440        let r = _mm256_mask_expandloadu_epi64(src, m, black_box(p));
60441        let e = _mm256_set_epi64x(1, 42, 42, 42);
60442        assert_eq_m256i(r, e);
60443    }
60444
60445    #[simd_test(enable = "avx512f,avx512vl")]
60446    unsafe fn test_mm256_maskz_expandloadu_epi64() {
60447        let a = &[1_i64, 2, 3, 4];
60448        let p = a.as_ptr();
60449        let m = 0b11101000;
60450        let r = _mm256_maskz_expandloadu_epi64(m, black_box(p));
60451        let e = _mm256_set_epi64x(1, 0, 0, 0);
60452        assert_eq_m256i(r, e);
60453    }
60454
60455    #[simd_test(enable = "avx512f,avx512vl")]
60456    unsafe fn test_mm_mask_expandloadu_epi64() {
60457        let src = _mm_set1_epi64x(42);
60458        let a = &[1_i64, 2];
60459        let p = a.as_ptr();
60460        let m = 0b11101000;
60461        let r = _mm_mask_expandloadu_epi64(src, m, black_box(p));
60462        let e = _mm_set_epi64x(42, 42);
60463        assert_eq_m128i(r, e);
60464    }
60465
60466    #[simd_test(enable = "avx512f,avx512vl")]
60467    unsafe fn test_mm_maskz_expandloadu_epi64() {
60468        let a = &[1_i64, 2];
60469        let p = a.as_ptr();
60470        let m = 0b11101000;
60471        let r = _mm_maskz_expandloadu_epi64(m, black_box(p));
60472        let e = _mm_set_epi64x(0, 0);
60473        assert_eq_m128i(r, e);
60474    }
60475
60476    #[simd_test(enable = "avx512f")]
60477    unsafe fn test_mm512_mask_expandloadu_ps() {
60478        let src = _mm512_set1_ps(42.);
60479        let a = &[
60480            1.0f32, 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
60481        ];
60482        let p = a.as_ptr();
60483        let m = 0b11101000_11001010;
60484        let r = _mm512_mask_expandloadu_ps(src, m, black_box(p));
60485        let e = _mm512_set_ps(
60486            8., 7., 6., 42., 5., 42., 42., 42., 4., 3., 42., 42., 2., 42., 1., 42.,
60487        );
60488        assert_eq_m512(r, e);
60489    }
60490
60491    #[simd_test(enable = "avx512f")]
60492    unsafe fn test_mm512_maskz_expandloadu_ps() {
60493        let a = &[
60494            1.0f32, 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
60495        ];
60496        let p = a.as_ptr();
60497        let m = 0b11101000_11001010;
60498        let r = _mm512_maskz_expandloadu_ps(m, black_box(p));
60499        let e = _mm512_set_ps(
60500            8., 7., 6., 0., 5., 0., 0., 0., 4., 3., 0., 0., 2., 0., 1., 0.,
60501        );
60502        assert_eq_m512(r, e);
60503    }
60504
60505    #[simd_test(enable = "avx512f,avx512vl")]
60506    unsafe fn test_mm256_mask_expandloadu_ps() {
60507        let src = _mm256_set1_ps(42.);
60508        let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
60509        let p = a.as_ptr();
60510        let m = 0b11101000;
60511        let r = _mm256_mask_expandloadu_ps(src, m, black_box(p));
60512        let e = _mm256_set_ps(4., 3., 2., 42., 1., 42., 42., 42.);
60513        assert_eq_m256(r, e);
60514    }
60515
60516    #[simd_test(enable = "avx512f,avx512vl")]
60517    unsafe fn test_mm256_maskz_expandloadu_ps() {
60518        let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
60519        let p = a.as_ptr();
60520        let m = 0b11101000;
60521        let r = _mm256_maskz_expandloadu_ps(m, black_box(p));
60522        let e = _mm256_set_ps(4., 3., 2., 0., 1., 0., 0., 0.);
60523        assert_eq_m256(r, e);
60524    }
60525
60526    #[simd_test(enable = "avx512f,avx512vl")]
60527    unsafe fn test_mm_mask_expandloadu_ps() {
60528        let src = _mm_set1_ps(42.);
60529        let a = &[1.0f32, 2., 3., 4.];
60530        let p = a.as_ptr();
60531        let m = 0b11101000;
60532        let r = _mm_mask_expandloadu_ps(src, m, black_box(p));
60533        let e = _mm_set_ps(1., 42., 42., 42.);
60534        assert_eq_m128(r, e);
60535    }
60536
60537    #[simd_test(enable = "avx512f,avx512vl")]
60538    unsafe fn test_mm_maskz_expandloadu_ps() {
60539        let a = &[1.0f32, 2., 3., 4.];
60540        let p = a.as_ptr();
60541        let m = 0b11101000;
60542        let r = _mm_maskz_expandloadu_ps(m, black_box(p));
60543        let e = _mm_set_ps(1., 0., 0., 0.);
60544        assert_eq_m128(r, e);
60545    }
60546
60547    #[simd_test(enable = "avx512f")]
60548    unsafe fn test_mm512_mask_expandloadu_pd() {
60549        let src = _mm512_set1_pd(42.);
60550        let a = &[1.0f64, 2., 3., 4., 5., 6., 7., 8.];
60551        let p = a.as_ptr();
60552        let m = 0b11101000;
60553        let r = _mm512_mask_expandloadu_pd(src, m, black_box(p));
60554        let e = _mm512_set_pd(4., 3., 2., 42., 1., 42., 42., 42.);
60555        assert_eq_m512d(r, e);
60556    }
60557
60558    #[simd_test(enable = "avx512f")]
60559    unsafe fn test_mm512_maskz_expandloadu_pd() {
60560        let a = &[1.0f64, 2., 3., 4., 5., 6., 7., 8.];
60561        let p = a.as_ptr();
60562        let m = 0b11101000;
60563        let r = _mm512_maskz_expandloadu_pd(m, black_box(p));
60564        let e = _mm512_set_pd(4., 3., 2., 0., 1., 0., 0., 0.);
60565        assert_eq_m512d(r, e);
60566    }
60567
60568    #[simd_test(enable = "avx512f,avx512vl")]
60569    unsafe fn test_mm256_mask_expandloadu_pd() {
60570        let src = _mm256_set1_pd(42.);
60571        let a = &[1.0f64, 2., 3., 4.];
60572        let p = a.as_ptr();
60573        let m = 0b11101000;
60574        let r = _mm256_mask_expandloadu_pd(src, m, black_box(p));
60575        let e = _mm256_set_pd(1., 42., 42., 42.);
60576        assert_eq_m256d(r, e);
60577    }
60578
60579    #[simd_test(enable = "avx512f,avx512vl")]
60580    unsafe fn test_mm256_maskz_expandloadu_pd() {
60581        let a = &[1.0f64, 2., 3., 4.];
60582        let p = a.as_ptr();
60583        let m = 0b11101000;
60584        let r = _mm256_maskz_expandloadu_pd(m, black_box(p));
60585        let e = _mm256_set_pd(1., 0., 0., 0.);
60586        assert_eq_m256d(r, e);
60587    }
60588
60589    #[simd_test(enable = "avx512f,avx512vl")]
60590    unsafe fn test_mm_mask_expandloadu_pd() {
60591        let src = _mm_set1_pd(42.);
60592        let a = &[1.0f64, 2.];
60593        let p = a.as_ptr();
60594        let m = 0b11101000;
60595        let r = _mm_mask_expandloadu_pd(src, m, black_box(p));
60596        let e = _mm_set_pd(42., 42.);
60597        assert_eq_m128d(r, e);
60598    }
60599
60600    #[simd_test(enable = "avx512f,avx512vl")]
60601    unsafe fn test_mm_maskz_expandloadu_pd() {
60602        let a = &[1.0f64, 2.];
60603        let p = a.as_ptr();
60604        let m = 0b11101000;
60605        let r = _mm_maskz_expandloadu_pd(m, black_box(p));
60606        let e = _mm_set_pd(0., 0.);
60607        assert_eq_m128d(r, e);
60608    }
60609}