// core/stdarch/crates/core_arch/src/x86/avx512bw.rs

use crate::{
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
    ptr,
};

#[cfg(test)]
use stdarch_test::assert_instr;

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi16&expand=30)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_abs_epi16(a: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i16x32();
        let cmp: i16x32 = simd_gt(a, i16x32::ZERO);
        transmute(simd_select(cmp, a, simd_neg(a)))
    }
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi16&expand=31)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_abs_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, abs, src.as_i16x32()))
    }
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi16&expand=32)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_abs_epi16(k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, abs, i16x32::ZERO))
    }
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi16&expand=28)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_abs_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, abs, src.as_i16x16()))
    }
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi16&expand=29)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_abs_epi16(k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, abs, i16x16::ZERO))
    }
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi16&expand=25)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_abs_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, abs, src.as_i16x8()))
    }
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi16&expand=26)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_abs_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, abs, i16x8::ZERO))
    }
}
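
// A minimal usage sketch (added for illustration; it is not part of the
// original file). It contrasts the plain, write-masked, and zero-masked
// forms of the 16-bit absolute value; the element values and mask are
// arbitrary assumptions for the example.
#[cfg(test)]
#[target_feature(enable = "avx512f,avx512bw")]
fn _example_abs_epi16_forms() -> (__m512i, __m512i, __m512i) {
    let a = _mm512_set1_epi16(-7);
    let src = _mm512_set1_epi16(100);
    let plain = _mm512_abs_epi16(a); // every lane becomes 7
    // Write-mask: lanes whose mask bit is clear keep the lane from `src`.
    let merged = _mm512_mask_abs_epi16(src, 0x0000_FFFF, a);
    // Zero-mask: lanes whose mask bit is clear are zeroed instead.
    let zeroed = _mm512_maskz_abs_epi16(0x0000_FFFF, a);
    (plain, merged, zeroed)
}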

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi8&expand=57)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_abs_epi8(a: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i8x64();
        let cmp: i8x64 = simd_gt(a, i8x64::ZERO);
        transmute(simd_select(cmp, a, simd_neg(a)))
    }
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi8&expand=58)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_abs_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi8(a).as_i8x64();
        transmute(simd_select_bitmask(k, abs, src.as_i8x64()))
    }
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi8&expand=59)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_abs_epi8(k: __mmask64, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi8(a).as_i8x64();
        transmute(simd_select_bitmask(k, abs, i8x64::ZERO))
    }
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi8&expand=55)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_abs_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi8(a).as_i8x32();
        transmute(simd_select_bitmask(k, abs, src.as_i8x32()))
    }
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi8&expand=56)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_abs_epi8(k: __mmask32, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi8(a).as_i8x32();
        transmute(simd_select_bitmask(k, abs, i8x32::ZERO))
    }
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi8&expand=52)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_abs_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, abs, src.as_i8x16()))
    }
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi8&expand=53)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_abs_epi8(k: __mmask16, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, abs, i8x16::ZERO))
    }
}
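
// Illustrative sketch (not from the original file): the two degenerate
// masks for the zero-masked 8-bit absolute value. A zero mask discards
// every lane; an all-ones mask reduces to the unmasked operation.
#[cfg(test)]
#[target_feature(enable = "avx512bw")]
fn _example_maskz_abs_epi8(a: __m512i) -> (__m512i, __m512i) {
    let none = _mm512_maskz_abs_epi8(0, a); // all 64 lanes zeroed
    let all = _mm512_maskz_abs_epi8(u64::MAX, a); // same as _mm512_abs_epi8(a)
    (none, all)
}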

/// Add packed 16-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi16&expand=91)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_add_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_add(a.as_i16x32(), b.as_i16x32())) }
}

/// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi16&expand=92)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_add_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, add, src.as_i16x32()))
    }
}

/// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi16&expand=93)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_add_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, add, i16x32::ZERO))
    }
}

/// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi16&expand=89)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_add_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, add, src.as_i16x16()))
    }
}

/// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi16&expand=90)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_add_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, add, i16x16::ZERO))
    }
}

/// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi16&expand=86)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_add_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, add, src.as_i16x8()))
    }
}

/// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi16&expand=87)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_add_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, add, i16x8::ZERO))
    }
}
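
// Sketch (illustration only, not part of the original file): the plain add
// wraps on overflow, unlike the saturating `adds` intrinsics further below.
#[cfg(test)]
#[target_feature(enable = "avx512f,avx512bw")]
fn _example_add_epi16_wraps() -> __m512i {
    let a = _mm512_set1_epi16(i16::MAX);
    let b = _mm512_set1_epi16(1);
    _mm512_add_epi16(a, b) // every lane wraps to i16::MIN
}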

/// Add packed 8-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi8&expand=118)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_add_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_add(a.as_i8x64(), b.as_i8x64())) }
}

/// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi8&expand=119)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_add_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, add, src.as_i8x64()))
    }
}

/// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi8&expand=120)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_add_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, add, i8x64::ZERO))
    }
}

/// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi8&expand=116)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_add_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, add, src.as_i8x32()))
    }
}

/// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi8&expand=117)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_add_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, add, i8x32::ZERO))
    }
}

/// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi8&expand=113)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_add_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, add, src.as_i8x16()))
    }
}

/// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi8&expand=114)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_add_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, add, i8x16::ZERO))
    }
}
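
// Sketch (illustration only): write-masked 8-bit add on the 128-bit form.
// Mask bit i selects between the sum a[i] + b[i] (bit set) and src[i]
// (bit clear); the mask value here is an arbitrary assumption.
#[cfg(test)]
#[target_feature(enable = "avx512bw,avx512vl")]
fn _example_mask_add_epi8(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    // Keep the low eight sums, take the high eight lanes from `src`.
    _mm_mask_add_epi8(src, 0x00FF, a, b)
}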

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epu16&expand=197)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddusw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_adds_epu16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_saturating_add(a.as_u16x32(), b.as_u16x32())) }
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epu16&expand=198)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddusw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_adds_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, add, src.as_u16x32()))
    }
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epu16&expand=199)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddusw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_adds_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, add, u16x32::ZERO))
    }
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epu16&expand=195)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddusw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_adds_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_adds_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, add, src.as_u16x16()))
    }
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epu16&expand=196)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddusw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_adds_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_adds_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, add, u16x16::ZERO))
    }
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epu16&expand=192)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddusw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_adds_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_adds_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, add, src.as_u16x8()))
    }
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epu16&expand=193)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddusw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_adds_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_adds_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, add, u16x8::ZERO))
    }
}
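
// Sketch (illustration only): unsigned saturating add clamps at u16::MAX
// instead of wrapping.
#[cfg(test)]
#[target_feature(enable = "avx512f,avx512bw")]
fn _example_adds_epu16_saturates() -> __m512i {
    let a = _mm512_set1_epi16(-1); // all bits set, i.e. u16::MAX per lane
    let b = _mm512_set1_epi16(1);
    _mm512_adds_epu16(a, b) // stays at u16::MAX rather than wrapping to 0
}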

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epu8&expand=206)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddusb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_adds_epu8(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_saturating_add(a.as_u8x64(), b.as_u8x64())) }
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epu8&expand=207)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddusb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_adds_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, add, src.as_u8x64()))
    }
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epu8&expand=208)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddusb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_adds_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, add, u8x64::ZERO))
    }
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epu8&expand=204)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddusb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_adds_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_adds_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, add, src.as_u8x32()))
    }
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epu8&expand=205)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddusb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_adds_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_adds_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, add, u8x32::ZERO))
    }
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epu8&expand=201)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddusb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_adds_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_adds_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, add, src.as_u8x16()))
    }
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epu8&expand=202)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddusb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_adds_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_adds_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, add, u8x16::ZERO))
    }
}
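
// Sketch (illustration only): the same clamping for bytes; 200 + 100
// saturates to 255 (u8::MAX) instead of wrapping.
#[cfg(test)]
#[target_feature(enable = "avx512f,avx512bw")]
fn _example_adds_epu8_saturates() -> __m512i {
    let a = _mm512_set1_epi8(200u8 as i8);
    let b = _mm512_set1_epi8(100u8 as i8);
    _mm512_adds_epu8(a, b) // every lane holds 255
}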

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epi16&expand=179)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_adds_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_saturating_add(a.as_i16x32(), b.as_i16x32())) }
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epi16&expand=180)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_adds_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, add, src.as_i16x32()))
    }
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epi16&expand=181)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_adds_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, add, i16x32::ZERO))
    }
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epi16&expand=177)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_adds_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_adds_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, add, src.as_i16x16()))
    }
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epi16&expand=178)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_adds_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_adds_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, add, i16x16::ZERO))
    }
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epi16&expand=174)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_adds_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_adds_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, add, src.as_i16x8()))
    }
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epi16&expand=175)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_adds_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_adds_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, add, i16x8::ZERO))
    }
}
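
// Sketch (illustration only): signed saturation clamps at both ends of the
// i16 range.
#[cfg(test)]
#[target_feature(enable = "avx512f,avx512bw")]
fn _example_adds_epi16_saturates() -> (__m512i, __m512i) {
    let max = _mm512_set1_epi16(i16::MAX);
    let min = _mm512_set1_epi16(i16::MIN);
    let hi = _mm512_adds_epi16(max, _mm512_set1_epi16(1)); // clamps at i16::MAX
    let lo = _mm512_adds_epi16(min, _mm512_set1_epi16(-1)); // clamps at i16::MIN
    (hi, lo)
}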

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epi8&expand=188)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_adds_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_saturating_add(a.as_i8x64(), b.as_i8x64())) }
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epi8&expand=189)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_adds_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, add, src.as_i8x64()))
    }
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epi8&expand=190)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_adds_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, add, i8x64::ZERO))
    }
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epi8&expand=186)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_adds_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_adds_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, add, src.as_i8x32()))
    }
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epi8&expand=187)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_adds_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_adds_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, add, i8x32::ZERO))
    }
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epi8&expand=183)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_adds_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_adds_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, add, src.as_i8x16()))
    }
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epi8&expand=184)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_adds_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_adds_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, add, i8x16::ZERO))
    }
}
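
// Sketch (illustration only): zero-masked saturating byte add. With only
// the low 32 mask bits set, the low 32 lanes hold the clamped sum
// (i8::MAX + 1 stays at i8::MAX) and the high 32 lanes are zeroed.
#[cfg(test)]
#[target_feature(enable = "avx512f,avx512bw")]
fn _example_maskz_adds_epi8() -> __m512i {
    let a = _mm512_set1_epi8(i8::MAX);
    let b = _mm512_set1_epi8(1);
    _mm512_maskz_adds_epi8(0x0000_0000_FFFF_FFFF, a, b)
}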

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi16&expand=5685)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_sub_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_sub(a.as_i16x32(), b.as_i16x32())) }
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi16&expand=5683)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_sub_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_sub_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, sub, src.as_i16x32()))
    }
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi16&expand=5684)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_sub_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_sub_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, sub, i16x32::ZERO))
    }
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi16&expand=5680)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_sub_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_sub_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, sub, src.as_i16x16()))
    }
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi16&expand=5681)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_sub_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_sub_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, sub, i16x16::ZERO))
    }
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi16&expand=5677)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_sub_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_sub_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, sub, src.as_i16x8()))
    }
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi16&expand=5678)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_sub_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_sub_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, sub, i16x8::ZERO))
    }
}
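
// Sketch (illustration only): the plain subtraction wraps on overflow;
// i16::MIN - 1 wraps around to i16::MAX.
#[cfg(test)]
#[target_feature(enable = "avx512f,avx512bw")]
fn _example_sub_epi16_wraps() -> __m512i {
    let a = _mm512_set1_epi16(i16::MIN);
    let b = _mm512_set1_epi16(1);
    _mm512_sub_epi16(a, b) // every lane wraps to i16::MAX
}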

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi8&expand=5712)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_sub_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_sub(a.as_i8x64(), b.as_i8x64())) }
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi8&expand=5710)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_sub_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_sub_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, sub, src.as_i8x64()))
    }
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi8&expand=5711)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_sub_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_sub_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, sub, i8x64::ZERO))
    }
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi8&expand=5707)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_sub_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_sub_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, sub, src.as_i8x32()))
    }
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi8&expand=5708)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_sub_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_sub_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, sub, i8x32::ZERO))
    }
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi8&expand=5704)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_sub_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_sub_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, sub, src.as_i8x16()))
    }
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi8&expand=5705)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_sub_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_sub_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, sub, i8x16::ZERO))
    }
}
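
// Sketch (illustration only): write-masked byte subtraction on the 256-bit
// form; lanes with a clear mask bit pass the corresponding lane of `src`
// through unchanged. The alternating mask is an arbitrary assumption.
#[cfg(test)]
#[target_feature(enable = "avx512bw,avx512vl")]
fn _example_mask_sub_epi8(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    _mm256_mask_sub_epi8(src, 0x0F0F_0F0F, a, b)
}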

/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epu16&expand=5793)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubusw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_subs_epu16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_saturating_sub(a.as_u16x32(), b.as_u16x32())) }
}
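
// Sketch (illustration only): unsigned saturating subtraction floors at
// zero; 1 - 2 yields 0 rather than wrapping to u16::MAX.
#[cfg(test)]
#[target_feature(enable = "avx512f,avx512bw")]
fn _example_subs_epu16_floors() -> __m512i {
    let a = _mm512_set1_epi16(1);
    let b = _mm512_set1_epi16(2);
    _mm512_subs_epu16(a, b) // every lane is 0
}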
1049
1050/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1051///
1052/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epu16&expand=5791)
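///
/// A minimal sketch of the writemask behaviour (illustrative values, not from
/// Intel's documentation), assuming `core::arch::x86_64::*` is in scope and the
/// caller enables `avx512bw`:
///
/// ```ignore
/// let src = _mm512_set1_epi16(-1);
/// let a = _mm512_set1_epi16(10);
/// let b = _mm512_set1_epi16(4);
/// // Only lane 0 receives the saturating difference 6; lanes 1..32 are copied from `src`.
/// let r = _mm512_mask_subs_epu16(src, 0b1, a, b);
/// ```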
1053#[inline]
1054#[target_feature(enable = "avx512bw")]
1055#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1056#[cfg_attr(test, assert_instr(vpsubusw))]
1057#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1058pub const fn _mm512_mask_subs_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1059    unsafe {
1060        let sub = _mm512_subs_epu16(a, b).as_u16x32();
1061        transmute(simd_select_bitmask(k, sub, src.as_u16x32()))
1062    }
1063}
1064
1065/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1066///
1067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epu16&expand=5792)
1068#[inline]
1069#[target_feature(enable = "avx512bw")]
1070#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1071#[cfg_attr(test, assert_instr(vpsubusw))]
1072#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1073pub const fn _mm512_maskz_subs_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1074    unsafe {
1075        let sub = _mm512_subs_epu16(a, b).as_u16x32();
1076        transmute(simd_select_bitmask(k, sub, u16x32::ZERO))
1077    }
1078}
1079
1080/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1081///
1082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epu16&expand=5788)
1083#[inline]
1084#[target_feature(enable = "avx512bw,avx512vl")]
1085#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1086#[cfg_attr(test, assert_instr(vpsubusw))]
1087#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1088pub const fn _mm256_mask_subs_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1089    unsafe {
1090        let sub = _mm256_subs_epu16(a, b).as_u16x16();
1091        transmute(simd_select_bitmask(k, sub, src.as_u16x16()))
1092    }
1093}
1094
1095/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1096///
1097/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epu16&expand=5789)
1098#[inline]
1099#[target_feature(enable = "avx512bw,avx512vl")]
1100#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1101#[cfg_attr(test, assert_instr(vpsubusw))]
1102#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1103pub const fn _mm256_maskz_subs_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1104    unsafe {
1105        let sub = _mm256_subs_epu16(a, b).as_u16x16();
1106        transmute(simd_select_bitmask(k, sub, u16x16::ZERO))
1107    }
1108}
1109
1110/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1111///
1112/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epu16&expand=5785)
1113#[inline]
1114#[target_feature(enable = "avx512bw,avx512vl")]
1115#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1116#[cfg_attr(test, assert_instr(vpsubusw))]
1117#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1118pub const fn _mm_mask_subs_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1119    unsafe {
1120        let sub = _mm_subs_epu16(a, b).as_u16x8();
1121        transmute(simd_select_bitmask(k, sub, src.as_u16x8()))
1122    }
1123}
1124
1125/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1126///
1127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epu16&expand=5786)
1128#[inline]
1129#[target_feature(enable = "avx512bw,avx512vl")]
1130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1131#[cfg_attr(test, assert_instr(vpsubusw))]
1132#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1133pub const fn _mm_maskz_subs_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1134    unsafe {
1135        let sub = _mm_subs_epu16(a, b).as_u16x8();
1136        transmute(simd_select_bitmask(k, sub, u16x8::ZERO))
1137    }
1138}
1139
1140/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst.
1141///
1142/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epu8&expand=5802)
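///
/// A minimal usage sketch (illustrative values, not from Intel's documentation),
/// assuming `core::arch::x86_64::*` is in scope and the caller enables `avx512bw`:
///
/// ```ignore
/// let a = _mm512_set1_epi8(0);
/// let b = _mm512_set1_epi8(1);
/// // 0 - 1 saturates to 0 in every unsigned 8-bit lane rather than wrapping to 255.
/// let r = _mm512_subs_epu8(a, b);
/// ```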
1143#[inline]
1144#[target_feature(enable = "avx512bw")]
1145#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1146#[cfg_attr(test, assert_instr(vpsubusb))]
1147#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1148pub const fn _mm512_subs_epu8(a: __m512i, b: __m512i) -> __m512i {
1149    unsafe { transmute(simd_saturating_sub(a.as_u8x64(), b.as_u8x64())) }
1150}
1151
1152/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1153///
1154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epu8&expand=5800)
1155#[inline]
1156#[target_feature(enable = "avx512bw")]
1157#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1158#[cfg_attr(test, assert_instr(vpsubusb))]
1159#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1160pub const fn _mm512_mask_subs_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
1161    unsafe {
1162        let sub = _mm512_subs_epu8(a, b).as_u8x64();
1163        transmute(simd_select_bitmask(k, sub, src.as_u8x64()))
1164    }
1165}
1166
1167/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1168///
1169/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epu8&expand=5801)
1170#[inline]
1171#[target_feature(enable = "avx512bw")]
1172#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1173#[cfg_attr(test, assert_instr(vpsubusb))]
1174#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1175pub const fn _mm512_maskz_subs_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
1176    unsafe {
1177        let sub = _mm512_subs_epu8(a, b).as_u8x64();
1178        transmute(simd_select_bitmask(k, sub, u8x64::ZERO))
1179    }
1180}
1181
1182/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1183///
1184/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epu8&expand=5797)
1185#[inline]
1186#[target_feature(enable = "avx512bw,avx512vl")]
1187#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1188#[cfg_attr(test, assert_instr(vpsubusb))]
1189#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1190pub const fn _mm256_mask_subs_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
1191    unsafe {
1192        let sub = _mm256_subs_epu8(a, b).as_u8x32();
1193        transmute(simd_select_bitmask(k, sub, src.as_u8x32()))
1194    }
1195}
1196
1197/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1198///
1199/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epu8&expand=5798)
1200#[inline]
1201#[target_feature(enable = "avx512bw,avx512vl")]
1202#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1203#[cfg_attr(test, assert_instr(vpsubusb))]
1204#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1205pub const fn _mm256_maskz_subs_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
1206    unsafe {
1207        let sub = _mm256_subs_epu8(a, b).as_u8x32();
1208        transmute(simd_select_bitmask(k, sub, u8x32::ZERO))
1209    }
1210}
1211
1212/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1213///
1214/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epu8&expand=5794)
1215#[inline]
1216#[target_feature(enable = "avx512bw,avx512vl")]
1217#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1218#[cfg_attr(test, assert_instr(vpsubusb))]
1219#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1220pub const fn _mm_mask_subs_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
1221    unsafe {
1222        let sub = _mm_subs_epu8(a, b).as_u8x16();
1223        transmute(simd_select_bitmask(k, sub, src.as_u8x16()))
1224    }
1225}
1226
1227/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1228///
1229/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epu8&expand=5795)
1230#[inline]
1231#[target_feature(enable = "avx512bw,avx512vl")]
1232#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1233#[cfg_attr(test, assert_instr(vpsubusb))]
1234#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1235pub const fn _mm_maskz_subs_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
1236    unsafe {
1237        let sub = _mm_subs_epu8(a, b).as_u8x16();
1238        transmute(simd_select_bitmask(k, sub, u8x16::ZERO))
1239    }
1240}
1241
1242/// Subtract packed signed 16-bit integers in b from packed signed 16-bit integers in a using saturation, and store the results in dst.
1243///
1244/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epi16&expand=5775)
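///
/// A minimal usage sketch (illustrative values, not from Intel's documentation),
/// assuming `core::arch::x86_64::*` is in scope and the caller enables `avx512bw`:
///
/// ```ignore
/// let a = _mm512_set1_epi16(i16::MIN);
/// let b = _mm512_set1_epi16(1);
/// // Signed saturation clamps i16::MIN - 1 to i16::MIN instead of wrapping to i16::MAX.
/// let r = _mm512_subs_epi16(a, b);
/// ```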
1245#[inline]
1246#[target_feature(enable = "avx512bw")]
1247#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1248#[cfg_attr(test, assert_instr(vpsubsw))]
1249#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1250pub const fn _mm512_subs_epi16(a: __m512i, b: __m512i) -> __m512i {
1251    unsafe { transmute(simd_saturating_sub(a.as_i16x32(), b.as_i16x32())) }
1252}
1253
1254/// Subtract packed signed 16-bit integers in b from packed signed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1255///
1256/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epi16&expand=5773)
1257#[inline]
1258#[target_feature(enable = "avx512bw")]
1259#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1260#[cfg_attr(test, assert_instr(vpsubsw))]
1261#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1262pub const fn _mm512_mask_subs_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1263    unsafe {
1264        let sub = _mm512_subs_epi16(a, b).as_i16x32();
1265        transmute(simd_select_bitmask(k, sub, src.as_i16x32()))
1266    }
1267}
1268
1269/// Subtract packed signed 16-bit integers in b from packed signed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1270///
1271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epi16&expand=5774)
1272#[inline]
1273#[target_feature(enable = "avx512bw")]
1274#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1275#[cfg_attr(test, assert_instr(vpsubsw))]
1276#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1277pub const fn _mm512_maskz_subs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1278    unsafe {
1279        let sub = _mm512_subs_epi16(a, b).as_i16x32();
1280        transmute(simd_select_bitmask(k, sub, i16x32::ZERO))
1281    }
1282}
1283
1284/// Subtract packed signed 16-bit integers in b from packed signed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1285///
1286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epi16&expand=5770)
1287#[inline]
1288#[target_feature(enable = "avx512bw,avx512vl")]
1289#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1290#[cfg_attr(test, assert_instr(vpsubsw))]
1291#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1292pub const fn _mm256_mask_subs_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1293    unsafe {
1294        let sub = _mm256_subs_epi16(a, b).as_i16x16();
1295        transmute(simd_select_bitmask(k, sub, src.as_i16x16()))
1296    }
1297}
1298
1299/// Subtract packed signed 16-bit integers in b from packed signed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1300///
1301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epi16&expand=5771)
1302#[inline]
1303#[target_feature(enable = "avx512bw,avx512vl")]
1304#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1305#[cfg_attr(test, assert_instr(vpsubsw))]
1306#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1307pub const fn _mm256_maskz_subs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1308    unsafe {
1309        let sub = _mm256_subs_epi16(a, b).as_i16x16();
1310        transmute(simd_select_bitmask(k, sub, i16x16::ZERO))
1311    }
1312}
1313
1314/// Subtract packed signed 16-bit integers in b from packed signed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1315///
1316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epi16&expand=5767)
1317#[inline]
1318#[target_feature(enable = "avx512bw,avx512vl")]
1319#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1320#[cfg_attr(test, assert_instr(vpsubsw))]
1321#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1322pub const fn _mm_mask_subs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1323    unsafe {
1324        let sub = _mm_subs_epi16(a, b).as_i16x8();
1325        transmute(simd_select_bitmask(k, sub, src.as_i16x8()))
1326    }
1327}
1328
1329/// Subtract packed signed 16-bit integers in b from packed signed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1330///
1331/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epi16&expand=5768)
1332#[inline]
1333#[target_feature(enable = "avx512bw,avx512vl")]
1334#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1335#[cfg_attr(test, assert_instr(vpsubsw))]
1336#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1337pub const fn _mm_maskz_subs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1338    unsafe {
1339        let sub = _mm_subs_epi16(a, b).as_i16x8();
1340        transmute(simd_select_bitmask(k, sub, i16x8::ZERO))
1341    }
1342}
1343
1344/// Subtract packed signed 8-bit integers in b from packed signed 8-bit integers in a using saturation, and store the results in dst.
1345///
1346/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epi8&expand=5784)
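///
/// A minimal usage sketch (illustrative values, not from Intel's documentation),
/// assuming `core::arch::x86_64::*` is in scope and the caller enables `avx512bw`:
///
/// ```ignore
/// let a = _mm512_set1_epi8(i8::MIN);
/// let b = _mm512_set1_epi8(1);
/// // Signed saturation clamps i8::MIN - 1 to i8::MIN instead of wrapping to i8::MAX.
/// let r = _mm512_subs_epi8(a, b);
/// ```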
1347#[inline]
1348#[target_feature(enable = "avx512bw")]
1349#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1350#[cfg_attr(test, assert_instr(vpsubsb))]
1351#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1352pub const fn _mm512_subs_epi8(a: __m512i, b: __m512i) -> __m512i {
1353    unsafe { transmute(simd_saturating_sub(a.as_i8x64(), b.as_i8x64())) }
1354}
1355
1356/// Subtract packed signed 8-bit integers in b from packed signed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1357///
1358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epi8&expand=5782)
1359#[inline]
1360#[target_feature(enable = "avx512bw")]
1361#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1362#[cfg_attr(test, assert_instr(vpsubsb))]
1363#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1364pub const fn _mm512_mask_subs_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
1365    unsafe {
1366        let sub = _mm512_subs_epi8(a, b).as_i8x64();
1367        transmute(simd_select_bitmask(k, sub, src.as_i8x64()))
1368    }
1369}
1370
1371/// Subtract packed signed 8-bit integers in b from packed signed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1372///
1373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epi8&expand=5783)
1374#[inline]
1375#[target_feature(enable = "avx512bw")]
1376#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1377#[cfg_attr(test, assert_instr(vpsubsb))]
1378#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1379pub const fn _mm512_maskz_subs_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
1380    unsafe {
1381        let sub = _mm512_subs_epi8(a, b).as_i8x64();
1382        transmute(simd_select_bitmask(k, sub, i8x64::ZERO))
1383    }
1384}
1385
1386/// Subtract packed signed 8-bit integers in b from packed signed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1387///
1388/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epi8&expand=5779)
1389#[inline]
1390#[target_feature(enable = "avx512bw,avx512vl")]
1391#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1392#[cfg_attr(test, assert_instr(vpsubsb))]
1393#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1394pub const fn _mm256_mask_subs_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
1395    unsafe {
1396        let sub = _mm256_subs_epi8(a, b).as_i8x32();
1397        transmute(simd_select_bitmask(k, sub, src.as_i8x32()))
1398    }
1399}
1400
1401/// Subtract packed signed 8-bit integers in b from packed signed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1402///
1403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epi8&expand=5780)
1404#[inline]
1405#[target_feature(enable = "avx512bw,avx512vl")]
1406#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1407#[cfg_attr(test, assert_instr(vpsubsb))]
1408#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1409pub const fn _mm256_maskz_subs_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
1410    unsafe {
1411        let sub = _mm256_subs_epi8(a, b).as_i8x32();
1412        transmute(simd_select_bitmask(k, sub, i8x32::ZERO))
1413    }
1414}
1415
1416/// Subtract packed signed 8-bit integers in b from packed signed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1417///
1418/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epi8&expand=5776)
1419#[inline]
1420#[target_feature(enable = "avx512bw,avx512vl")]
1421#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1422#[cfg_attr(test, assert_instr(vpsubsb))]
1423#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1424pub const fn _mm_mask_subs_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
1425    unsafe {
1426        let sub = _mm_subs_epi8(a, b).as_i8x16();
1427        transmute(simd_select_bitmask(k, sub, src.as_i8x16()))
1428    }
1429}
1430
1431/// Subtract packed signed 8-bit integers in b from packed signed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1432///
1433/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epi8&expand=5777)
1434#[inline]
1435#[target_feature(enable = "avx512bw,avx512vl")]
1436#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1437#[cfg_attr(test, assert_instr(vpsubsb))]
1438#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1439pub const fn _mm_maskz_subs_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
1440    unsafe {
1441        let sub = _mm_subs_epi8(a, b).as_i8x16();
1442        transmute(simd_select_bitmask(k, sub, i8x16::ZERO))
1443    }
1444}
1445
1446/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst.
1447///
1448/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mulhi_epu16&expand=3973)
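///
/// A minimal usage sketch (illustrative values, not from Intel's documentation),
/// assuming `core::arch::x86_64::*` is in scope and the caller enables `avx512bw`:
///
/// ```ignore
/// let a = _mm512_set1_epi16(-1); // all bits set, i.e. 65535 as an unsigned lane
/// let b = _mm512_set1_epi16(2);
/// // 65535 * 2 = 0x0001_FFFE, so every result lane holds the high half 0x0001.
/// let r = _mm512_mulhi_epu16(a, b);
/// ```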
1449#[inline]
1450#[target_feature(enable = "avx512bw")]
1451#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1452#[cfg_attr(test, assert_instr(vpmulhuw))]
1453#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1454pub const fn _mm512_mulhi_epu16(a: __m512i, b: __m512i) -> __m512i {
1455    unsafe {
1456        let a = simd_cast::<_, u32x32>(a.as_u16x32());
1457        let b = simd_cast::<_, u32x32>(b.as_u16x32());
1458        let r = simd_shr(simd_mul(a, b), u32x32::splat(16));
1459        transmute(simd_cast::<u32x32, u16x32>(r))
1460    }
1461}
1462
1463/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1464///
1465/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mulhi_epu16&expand=3971)
1466#[inline]
1467#[target_feature(enable = "avx512bw")]
1468#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1469#[cfg_attr(test, assert_instr(vpmulhuw))]
1470#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1471pub const fn _mm512_mask_mulhi_epu16(
1472    src: __m512i,
1473    k: __mmask32,
1474    a: __m512i,
1475    b: __m512i,
1476) -> __m512i {
1477    unsafe {
1478        let mul = _mm512_mulhi_epu16(a, b).as_u16x32();
1479        transmute(simd_select_bitmask(k, mul, src.as_u16x32()))
1480    }
1481}
1482
1483/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1484///
1485/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mulhi_epu16&expand=3972)
1486#[inline]
1487#[target_feature(enable = "avx512bw")]
1488#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1489#[cfg_attr(test, assert_instr(vpmulhuw))]
1490#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1491pub const fn _mm512_maskz_mulhi_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1492    unsafe {
1493        let mul = _mm512_mulhi_epu16(a, b).as_u16x32();
1494        transmute(simd_select_bitmask(k, mul, u16x32::ZERO))
1495    }
1496}
1497
1498/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1499///
1500/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mulhi_epu16&expand=3968)
1501#[inline]
1502#[target_feature(enable = "avx512bw,avx512vl")]
1503#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1504#[cfg_attr(test, assert_instr(vpmulhuw))]
1505#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1506pub const fn _mm256_mask_mulhi_epu16(
1507    src: __m256i,
1508    k: __mmask16,
1509    a: __m256i,
1510    b: __m256i,
1511) -> __m256i {
1512    unsafe {
1513        let mul = _mm256_mulhi_epu16(a, b).as_u16x16();
1514        transmute(simd_select_bitmask(k, mul, src.as_u16x16()))
1515    }
1516}
1517
1518/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1519///
1520/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mulhi_epu16&expand=3969)
1521#[inline]
1522#[target_feature(enable = "avx512bw,avx512vl")]
1523#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1524#[cfg_attr(test, assert_instr(vpmulhuw))]
1525#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1526pub const fn _mm256_maskz_mulhi_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1527    unsafe {
1528        let mul = _mm256_mulhi_epu16(a, b).as_u16x16();
1529        transmute(simd_select_bitmask(k, mul, u16x16::ZERO))
1530    }
1531}
1532
1533/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1534///
1535/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mulhi_epu16&expand=3965)
1536#[inline]
1537#[target_feature(enable = "avx512bw,avx512vl")]
1538#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1539#[cfg_attr(test, assert_instr(vpmulhuw))]
1540#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1541pub const fn _mm_mask_mulhi_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1542    unsafe {
1543        let mul = _mm_mulhi_epu16(a, b).as_u16x8();
1544        transmute(simd_select_bitmask(k, mul, src.as_u16x8()))
1545    }
1546}
1547
1548/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1549///
1550/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mulhi_epu16&expand=3966)
1551#[inline]
1552#[target_feature(enable = "avx512bw,avx512vl")]
1553#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1554#[cfg_attr(test, assert_instr(vpmulhuw))]
1555#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1556pub const fn _mm_maskz_mulhi_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1557    unsafe {
1558        let mul = _mm_mulhi_epu16(a, b).as_u16x8();
1559        transmute(simd_select_bitmask(k, mul, u16x8::ZERO))
1560    }
1561}
1562
1563/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst.
1564///
1565/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mulhi_epi16&expand=3962)
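///
/// A minimal usage sketch (illustrative values, not from Intel's documentation),
/// assuming `core::arch::x86_64::*` is in scope and the caller enables `avx512bw`:
///
/// ```ignore
/// let a = _mm512_set1_epi16(-2);
/// let b = _mm512_set1_epi16(16384);
/// // -2 * 16384 = -32768 = 0xFFFF_8000 as i32, so every result lane holds -1 (0xFFFF).
/// let r = _mm512_mulhi_epi16(a, b);
/// ```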
1566#[inline]
1567#[target_feature(enable = "avx512bw")]
1568#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1569#[cfg_attr(test, assert_instr(vpmulhw))]
1570#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1571pub const fn _mm512_mulhi_epi16(a: __m512i, b: __m512i) -> __m512i {
1572    unsafe {
1573        let a = simd_cast::<_, i32x32>(a.as_i16x32());
1574        let b = simd_cast::<_, i32x32>(b.as_i16x32());
1575        let r = simd_shr(simd_mul(a, b), i32x32::splat(16));
1576        transmute(simd_cast::<i32x32, i16x32>(r))
1577    }
1578}
1579
1580/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1581///
1582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mulhi_epi16&expand=3960)
1583#[inline]
1584#[target_feature(enable = "avx512bw")]
1585#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1586#[cfg_attr(test, assert_instr(vpmulhw))]
1587#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1588pub const fn _mm512_mask_mulhi_epi16(
1589    src: __m512i,
1590    k: __mmask32,
1591    a: __m512i,
1592    b: __m512i,
1593) -> __m512i {
1594    unsafe {
1595        let mul = _mm512_mulhi_epi16(a, b).as_i16x32();
1596        transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
1597    }
1598}
1599
1600/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1601///
1602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mulhi_epi16&expand=3961)
1603#[inline]
1604#[target_feature(enable = "avx512bw")]
1605#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1606#[cfg_attr(test, assert_instr(vpmulhw))]
1607#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1608pub const fn _mm512_maskz_mulhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1609    unsafe {
1610        let mul = _mm512_mulhi_epi16(a, b).as_i16x32();
1611        transmute(simd_select_bitmask(k, mul, i16x32::ZERO))
1612    }
1613}
1614
1615/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1616///
1617/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mulhi_epi16&expand=3957)
1618#[inline]
1619#[target_feature(enable = "avx512bw,avx512vl")]
1620#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1621#[cfg_attr(test, assert_instr(vpmulhw))]
1622#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1623pub const fn _mm256_mask_mulhi_epi16(
1624    src: __m256i,
1625    k: __mmask16,
1626    a: __m256i,
1627    b: __m256i,
1628) -> __m256i {
1629    unsafe {
1630        let mul = _mm256_mulhi_epi16(a, b).as_i16x16();
1631        transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
1632    }
1633}
1634
1635/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1636///
1637/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mulhi_epi16&expand=3958)
1638#[inline]
1639#[target_feature(enable = "avx512bw,avx512vl")]
1640#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1641#[cfg_attr(test, assert_instr(vpmulhw))]
1642#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1643pub const fn _mm256_maskz_mulhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1644    unsafe {
1645        let mul = _mm256_mulhi_epi16(a, b).as_i16x16();
1646        transmute(simd_select_bitmask(k, mul, i16x16::ZERO))
1647    }
1648}
1649
1650/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1651///
1652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mulhi_epi16&expand=3954)
1653#[inline]
1654#[target_feature(enable = "avx512bw,avx512vl")]
1655#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1656#[cfg_attr(test, assert_instr(vpmulhw))]
1657#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1658pub const fn _mm_mask_mulhi_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1659    unsafe {
1660        let mul = _mm_mulhi_epi16(a, b).as_i16x8();
1661        transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
1662    }
1663}
1664
1665/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1666///
1667/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mulhi_epi16&expand=3955)
1668#[inline]
1669#[target_feature(enable = "avx512bw,avx512vl")]
1670#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1671#[cfg_attr(test, assert_instr(vpmulhw))]
1672#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1673pub const fn _mm_maskz_mulhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1674    unsafe {
1675        let mul = _mm_mulhi_epi16(a, b).as_i16x8();
1676        transmute(simd_select_bitmask(k, mul, i16x8::ZERO))
1677    }
1678}
1679
1680/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst.
1681///
1682/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mulhrs_epi16&expand=3986)
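///
/// A minimal usage sketch (illustrative Q15 fixed-point values, not from Intel's
/// documentation), assuming `core::arch::x86_64::*` is in scope and the caller
/// enables `avx512bw`:
///
/// ```ignore
/// let a = _mm512_set1_epi16(0x4000); // 0.5 in Q15
/// let b = _mm512_set1_epi16(0x2000); // 0.25 in Q15
/// // (0x4000 * 0x2000) >> 14 is 0x2000; adding 1 and shifting right once gives
/// // 0x1000, i.e. 0.125 in Q15, in every lane.
/// let r = _mm512_mulhrs_epi16(a, b);
/// ```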
1683#[inline]
1684#[target_feature(enable = "avx512bw")]
1685#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1686#[cfg_attr(test, assert_instr(vpmulhrsw))]
1687pub fn _mm512_mulhrs_epi16(a: __m512i, b: __m512i) -> __m512i {
1688    unsafe { transmute(vpmulhrsw(a.as_i16x32(), b.as_i16x32())) }
1689}
1690
1691/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1692///
1693/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mulhrs_epi16&expand=3984)
1694#[inline]
1695#[target_feature(enable = "avx512bw")]
1696#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1697#[cfg_attr(test, assert_instr(vpmulhrsw))]
1698pub fn _mm512_mask_mulhrs_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1699    unsafe {
1700        let mul = _mm512_mulhrs_epi16(a, b).as_i16x32();
1701        transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
1702    }
1703}
1704
1705/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1706///
1707/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mulhrs_epi16&expand=3985)
1708#[inline]
1709#[target_feature(enable = "avx512bw")]
1710#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1711#[cfg_attr(test, assert_instr(vpmulhrsw))]
1712pub fn _mm512_maskz_mulhrs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1713    unsafe {
1714        let mul = _mm512_mulhrs_epi16(a, b).as_i16x32();
1715        transmute(simd_select_bitmask(k, mul, i16x32::ZERO))
1716    }
1717}
1718
1719/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1720///
1721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mulhrs_epi16&expand=3981)
1722#[inline]
1723#[target_feature(enable = "avx512bw,avx512vl")]
1724#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1725#[cfg_attr(test, assert_instr(vpmulhrsw))]
1726pub fn _mm256_mask_mulhrs_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1727    unsafe {
1728        let mul = _mm256_mulhrs_epi16(a, b).as_i16x16();
1729        transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
1730    }
1731}
1732
1733/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1734///
1735/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mulhrs_epi16&expand=3982)
1736#[inline]
1737#[target_feature(enable = "avx512bw,avx512vl")]
1738#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1739#[cfg_attr(test, assert_instr(vpmulhrsw))]
1740pub fn _mm256_maskz_mulhrs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1741    unsafe {
1742        let mul = _mm256_mulhrs_epi16(a, b).as_i16x16();
1743        transmute(simd_select_bitmask(k, mul, i16x16::ZERO))
1744    }
1745}
1746
1747/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1748///
1749/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mulhrs_epi16&expand=3978)
1750#[inline]
1751#[target_feature(enable = "avx512bw,avx512vl")]
1752#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1753#[cfg_attr(test, assert_instr(vpmulhrsw))]
1754pub fn _mm_mask_mulhrs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1755    unsafe {
1756        let mul = _mm_mulhrs_epi16(a, b).as_i16x8();
1757        transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
1758    }
1759}
1760
1761/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1762///
1763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mulhrs_epi16&expand=3979)
1764#[inline]
1765#[target_feature(enable = "avx512bw,avx512vl")]
1766#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1767#[cfg_attr(test, assert_instr(vpmulhrsw))]
1768pub fn _mm_maskz_mulhrs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1769    unsafe {
1770        let mul = _mm_mulhrs_epi16(a, b).as_i16x8();
1771        transmute(simd_select_bitmask(k, mul, i16x8::ZERO))
1772    }
1773}
1774
1775/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst.
1776///
1777/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullo_epi16&expand=3996)
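///
/// A minimal usage sketch (illustrative values, not from Intel's documentation),
/// assuming `core::arch::x86_64::*` is in scope and the caller enables `avx512bw`:
///
/// ```ignore
/// let a = _mm512_set1_epi16(300);
/// let b = _mm512_set1_epi16(300);
/// // 300 * 300 = 90000 = 0x0001_5F90; only the low half 0x5F90 (24464) is kept per lane.
/// let r = _mm512_mullo_epi16(a, b);
/// ```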
1778#[inline]
1779#[target_feature(enable = "avx512bw")]
1780#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1781#[cfg_attr(test, assert_instr(vpmullw))]
1782#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1783pub const fn _mm512_mullo_epi16(a: __m512i, b: __m512i) -> __m512i {
1784    unsafe { transmute(simd_mul(a.as_i16x32(), b.as_i16x32())) }
1785}
1786
1787/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1788///
1789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullo_epi16&expand=3994)
1790#[inline]
1791#[target_feature(enable = "avx512bw")]
1792#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1793#[cfg_attr(test, assert_instr(vpmullw))]
1794#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1795pub const fn _mm512_mask_mullo_epi16(
1796    src: __m512i,
1797    k: __mmask32,
1798    a: __m512i,
1799    b: __m512i,
1800) -> __m512i {
1801    unsafe {
1802        let mul = _mm512_mullo_epi16(a, b).as_i16x32();
1803        transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
1804    }
1805}
1806
1807/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1808///
1809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mullo_epi16&expand=3995)
1810#[inline]
1811#[target_feature(enable = "avx512bw")]
1812#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1813#[cfg_attr(test, assert_instr(vpmullw))]
1814#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1815pub const fn _mm512_maskz_mullo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1816    unsafe {
1817        let mul = _mm512_mullo_epi16(a, b).as_i16x32();
1818        transmute(simd_select_bitmask(k, mul, i16x32::ZERO))
1819    }
1820}
1821
1822/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1823///
1824/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mullo_epi16&expand=3991)
1825#[inline]
1826#[target_feature(enable = "avx512bw,avx512vl")]
1827#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1828#[cfg_attr(test, assert_instr(vpmullw))]
1829#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1830pub const fn _mm256_mask_mullo_epi16(
1831    src: __m256i,
1832    k: __mmask16,
1833    a: __m256i,
1834    b: __m256i,
1835) -> __m256i {
1836    unsafe {
1837        let mul = _mm256_mullo_epi16(a, b).as_i16x16();
1838        transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
1839    }
1840}
1841
1842/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1843///
1844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mullo_epi16&expand=3992)
1845#[inline]
1846#[target_feature(enable = "avx512bw,avx512vl")]
1847#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1848#[cfg_attr(test, assert_instr(vpmullw))]
1849#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1850pub const fn _mm256_maskz_mullo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1851    unsafe {
1852        let mul = _mm256_mullo_epi16(a, b).as_i16x16();
1853        transmute(simd_select_bitmask(k, mul, i16x16::ZERO))
1854    }
1855}
1856
1857/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1858///
1859/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mullo_epi16&expand=3988)
1860#[inline]
1861#[target_feature(enable = "avx512bw,avx512vl")]
1862#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1863#[cfg_attr(test, assert_instr(vpmullw))]
1864#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1865pub const fn _mm_mask_mullo_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1866    unsafe {
1867        let mul = _mm_mullo_epi16(a, b).as_i16x8();
1868        transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
1869    }
1870}
1871
1872/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1873///
1874/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mullo_epi16&expand=3989)
1875#[inline]
1876#[target_feature(enable = "avx512bw,avx512vl")]
1877#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1878#[cfg_attr(test, assert_instr(vpmullw))]
1879#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1880pub const fn _mm_maskz_mullo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1881    unsafe {
1882        let mul = _mm_mullo_epi16(a, b).as_i16x8();
1883        transmute(simd_select_bitmask(k, mul, i16x8::ZERO))
1884    }
1885}
1886
1887/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst.
1888///
1889/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu16&expand=3609)
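///
/// A minimal usage sketch (illustrative values, not from Intel's documentation),
/// assuming `core::arch::x86_64::*` is in scope and the caller enables `avx512bw`:
///
/// ```ignore
/// let a = _mm512_set1_epi16(-1); // 0xFFFF, i.e. 65535 when compared as unsigned
/// let b = _mm512_set1_epi16(1);
/// // The comparison is unsigned, so every result lane is 0xFFFF rather than 1.
/// let r = _mm512_max_epu16(a, b);
/// ```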
1890#[inline]
1891#[target_feature(enable = "avx512bw")]
1892#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1893#[cfg_attr(test, assert_instr(vpmaxuw))]
1894#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1895pub const fn _mm512_max_epu16(a: __m512i, b: __m512i) -> __m512i {
1896    unsafe { simd_imax(a.as_u16x32(), b.as_u16x32()).as_m512i() }
1897}
1898
1899/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1900///
1901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu16&expand=3607)
1902#[inline]
1903#[target_feature(enable = "avx512bw")]
1904#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1905#[cfg_attr(test, assert_instr(vpmaxuw))]
1906#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1907pub const fn _mm512_mask_max_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1908    unsafe {
1909        let max = _mm512_max_epu16(a, b).as_u16x32();
1910        transmute(simd_select_bitmask(k, max, src.as_u16x32()))
1911    }
1912}
1913
1914/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1915///
1916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu16&expand=3608)
1917#[inline]
1918#[target_feature(enable = "avx512bw")]
1919#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1920#[cfg_attr(test, assert_instr(vpmaxuw))]
1921#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1922pub const fn _mm512_maskz_max_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1923    unsafe {
1924        let max = _mm512_max_epu16(a, b).as_u16x32();
1925        transmute(simd_select_bitmask(k, max, u16x32::ZERO))
1926    }
1927}
1928
1929/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1930///
1931/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu16&expand=3604)
1932#[inline]
1933#[target_feature(enable = "avx512bw,avx512vl")]
1934#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1935#[cfg_attr(test, assert_instr(vpmaxuw))]
1936#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1937pub const fn _mm256_mask_max_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1938    unsafe {
1939        let max = _mm256_max_epu16(a, b).as_u16x16();
1940        transmute(simd_select_bitmask(k, max, src.as_u16x16()))
1941    }
1942}
1943
1944/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1945///
1946/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu16&expand=3605)
1947#[inline]
1948#[target_feature(enable = "avx512bw,avx512vl")]
1949#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1950#[cfg_attr(test, assert_instr(vpmaxuw))]
1951#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1952pub const fn _mm256_maskz_max_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1953    unsafe {
1954        let max = _mm256_max_epu16(a, b).as_u16x16();
1955        transmute(simd_select_bitmask(k, max, u16x16::ZERO))
1956    }
1957}
1958
1959/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1960///
1961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu16&expand=3601)
1962#[inline]
1963#[target_feature(enable = "avx512bw,avx512vl")]
1964#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1965#[cfg_attr(test, assert_instr(vpmaxuw))]
1966#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1967pub const fn _mm_mask_max_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1968    unsafe {
1969        let max = _mm_max_epu16(a, b).as_u16x8();
1970        transmute(simd_select_bitmask(k, max, src.as_u16x8()))
1971    }
1972}
1973
1974/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1975///
1976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu16&expand=3602)
1977#[inline]
1978#[target_feature(enable = "avx512bw,avx512vl")]
1979#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1980#[cfg_attr(test, assert_instr(vpmaxuw))]
1981#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1982pub const fn _mm_maskz_max_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1983    unsafe {
1984        let max = _mm_max_epu16(a, b).as_u16x8();
1985        transmute(simd_select_bitmask(k, max, u16x8::ZERO))
1986    }
1987}
1988
1989/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst.
1990///
1991/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu8&expand=3636)
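///
/// A minimal usage sketch (illustrative values, not from Intel's documentation),
/// assuming `core::arch::x86_64::*` is in scope and the caller enables `avx512bw`:
///
/// ```ignore
/// let a = _mm512_set1_epi8(-1); // 0xFF, i.e. 255 when compared as unsigned
/// let b = _mm512_set1_epi8(7);
/// // The comparison is unsigned, so every result lane is 0xFF (255) rather than 7.
/// let r = _mm512_max_epu8(a, b);
/// ```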
1992#[inline]
1993#[target_feature(enable = "avx512bw")]
1994#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1995#[cfg_attr(test, assert_instr(vpmaxub))]
1996#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1997pub const fn _mm512_max_epu8(a: __m512i, b: __m512i) -> __m512i {
1998    unsafe { simd_imax(a.as_u8x64(), b.as_u8x64()).as_m512i() }
1999}
2000
2001/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2002///
2003/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu8&expand=3634)
2004#[inline]
2005#[target_feature(enable = "avx512bw")]
2006#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2007#[cfg_attr(test, assert_instr(vpmaxub))]
2008#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2009pub const fn _mm512_mask_max_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
2010    unsafe {
2011        let max = _mm512_max_epu8(a, b).as_u8x64();
2012        transmute(simd_select_bitmask(k, max, src.as_u8x64()))
2013    }
2014}
2015
2016/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2017///
2018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu8&expand=3635)
2019#[inline]
2020#[target_feature(enable = "avx512bw")]
2021#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2022#[cfg_attr(test, assert_instr(vpmaxub))]
2023#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2024pub const fn _mm512_maskz_max_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
2025    unsafe {
2026        let max = _mm512_max_epu8(a, b).as_u8x64();
2027        transmute(simd_select_bitmask(k, max, u8x64::ZERO))
2028    }
2029}
2030
2031/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2032///
2033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu8&expand=3631)
2034#[inline]
2035#[target_feature(enable = "avx512bw,avx512vl")]
2036#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2037#[cfg_attr(test, assert_instr(vpmaxub))]
2038#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2039pub const fn _mm256_mask_max_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
2040    unsafe {
2041        let max = _mm256_max_epu8(a, b).as_u8x32();
2042        transmute(simd_select_bitmask(k, max, src.as_u8x32()))
2043    }
2044}
2045
2046/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2047///
2048/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu8&expand=3632)
2049#[inline]
2050#[target_feature(enable = "avx512bw,avx512vl")]
2051#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2052#[cfg_attr(test, assert_instr(vpmaxub))]
2053#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2054pub const fn _mm256_maskz_max_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
2055    unsafe {
2056        let max = _mm256_max_epu8(a, b).as_u8x32();
2057        transmute(simd_select_bitmask(k, max, u8x32::ZERO))
2058    }
2059}
2060
2061/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2062///
2063/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu8&expand=3628)
2064#[inline]
2065#[target_feature(enable = "avx512bw,avx512vl")]
2066#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2067#[cfg_attr(test, assert_instr(vpmaxub))]
2068#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2069pub const fn _mm_mask_max_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
2070    unsafe {
2071        let max = _mm_max_epu8(a, b).as_u8x16();
2072        transmute(simd_select_bitmask(k, max, src.as_u8x16()))
2073    }
2074}
2075
2076/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2077///
2078/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu8&expand=3629)
2079#[inline]
2080#[target_feature(enable = "avx512bw,avx512vl")]
2081#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2082#[cfg_attr(test, assert_instr(vpmaxub))]
2083#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2084pub const fn _mm_maskz_max_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
2085    unsafe {
2086        let max = _mm_max_epu8(a, b).as_u8x16();
2087        transmute(simd_select_bitmask(k, max, u8x16::ZERO))
2088    }
2089}
2090
2091/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst.
2092///
2093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi16&expand=3573)
2094#[inline]
2095#[target_feature(enable = "avx512bw")]
2096#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2097#[cfg_attr(test, assert_instr(vpmaxsw))]
2098#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2099pub const fn _mm512_max_epi16(a: __m512i, b: __m512i) -> __m512i {
2100    unsafe { simd_imax(a.as_i16x32(), b.as_i16x32()).as_m512i() }
2101}
2102
2103/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2104///
2105/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi16&expand=3571)
2106#[inline]
2107#[target_feature(enable = "avx512bw")]
2108#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2109#[cfg_attr(test, assert_instr(vpmaxsw))]
2110#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2111pub const fn _mm512_mask_max_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
2112    unsafe {
2113        let max = _mm512_max_epi16(a, b).as_i16x32();
2114        transmute(simd_select_bitmask(k, max, src.as_i16x32()))
2115    }
2116}
2117
2118/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2119///
2120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi16&expand=3572)
2121#[inline]
2122#[target_feature(enable = "avx512bw")]
2123#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2124#[cfg_attr(test, assert_instr(vpmaxsw))]
2125#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2126pub const fn _mm512_maskz_max_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
2127    unsafe {
2128        let max = _mm512_max_epi16(a, b).as_i16x32();
2129        transmute(simd_select_bitmask(k, max, i16x32::ZERO))
2130    }
2131}
2132
2133/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2134///
2135/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi16&expand=3568)
2136#[inline]
2137#[target_feature(enable = "avx512bw,avx512vl")]
2138#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2139#[cfg_attr(test, assert_instr(vpmaxsw))]
2140#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2141pub const fn _mm256_mask_max_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
2142    unsafe {
2143        let max = _mm256_max_epi16(a, b).as_i16x16();
2144        transmute(simd_select_bitmask(k, max, src.as_i16x16()))
2145    }
2146}
2147
2148/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2149///
2150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi16&expand=3569)
2151#[inline]
2152#[target_feature(enable = "avx512bw,avx512vl")]
2153#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2154#[cfg_attr(test, assert_instr(vpmaxsw))]
2155#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2156pub const fn _mm256_maskz_max_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
2157    unsafe {
2158        let max = _mm256_max_epi16(a, b).as_i16x16();
2159        transmute(simd_select_bitmask(k, max, i16x16::ZERO))
2160    }
2161}
2162
2163/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2164///
2165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi16&expand=3565)
2166#[inline]
2167#[target_feature(enable = "avx512bw,avx512vl")]
2168#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2169#[cfg_attr(test, assert_instr(vpmaxsw))]
2170#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2171pub const fn _mm_mask_max_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2172    unsafe {
2173        let max = _mm_max_epi16(a, b).as_i16x8();
2174        transmute(simd_select_bitmask(k, max, src.as_i16x8()))
2175    }
2176}
2177
2178/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2179///
2180/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi16&expand=3566)
2181#[inline]
2182#[target_feature(enable = "avx512bw,avx512vl")]
2183#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2184#[cfg_attr(test, assert_instr(vpmaxsw))]
2185#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2186pub const fn _mm_maskz_max_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2187    unsafe {
2188        let max = _mm_max_epi16(a, b).as_i16x8();
2189        transmute(simd_select_bitmask(k, max, i16x8::ZERO))
2190    }
2191}
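// Minimal sketch (an assumed example, not upstream code) of the 128-bit signed
// maximum with a writemask: only the lanes selected by the mask are recomputed,
// the rest are copied from `src`. Requires AVX-512BW together with AVX-512VL.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2,avx512f,avx512bw,avx512vl")]
unsafe fn example_mask_max_epi16() -> __m128i {
    let a = _mm_set1_epi16(-5);
    let b = _mm_set1_epi16(3);
    let src = _mm_set1_epi16(100);
    // Mask 0b0000_1111: lanes 0..4 take max(-5, 3) = 3, lanes 4..8 keep 100.
    _mm_mask_max_epi16(src, 0b0000_1111, a, b)
}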
2192
2193/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst.
2194///
2195/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi8&expand=3600)
2196#[inline]
2197#[target_feature(enable = "avx512bw")]
2198#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2199#[cfg_attr(test, assert_instr(vpmaxsb))]
2200#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2201pub const fn _mm512_max_epi8(a: __m512i, b: __m512i) -> __m512i {
2202    unsafe { simd_imax(a.as_i8x64(), b.as_i8x64()).as_m512i() }
2203}
2204
2205/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2206///
2207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi8&expand=3598)
2208#[inline]
2209#[target_feature(enable = "avx512bw")]
2210#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2211#[cfg_attr(test, assert_instr(vpmaxsb))]
2212#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2213pub const fn _mm512_mask_max_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
2214    unsafe {
2215        let max = _mm512_max_epi8(a, b).as_i8x64();
2216        transmute(simd_select_bitmask(k, max, src.as_i8x64()))
2217    }
2218}
2219
2220/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2221///
2222/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi8&expand=3599)
2223#[inline]
2224#[target_feature(enable = "avx512bw")]
2225#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2226#[cfg_attr(test, assert_instr(vpmaxsb))]
2227#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2228pub const fn _mm512_maskz_max_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
2229    unsafe {
2230        let max = _mm512_max_epi8(a, b).as_i8x64();
2231        transmute(simd_select_bitmask(k, max, i8x64::ZERO))
2232    }
2233}
2234
2235/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2236///
2237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi8&expand=3595)
2238#[inline]
2239#[target_feature(enable = "avx512bw,avx512vl")]
2240#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2241#[cfg_attr(test, assert_instr(vpmaxsb))]
2242#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2243pub const fn _mm256_mask_max_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
2244    unsafe {
2245        let max = _mm256_max_epi8(a, b).as_i8x32();
2246        transmute(simd_select_bitmask(k, max, src.as_i8x32()))
2247    }
2248}
2249
2250/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2251///
2252/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi8&expand=3596)
2253#[inline]
2254#[target_feature(enable = "avx512bw,avx512vl")]
2255#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2256#[cfg_attr(test, assert_instr(vpmaxsb))]
2257#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2258pub const fn _mm256_maskz_max_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
2259    unsafe {
2260        let max = _mm256_max_epi8(a, b).as_i8x32();
2261        transmute(simd_select_bitmask(k, max, i8x32::ZERO))
2262    }
2263}
2264
2265/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2266///
2267/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi8&expand=3592)
2268#[inline]
2269#[target_feature(enable = "avx512bw,avx512vl")]
2270#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2271#[cfg_attr(test, assert_instr(vpmaxsb))]
2272#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2273pub const fn _mm_mask_max_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
2274    unsafe {
2275        let max = _mm_max_epi8(a, b).as_i8x16();
2276        transmute(simd_select_bitmask(k, max, src.as_i8x16()))
2277    }
2278}
2279
2280/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2281///
2282/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi8&expand=3593)
2283#[inline]
2284#[target_feature(enable = "avx512bw,avx512vl")]
2285#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2286#[cfg_attr(test, assert_instr(vpmaxsb))]
2287#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2288pub const fn _mm_maskz_max_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
2289    unsafe {
2290        let max = _mm_max_epi8(a, b).as_i8x16();
2291        transmute(simd_select_bitmask(k, max, i8x16::ZERO))
2292    }
2293}
2294
2295/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst.
2296///
2297/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu16&expand=3723)
2298#[inline]
2299#[target_feature(enable = "avx512bw")]
2300#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2301#[cfg_attr(test, assert_instr(vpminuw))]
2302#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2303pub const fn _mm512_min_epu16(a: __m512i, b: __m512i) -> __m512i {
2304    unsafe { simd_imin(a.as_u16x32(), b.as_u16x32()).as_m512i() }
2305}
2306
2307/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2308///
2309/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu16&expand=3721)
2310#[inline]
2311#[target_feature(enable = "avx512bw")]
2312#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2313#[cfg_attr(test, assert_instr(vpminuw))]
2314#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2315pub const fn _mm512_mask_min_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
2316    unsafe {
2317        let min = _mm512_min_epu16(a, b).as_u16x32();
2318        transmute(simd_select_bitmask(k, min, src.as_u16x32()))
2319    }
2320}
2321
2322/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2323///
2324/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu16&expand=3722)
2325#[inline]
2326#[target_feature(enable = "avx512bw")]
2327#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2328#[cfg_attr(test, assert_instr(vpminuw))]
2329#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2330pub const fn _mm512_maskz_min_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
2331    unsafe {
2332        let min = _mm512_min_epu16(a, b).as_u16x32();
2333        transmute(simd_select_bitmask(k, min, u16x32::ZERO))
2334    }
2335}
2336
2337/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2338///
2339/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu16&expand=3718)
2340#[inline]
2341#[target_feature(enable = "avx512bw,avx512vl")]
2342#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2343#[cfg_attr(test, assert_instr(vpminuw))]
2344#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2345pub const fn _mm256_mask_min_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
2346    unsafe {
2347        let min = _mm256_min_epu16(a, b).as_u16x16();
2348        transmute(simd_select_bitmask(k, min, src.as_u16x16()))
2349    }
2350}
2351
2352/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2353///
2354/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu16&expand=3719)
2355#[inline]
2356#[target_feature(enable = "avx512bw,avx512vl")]
2357#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2358#[cfg_attr(test, assert_instr(vpminuw))]
2359#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2360pub const fn _mm256_maskz_min_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
2361    unsafe {
2362        let min = _mm256_min_epu16(a, b).as_u16x16();
2363        transmute(simd_select_bitmask(k, min, u16x16::ZERO))
2364    }
2365}
2366
2367/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2368///
2369/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu16&expand=3715)
2370#[inline]
2371#[target_feature(enable = "avx512bw,avx512vl")]
2372#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2373#[cfg_attr(test, assert_instr(vpminuw))]
2374#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2375pub const fn _mm_mask_min_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2376    unsafe {
2377        let min = _mm_min_epu16(a, b).as_u16x8();
2378        transmute(simd_select_bitmask(k, min, src.as_u16x8()))
2379    }
2380}
2381
2382/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2383///
2384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu16&expand=3716)
2385#[inline]
2386#[target_feature(enable = "avx512bw,avx512vl")]
2387#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2388#[cfg_attr(test, assert_instr(vpminuw))]
2389#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2390pub const fn _mm_maskz_min_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2391    unsafe {
2392        let min = _mm_min_epu16(a, b).as_u16x8();
2393        transmute(simd_select_bitmask(k, min, u16x8::ZERO))
2394    }
2395}
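// Hypothetical sketch showing the zeromask form of the 256-bit unsigned minimum:
// lanes whose mask bit is clear are forced to zero rather than copied from a
// source operand. Assumes AVX-512BW + AVX-512VL support.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx,avx512f,avx512bw,avx512vl")]
unsafe fn example_maskz_min_epu16() -> __m256i {
    let a = _mm256_set1_epi16(40_000u16 as i16); // large unsigned value
    let b = _mm256_set1_epi16(10);
    // Only the low eight lanes are computed; the upper eight lanes become 0.
    _mm256_maskz_min_epu16(0x00FF, a, b)
}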
2396
2397/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst.
2398///
2399/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu8&expand=3750)
2400#[inline]
2401#[target_feature(enable = "avx512bw")]
2402#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2403#[cfg_attr(test, assert_instr(vpminub))]
2404#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2405pub const fn _mm512_min_epu8(a: __m512i, b: __m512i) -> __m512i {
2406    unsafe { simd_imin(a.as_u8x64(), b.as_u8x64()).as_m512i() }
2407}
2408
2409/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2410///
2411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu8&expand=3748)
2412#[inline]
2413#[target_feature(enable = "avx512bw")]
2414#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2415#[cfg_attr(test, assert_instr(vpminub))]
2416#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2417pub const fn _mm512_mask_min_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
2418    unsafe {
2419        let min = _mm512_min_epu8(a, b).as_u8x64();
2420        transmute(simd_select_bitmask(k, min, src.as_u8x64()))
2421    }
2422}
2423
2424/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2425///
2426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu8&expand=3749)
2427#[inline]
2428#[target_feature(enable = "avx512bw")]
2429#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2430#[cfg_attr(test, assert_instr(vpminub))]
2431#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2432pub const fn _mm512_maskz_min_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
2433    unsafe {
2434        let min = _mm512_min_epu8(a, b).as_u8x64();
2435        transmute(simd_select_bitmask(k, min, u8x64::ZERO))
2436    }
2437}
2438
2439/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2440///
2441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu8&expand=3745)
2442#[inline]
2443#[target_feature(enable = "avx512bw,avx512vl")]
2444#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2445#[cfg_attr(test, assert_instr(vpminub))]
2446#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2447pub const fn _mm256_mask_min_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
2448    unsafe {
2449        let min = _mm256_min_epu8(a, b).as_u8x32();
2450        transmute(simd_select_bitmask(k, min, src.as_u8x32()))
2451    }
2452}
2453
2454/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2455///
2456/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu8&expand=3746)
2457#[inline]
2458#[target_feature(enable = "avx512bw,avx512vl")]
2459#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2460#[cfg_attr(test, assert_instr(vpminub))]
2461#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2462pub const fn _mm256_maskz_min_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
2463    unsafe {
2464        let min = _mm256_min_epu8(a, b).as_u8x32();
2465        transmute(simd_select_bitmask(k, min, u8x32::ZERO))
2466    }
2467}
2468
2469/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2470///
2471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu8&expand=3742)
2472#[inline]
2473#[target_feature(enable = "avx512bw,avx512vl")]
2474#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2475#[cfg_attr(test, assert_instr(vpminub))]
2476#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2477pub const fn _mm_mask_min_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
2478    unsafe {
2479        let min = _mm_min_epu8(a, b).as_u8x16();
2480        transmute(simd_select_bitmask(k, min, src.as_u8x16()))
2481    }
2482}
2483
2484/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2485///
2486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu8&expand=3743)
2487#[inline]
2488#[target_feature(enable = "avx512bw,avx512vl")]
2489#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2490#[cfg_attr(test, assert_instr(vpminub))]
2491#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2492pub const fn _mm_maskz_min_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
2493    unsafe {
2494        let min = _mm_min_epu8(a, b).as_u8x16();
2495        transmute(simd_select_bitmask(k, min, u8x16::ZERO))
2496    }
2497}
2498
2499/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst.
2500///
2501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi16&expand=3687)
2502#[inline]
2503#[target_feature(enable = "avx512bw")]
2504#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2505#[cfg_attr(test, assert_instr(vpminsw))]
2506#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2507pub const fn _mm512_min_epi16(a: __m512i, b: __m512i) -> __m512i {
2508    unsafe { simd_imin(a.as_i16x32(), b.as_i16x32()).as_m512i() }
2509}
2510
2511/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2512///
2513/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi16&expand=3685)
2514#[inline]
2515#[target_feature(enable = "avx512bw")]
2516#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2517#[cfg_attr(test, assert_instr(vpminsw))]
2518#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2519pub const fn _mm512_mask_min_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
2520    unsafe {
2521        let min = _mm512_min_epi16(a, b).as_i16x32();
2522        transmute(simd_select_bitmask(k, min, src.as_i16x32()))
2523    }
2524}
2525
2526/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2527///
2528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi16&expand=3686)
2529#[inline]
2530#[target_feature(enable = "avx512bw")]
2531#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2532#[cfg_attr(test, assert_instr(vpminsw))]
2533#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2534pub const fn _mm512_maskz_min_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
2535    unsafe {
2536        let min = _mm512_min_epi16(a, b).as_i16x32();
2537        transmute(simd_select_bitmask(k, min, i16x32::ZERO))
2538    }
2539}
2540
2541/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2542///
2543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi16&expand=3682)
2544#[inline]
2545#[target_feature(enable = "avx512bw,avx512vl")]
2546#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2547#[cfg_attr(test, assert_instr(vpminsw))]
2548#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2549pub const fn _mm256_mask_min_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
2550    unsafe {
2551        let min = _mm256_min_epi16(a, b).as_i16x16();
2552        transmute(simd_select_bitmask(k, min, src.as_i16x16()))
2553    }
2554}
2555
2556/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2557///
2558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi16&expand=3683)
2559#[inline]
2560#[target_feature(enable = "avx512bw,avx512vl")]
2561#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2562#[cfg_attr(test, assert_instr(vpminsw))]
2563#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2564pub const fn _mm256_maskz_min_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
2565    unsafe {
2566        let min = _mm256_min_epi16(a, b).as_i16x16();
2567        transmute(simd_select_bitmask(k, min, i16x16::ZERO))
2568    }
2569}
2570
2571/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2572///
2573/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi16&expand=3679)
2574#[inline]
2575#[target_feature(enable = "avx512bw,avx512vl")]
2576#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2577#[cfg_attr(test, assert_instr(vpminsw))]
2578#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2579pub const fn _mm_mask_min_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2580    unsafe {
2581        let min = _mm_min_epi16(a, b).as_i16x8();
2582        transmute(simd_select_bitmask(k, min, src.as_i16x8()))
2583    }
2584}
2585
2586/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2587///
2588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi16&expand=3680)
2589#[inline]
2590#[target_feature(enable = "avx512bw,avx512vl")]
2591#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2592#[cfg_attr(test, assert_instr(vpminsw))]
2593#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2594pub const fn _mm_maskz_min_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2595    unsafe {
2596        let min = _mm_min_epi16(a, b).as_i16x8();
2597        transmute(simd_select_bitmask(k, min, i16x8::ZERO))
2598    }
2599}
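// Sketch (assumed example, not from the crate) of why separate signed and unsigned
// minimum intrinsics exist: the bit pattern 0x8000 is the smallest signed 16-bit
// value but a large unsigned value, so the two intrinsics select different lanes.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2,avx512f,avx512bw,avx512vl")]
unsafe fn example_signed_vs_unsigned_min() -> (__m128i, __m128i) {
    let a = _mm_set1_epi16(0x8000u16 as i16); // -32768 signed, 32768 unsigned
    let b = _mm_set1_epi16(7);
    let signed_min = _mm_maskz_min_epi16(0xFF, a, b); // every lane is -32768
    let unsigned_min = _mm_maskz_min_epu16(0xFF, a, b); // every lane is 7
    (signed_min, unsigned_min)
}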
2600
2601/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst.
2602///
2603/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi8&expand=3714)
2604#[inline]
2605#[target_feature(enable = "avx512bw")]
2606#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2607#[cfg_attr(test, assert_instr(vpminsb))]
2608#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2609pub const fn _mm512_min_epi8(a: __m512i, b: __m512i) -> __m512i {
2610    unsafe { simd_imin(a.as_i8x64(), b.as_i8x64()).as_m512i() }
2611}
2612
2613/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2614///
2615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi8&expand=3712)
2616#[inline]
2617#[target_feature(enable = "avx512bw")]
2618#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2619#[cfg_attr(test, assert_instr(vpminsb))]
2620#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2621pub const fn _mm512_mask_min_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
2622    unsafe {
2623        let min = _mm512_min_epi8(a, b).as_i8x64();
2624        transmute(simd_select_bitmask(k, min, src.as_i8x64()))
2625    }
2626}
2627
2628/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2629///
2630/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi8&expand=3713)
2631#[inline]
2632#[target_feature(enable = "avx512bw")]
2633#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2634#[cfg_attr(test, assert_instr(vpminsb))]
2635#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2636pub const fn _mm512_maskz_min_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
2637    unsafe {
2638        let min = _mm512_min_epi8(a, b).as_i8x64();
2639        transmute(simd_select_bitmask(k, min, i8x64::ZERO))
2640    }
2641}
2642
2643/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2644///
2645/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi8&expand=3709)
2646#[inline]
2647#[target_feature(enable = "avx512bw,avx512vl")]
2648#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2649#[cfg_attr(test, assert_instr(vpminsb))]
2650#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2651pub const fn _mm256_mask_min_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
2652    unsafe {
2653        let min = _mm256_min_epi8(a, b).as_i8x32();
2654        transmute(simd_select_bitmask(k, min, src.as_i8x32()))
2655    }
2656}
2657
2658/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2659///
2660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi8&expand=3710)
2661#[inline]
2662#[target_feature(enable = "avx512bw,avx512vl")]
2663#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2664#[cfg_attr(test, assert_instr(vpminsb))]
2665#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2666pub const fn _mm256_maskz_min_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
2667    unsafe {
2668        let min = _mm256_min_epi8(a, b).as_i8x32();
2669        transmute(simd_select_bitmask(k, min, i8x32::ZERO))
2670    }
2671}
2672
2673/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2674///
2675/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi8&expand=3706)
2676#[inline]
2677#[target_feature(enable = "avx512bw,avx512vl")]
2678#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2679#[cfg_attr(test, assert_instr(vpminsb))]
2680#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2681pub const fn _mm_mask_min_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
2682    unsafe {
2683        let min = _mm_min_epi8(a, b).as_i8x16();
2684        transmute(simd_select_bitmask(k, min, src.as_i8x16()))
2685    }
2686}
2687
2688/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2689///
2690/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi8&expand=3707)
2691#[inline]
2692#[target_feature(enable = "avx512bw,avx512vl")]
2693#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2694#[cfg_attr(test, assert_instr(vpminsb))]
2695#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2696pub const fn _mm_maskz_min_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
2697    unsafe {
2698        let min = _mm_min_epi8(a, b).as_i8x16();
2699        transmute(simd_select_bitmask(k, min, i8x16::ZERO))
2700    }
2701}
2702
2703/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
2704///
2705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu16_mask&expand=1050)
2706#[inline]
2707#[target_feature(enable = "avx512bw")]
2708#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2709#[cfg_attr(test, assert_instr(vpcmp))]
2710#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2711pub const fn _mm512_cmplt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
2712    unsafe { simd_bitmask::<u16x32, _>(simd_lt(a.as_u16x32(), b.as_u16x32())) }
2713}
2714
2715/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
2716///
2717/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu16_mask&expand=1051)
2718#[inline]
2719#[target_feature(enable = "avx512bw")]
2720#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2721#[cfg_attr(test, assert_instr(vpcmp))]
2722#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2723pub const fn _mm512_mask_cmplt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
2724    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b)
2725}
2726
2727/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
2728///
2729/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu16_mask&expand=1050)
2730#[inline]
2731#[target_feature(enable = "avx512bw,avx512vl")]
2732#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2733#[cfg_attr(test, assert_instr(vpcmp))]
2734#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2735pub const fn _mm256_cmplt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
2736    unsafe { simd_bitmask::<u16x16, _>(simd_lt(a.as_u16x16(), b.as_u16x16())) }
2737}
2738
2739/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
2740///
2741/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu16_mask&expand=1049)
2742#[inline]
2743#[target_feature(enable = "avx512bw,avx512vl")]
2744#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2745#[cfg_attr(test, assert_instr(vpcmp))]
2746#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2747pub const fn _mm256_mask_cmplt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
2748    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b)
2749}
2750
2751/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
2752///
2753/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu16_mask&expand=1018)
2754#[inline]
2755#[target_feature(enable = "avx512bw,avx512vl")]
2756#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2757#[cfg_attr(test, assert_instr(vpcmp))]
2758#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2759pub const fn _mm_cmplt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
2760    unsafe { simd_bitmask::<u16x8, _>(simd_lt(a.as_u16x8(), b.as_u16x8())) }
2761}
2762
2763/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
2764///
2765/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu16_mask&expand=1019)
2766#[inline]
2767#[target_feature(enable = "avx512bw,avx512vl")]
2768#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2769#[cfg_attr(test, assert_instr(vpcmp))]
2770#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2771pub const fn _mm_mask_cmplt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
2772    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b)
2773}
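// Sketch (assumed, not upstream) of consuming the comparison result: the returned
// mask is an ordinary integer bitmask, so lanes can be counted or tested directly,
// and the `_mm512_mask_*` form restricts the comparison with an existing mask.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512bw")]
unsafe fn example_cmplt_epu16_mask() -> (u32, __mmask32) {
    let a = _mm512_set1_epi16(1);
    let b = _mm512_set1_epi16(2);
    let all = _mm512_cmplt_epu16_mask(a, b); // every lane satisfies a < b
    let lanes_less = all.count_ones(); // 32
    // Restrict the comparison to the low 16 lanes via the zeromask form.
    let low_half = _mm512_mask_cmplt_epu16_mask(0x0000_FFFF, a, b);
    (lanes_less, low_half)
}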
2774
2775/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
2776///
2777/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu8_mask&expand=1068)
2778#[inline]
2779#[target_feature(enable = "avx512bw")]
2780#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2781#[cfg_attr(test, assert_instr(vpcmp))]
2782#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2783pub const fn _mm512_cmplt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
2784    unsafe { simd_bitmask::<u8x64, _>(simd_lt(a.as_u8x64(), b.as_u8x64())) }
2785}
2786
2787/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
2788///
2789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu8_mask&expand=1069)
2790#[inline]
2791#[target_feature(enable = "avx512bw")]
2792#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2793#[cfg_attr(test, assert_instr(vpcmp))]
2794#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2795pub const fn _mm512_mask_cmplt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
2796    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b)
2797}
2798
2799/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
2800///
2801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu8_mask&expand=1066)
2802#[inline]
2803#[target_feature(enable = "avx512bw,avx512vl")]
2804#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2805#[cfg_attr(test, assert_instr(vpcmp))]
2806#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2807pub const fn _mm256_cmplt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
2808    unsafe { simd_bitmask::<u8x32, _>(simd_lt(a.as_u8x32(), b.as_u8x32())) }
2809}
2810
2811/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
2812///
2813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu8_mask&expand=1067)
2814#[inline]
2815#[target_feature(enable = "avx512bw,avx512vl")]
2816#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2817#[cfg_attr(test, assert_instr(vpcmp))]
2818#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2819pub const fn _mm256_mask_cmplt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
2820    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b)
2821}
2822
2823/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
2824///
2825/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu8_mask&expand=1064)
2826#[inline]
2827#[target_feature(enable = "avx512bw,avx512vl")]
2828#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2829#[cfg_attr(test, assert_instr(vpcmp))]
2830#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2831pub const fn _mm_cmplt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
2832    unsafe { simd_bitmask::<u8x16, _>(simd_lt(a.as_u8x16(), b.as_u8x16())) }
2833}
2834
2835/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
2836///
2837/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu8_mask&expand=1065)
2838#[inline]
2839#[target_feature(enable = "avx512bw,avx512vl")]
2840#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2841#[cfg_attr(test, assert_instr(vpcmp))]
2842#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2843pub const fn _mm_mask_cmplt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
2844    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b)
2845}
2846
2847/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
2848///
2849/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi16_mask&expand=1022)
2850#[inline]
2851#[target_feature(enable = "avx512bw")]
2852#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2853#[cfg_attr(test, assert_instr(vpcmp))]
2854#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2855pub const fn _mm512_cmplt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
2856    unsafe { simd_bitmask::<i16x32, _>(simd_lt(a.as_i16x32(), b.as_i16x32())) }
2857}
2858
2859/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
2860///
2861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi16_mask&expand=1023)
2862#[inline]
2863#[target_feature(enable = "avx512bw")]
2864#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2865#[cfg_attr(test, assert_instr(vpcmp))]
2866#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2867pub const fn _mm512_mask_cmplt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
2868    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b)
2869}
2870
2871/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
2872///
2873/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi16_mask&expand=1020)
2874#[inline]
2875#[target_feature(enable = "avx512bw,avx512vl")]
2876#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2877#[cfg_attr(test, assert_instr(vpcmp))]
2878#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2879pub const fn _mm256_cmplt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
2880    unsafe { simd_bitmask::<i16x16, _>(simd_lt(a.as_i16x16(), b.as_i16x16())) }
2881}
2882
2883/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
2884///
2885/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi16_mask&expand=1021)
2886#[inline]
2887#[target_feature(enable = "avx512bw,avx512vl")]
2888#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2889#[cfg_attr(test, assert_instr(vpcmp))]
2890#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2891pub const fn _mm256_mask_cmplt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
2892    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b)
2893}
2894
2895/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
2896///
2897/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi16_mask&expand=1018)
2898#[inline]
2899#[target_feature(enable = "avx512bw,avx512vl")]
2900#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2901#[cfg_attr(test, assert_instr(vpcmp))]
2902#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2903pub const fn _mm_cmplt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
2904    unsafe { simd_bitmask::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8())) }
2905}
2906
2907/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
2908///
2909/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi16_mask&expand=1019)
2910#[inline]
2911#[target_feature(enable = "avx512bw,avx512vl")]
2912#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2913#[cfg_attr(test, assert_instr(vpcmp))]
2914#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2915pub const fn _mm_mask_cmplt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
2916    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b)
2917}
2918
2919/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
2920///
2921/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi8_mask&expand=1044)
2922#[inline]
2923#[target_feature(enable = "avx512bw")]
2924#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2925#[cfg_attr(test, assert_instr(vpcmp))]
2926#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2927pub const fn _mm512_cmplt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
2928    unsafe { simd_bitmask::<i8x64, _>(simd_lt(a.as_i8x64(), b.as_i8x64())) }
2929}
2930
2931/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
2932///
2933/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi8_mask&expand=1045)
2934#[inline]
2935#[target_feature(enable = "avx512bw")]
2936#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2937#[cfg_attr(test, assert_instr(vpcmp))]
2938#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2939pub const fn _mm512_mask_cmplt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
2940    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b)
2941}
2942
2943/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
2944///
2945/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi8_mask&expand=1042)
2946#[inline]
2947#[target_feature(enable = "avx512bw,avx512vl")]
2948#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2949#[cfg_attr(test, assert_instr(vpcmp))]
2950#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2951pub const fn _mm256_cmplt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
2952    unsafe { simd_bitmask::<i8x32, _>(simd_lt(a.as_i8x32(), b.as_i8x32())) }
2953}
2954
2955/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
2956///
2957/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi8_mask&expand=1043)
2958#[inline]
2959#[target_feature(enable = "avx512bw,avx512vl")]
2960#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2961#[cfg_attr(test, assert_instr(vpcmp))]
2962#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2963pub const fn _mm256_mask_cmplt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
2964    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b)
2965}
2966
2967/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
2968///
2969/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi8_mask&expand=1040)
2970#[inline]
2971#[target_feature(enable = "avx512bw,avx512vl")]
2972#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2973#[cfg_attr(test, assert_instr(vpcmp))]
2974#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2975pub const fn _mm_cmplt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
2976    unsafe { simd_bitmask::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16())) }
2977}
2978
2979/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
2980///
2981/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi8_mask&expand=1041)
2982#[inline]
2983#[target_feature(enable = "avx512bw,avx512vl")]
2984#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2985#[cfg_attr(test, assert_instr(vpcmp))]
2986#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2987pub const fn _mm_mask_cmplt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
2988    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b)
2989}
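// Hypothetical sketch of a typical pattern built on these comparisons: scan 64
// signed bytes at once and use the mask to locate the first lane below a threshold.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512bw")]
unsafe fn example_first_lane_below_threshold(data: __m512i) -> Option<u32> {
    let threshold = _mm512_set1_epi8(0); // looking for the first negative byte
    let mask = _mm512_cmplt_epi8_mask(data, threshold);
    if mask == 0 { None } else { Some(mask.trailing_zeros()) }
}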
2990
2991/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
2992///
2993/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu16_mask&expand=927)
2994#[inline]
2995#[target_feature(enable = "avx512bw")]
2996#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2997#[cfg_attr(test, assert_instr(vpcmp))]
2998#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2999pub const fn _mm512_cmpgt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
3000    unsafe { simd_bitmask::<u16x32, _>(simd_gt(a.as_u16x32(), b.as_u16x32())) }
3001}
3002
3003/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3004///
3005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu16_mask&expand=928)
3006#[inline]
3007#[target_feature(enable = "avx512bw")]
3008#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3009#[cfg_attr(test, assert_instr(vpcmp))]
3010#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3011pub const fn _mm512_mask_cmpgt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
3012    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b)
3013}
3014
3015/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
3016///
3017/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu16_mask&expand=925)
3018#[inline]
3019#[target_feature(enable = "avx512bw,avx512vl")]
3020#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3021#[cfg_attr(test, assert_instr(vpcmp))]
3022#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3023pub const fn _mm256_cmpgt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
3024    unsafe { simd_bitmask::<u16x16, _>(simd_gt(a.as_u16x16(), b.as_u16x16())) }
3025}
3026
3027/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3028///
3029/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu16_mask&expand=926)
3030#[inline]
3031#[target_feature(enable = "avx512bw,avx512vl")]
3032#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3033#[cfg_attr(test, assert_instr(vpcmp))]
3034#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3035pub const fn _mm256_mask_cmpgt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
3036    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b)
3037}
3038
3039/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
3040///
3041/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu16_mask&expand=923)
3042#[inline]
3043#[target_feature(enable = "avx512bw,avx512vl")]
3044#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3045#[cfg_attr(test, assert_instr(vpcmp))]
3046#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3047pub const fn _mm_cmpgt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
3048    unsafe { simd_bitmask::<u16x8, _>(simd_gt(a.as_u16x8(), b.as_u16x8())) }
3049}
3050
3051/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3052///
3053/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu16_mask&expand=924)
3054#[inline]
3055#[target_feature(enable = "avx512bw,avx512vl")]
3056#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3057#[cfg_attr(test, assert_instr(vpcmp))]
3058#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3059pub const fn _mm_mask_cmpgt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
3060    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b)
3061}
3062
3063/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
3064///
3065/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu8_mask&expand=945)
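///
/// # Examples
///
/// An illustrative sketch, not part of the original documentation; it assumes an
/// `x86_64` CPU that actually supports `avx512bw` at runtime:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi8(-128); // 0x80, i.e. 128 when read as unsigned
///     let b = _mm512_set1_epi8(127);
///     // Unsigned: 128 > 127 in every lane, so all 64 mask bits are set.
///     assert_eq!(_mm512_cmpgt_epu8_mask(a, b), u64::MAX);
/// }
/// ```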
3066#[inline]
3067#[target_feature(enable = "avx512bw")]
3068#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3069#[cfg_attr(test, assert_instr(vpcmp))]
3070#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3071pub const fn _mm512_cmpgt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
3072    unsafe { simd_bitmask::<u8x64, _>(simd_gt(a.as_u8x64(), b.as_u8x64())) }
3073}
3074
3075/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3076///
3077/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu8_mask&expand=946)
3078#[inline]
3079#[target_feature(enable = "avx512bw")]
3080#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3081#[cfg_attr(test, assert_instr(vpcmp))]
3082#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3083pub const fn _mm512_mask_cmpgt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
3084    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b)
3085}
3086
3087/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
3088///
3089/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu8_mask&expand=943)
3090#[inline]
3091#[target_feature(enable = "avx512bw,avx512vl")]
3092#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3093#[cfg_attr(test, assert_instr(vpcmp))]
3094#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3095pub const fn _mm256_cmpgt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
3096    unsafe { simd_bitmask::<u8x32, _>(simd_gt(a.as_u8x32(), b.as_u8x32())) }
3097}
3098
3099/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3100///
3101/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu8_mask&expand=944)
3102#[inline]
3103#[target_feature(enable = "avx512bw,avx512vl")]
3104#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3105#[cfg_attr(test, assert_instr(vpcmp))]
3106#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3107pub const fn _mm256_mask_cmpgt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
3108    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b)
3109}
3110
3111/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
3112///
3113/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu8_mask&expand=941)
3114#[inline]
3115#[target_feature(enable = "avx512bw,avx512vl")]
3116#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3117#[cfg_attr(test, assert_instr(vpcmp))]
3118#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3119pub const fn _mm_cmpgt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
3120    unsafe { simd_bitmask::<u8x16, _>(simd_gt(a.as_u8x16(), b.as_u8x16())) }
3121}
3122
3123/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3124///
3125/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu8_mask&expand=942)
3126#[inline]
3127#[target_feature(enable = "avx512bw,avx512vl")]
3128#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3129#[cfg_attr(test, assert_instr(vpcmp))]
3130#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3131pub const fn _mm_mask_cmpgt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
3132    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b)
3133}
3134
3135/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k.
3136///
3137/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi16_mask&expand=897)
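///
/// # Examples
///
/// An illustrative sketch, not part of the original documentation; it assumes an
/// `x86_64` CPU that actually supports `avx512bw` at runtime:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi16(-1);
///     let b = _mm512_set1_epi16(1);
///     // Signed: -1 > 1 is false in every lane, so the mask is empty
///     // (contrast this with the unsigned _mm512_cmpgt_epu16_mask).
///     assert_eq!(_mm512_cmpgt_epi16_mask(a, b), 0);
/// }
/// ```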
3138#[inline]
3139#[target_feature(enable = "avx512bw")]
3140#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3141#[cfg_attr(test, assert_instr(vpcmp))]
3142#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3143pub const fn _mm512_cmpgt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
3144    unsafe { simd_bitmask::<i16x32, _>(simd_gt(a.as_i16x32(), b.as_i16x32())) }
3145}
3146
3147/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3148///
3149/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi16_mask&expand=898)
3150#[inline]
3151#[target_feature(enable = "avx512bw")]
3152#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3153#[cfg_attr(test, assert_instr(vpcmp))]
3154#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3155pub const fn _mm512_mask_cmpgt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
3156    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b)
3157}
3158
3159/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k.
3160///
3161/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi16_mask&expand=895)
3162#[inline]
3163#[target_feature(enable = "avx512bw,avx512vl")]
3164#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3165#[cfg_attr(test, assert_instr(vpcmp))]
3166#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3167pub const fn _mm256_cmpgt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
3168    unsafe { simd_bitmask::<i16x16, _>(simd_gt(a.as_i16x16(), b.as_i16x16())) }
3169}
3170
3171/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3172///
3173/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi16_mask&expand=896)
3174#[inline]
3175#[target_feature(enable = "avx512bw,avx512vl")]
3176#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3177#[cfg_attr(test, assert_instr(vpcmp))]
3178#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3179pub const fn _mm256_mask_cmpgt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
3180    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b)
3181}
3182
3183/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k.
3184///
3185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi16_mask&expand=893)
3186#[inline]
3187#[target_feature(enable = "avx512bw,avx512vl")]
3188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3189#[cfg_attr(test, assert_instr(vpcmp))]
3190#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3191pub const fn _mm_cmpgt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
3192    unsafe { simd_bitmask::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8())) }
3193}
3194
3195/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3196///
3197/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi16_mask&expand=894)
3198#[inline]
3199#[target_feature(enable = "avx512bw,avx512vl")]
3200#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3201#[cfg_attr(test, assert_instr(vpcmp))]
3202#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3203pub const fn _mm_mask_cmpgt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
3204    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b)
3205}
3206
3207/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k.
3208///
3209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi8_mask&expand=921)
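///
/// # Examples
///
/// An illustrative sketch, not part of the original documentation; it assumes an
/// `x86_64` CPU that actually supports `avx512bw` at runtime:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi8(5);
///     let b = _mm512_set1_epi8(-5);
///     // Signed: 5 > -5 in every lane, so all 64 mask bits are set.
///     assert_eq!(_mm512_cmpgt_epi8_mask(a, b), u64::MAX);
/// }
/// ```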
3210#[inline]
3211#[target_feature(enable = "avx512bw")]
3212#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3213#[cfg_attr(test, assert_instr(vpcmp))]
3214#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3215pub const fn _mm512_cmpgt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
3216    unsafe { simd_bitmask::<i8x64, _>(simd_gt(a.as_i8x64(), b.as_i8x64())) }
3217}
3218
3219/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3220///
3221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi8_mask&expand=922)
3222#[inline]
3223#[target_feature(enable = "avx512bw")]
3224#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3225#[cfg_attr(test, assert_instr(vpcmp))]
3226#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3227pub const fn _mm512_mask_cmpgt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
3228    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b)
3229}
3230
3231/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k.
3232///
3233/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi8_mask&expand=919)
3234#[inline]
3235#[target_feature(enable = "avx512bw,avx512vl")]
3236#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3237#[cfg_attr(test, assert_instr(vpcmp))]
3238#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3239pub const fn _mm256_cmpgt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
3240    unsafe { simd_bitmask::<i8x32, _>(simd_gt(a.as_i8x32(), b.as_i8x32())) }
3241}
3242
3243/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3244///
3245/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi8_mask&expand=920)
3246#[inline]
3247#[target_feature(enable = "avx512bw,avx512vl")]
3248#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3249#[cfg_attr(test, assert_instr(vpcmp))]
3250#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3251pub const fn _mm256_mask_cmpgt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
3252    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b)
3253}
3254
3255/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k.
3256///
3257/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi8_mask&expand=917)
3258#[inline]
3259#[target_feature(enable = "avx512bw,avx512vl")]
3260#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3261#[cfg_attr(test, assert_instr(vpcmp))]
3262#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3263pub const fn _mm_cmpgt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
3264    unsafe { simd_bitmask::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16())) }
3265}
3266
3267/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3268///
3269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi8_mask&expand=918)
3270#[inline]
3271#[target_feature(enable = "avx512bw,avx512vl")]
3272#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3273#[cfg_attr(test, assert_instr(vpcmp))]
3274#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3275pub const fn _mm_mask_cmpgt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
3276    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b)
3277}
3278
3279/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3280///
3281/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu16_mask&expand=989)
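///
/// # Examples
///
/// An illustrative sketch, not part of the original documentation; it assumes an
/// `x86_64` CPU that actually supports `avx512bw` at runtime:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi16(3);
///     let b = _mm512_set1_epi16(3);
///     // Equal lanes satisfy less-than-or-equal, so all 32 mask bits are set.
///     assert_eq!(_mm512_cmple_epu16_mask(a, b), u32::MAX);
/// }
/// ```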
3282#[inline]
3283#[target_feature(enable = "avx512bw")]
3284#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3285#[cfg_attr(test, assert_instr(vpcmp))]
3286#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3287pub const fn _mm512_cmple_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
3288    unsafe { simd_bitmask::<u16x32, _>(simd_le(a.as_u16x32(), b.as_u16x32())) }
3289}
3290
3291/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3292///
3293/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu16_mask&expand=990)
3294#[inline]
3295#[target_feature(enable = "avx512bw")]
3296#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3297#[cfg_attr(test, assert_instr(vpcmp))]
3298#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3299pub const fn _mm512_mask_cmple_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
3300    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b)
3301}
3302
3303/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3304///
3305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu16_mask&expand=987)
3306#[inline]
3307#[target_feature(enable = "avx512bw,avx512vl")]
3308#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3309#[cfg_attr(test, assert_instr(vpcmp))]
3310#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3311pub const fn _mm256_cmple_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
3312    unsafe { simd_bitmask::<u16x16, _>(simd_le(a.as_u16x16(), b.as_u16x16())) }
3313}
3314
3315/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3316///
3317/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu16_mask&expand=988)
3318#[inline]
3319#[target_feature(enable = "avx512bw,avx512vl")]
3320#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3321#[cfg_attr(test, assert_instr(vpcmp))]
3322#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3323pub const fn _mm256_mask_cmple_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
3324    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b)
3325}
3326
3327/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3328///
3329/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu16_mask&expand=985)
3330#[inline]
3331#[target_feature(enable = "avx512bw,avx512vl")]
3332#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3333#[cfg_attr(test, assert_instr(vpcmp))]
3334#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3335pub const fn _mm_cmple_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
3336    unsafe { simd_bitmask::<u16x8, _>(simd_le(a.as_u16x8(), b.as_u16x8())) }
3337}
3338
3339/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3340///
3341/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu16_mask&expand=986)
3342#[inline]
3343#[target_feature(enable = "avx512bw,avx512vl")]
3344#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3345#[cfg_attr(test, assert_instr(vpcmp))]
3346#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3347pub const fn _mm_mask_cmple_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
3348    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b)
3349}
3350
3351/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3352///
3353/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu8_mask&expand=1007)
3354#[inline]
3355#[target_feature(enable = "avx512bw")]
3356#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3357#[cfg_attr(test, assert_instr(vpcmp))]
3358#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3359pub const fn _mm512_cmple_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
3360    unsafe { simd_bitmask::<u8x64, _>(simd_le(a.as_u8x64(), b.as_u8x64())) }
3361}
3362
3363/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3364///
3365/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu8_mask&expand=1008)
3366#[inline]
3367#[target_feature(enable = "avx512bw")]
3368#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3369#[cfg_attr(test, assert_instr(vpcmp))]
3370#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3371pub const fn _mm512_mask_cmple_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
3372    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b)
3373}
3374
3375/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3376///
3377/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu8_mask&expand=1005)
3378#[inline]
3379#[target_feature(enable = "avx512bw,avx512vl")]
3380#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3381#[cfg_attr(test, assert_instr(vpcmp))]
3382#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3383pub const fn _mm256_cmple_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
3384    unsafe { simd_bitmask::<u8x32, _>(simd_le(a.as_u8x32(), b.as_u8x32())) }
3385}
3386
3387/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3388///
3389/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu8_mask&expand=1006)
3390#[inline]
3391#[target_feature(enable = "avx512bw,avx512vl")]
3392#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3393#[cfg_attr(test, assert_instr(vpcmp))]
3394#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3395pub const fn _mm256_mask_cmple_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
3396    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b)
3397}
3398
3399/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3400///
3401/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu8_mask&expand=1003)
3402#[inline]
3403#[target_feature(enable = "avx512bw,avx512vl")]
3404#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3405#[cfg_attr(test, assert_instr(vpcmp))]
3406#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3407pub const fn _mm_cmple_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
3408    unsafe { simd_bitmask::<u8x16, _>(simd_le(a.as_u8x16(), b.as_u8x16())) }
3409}
3410
3411/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3412///
3413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu8_mask&expand=1004)
3414#[inline]
3415#[target_feature(enable = "avx512bw,avx512vl")]
3416#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3417#[cfg_attr(test, assert_instr(vpcmp))]
3418#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3419pub const fn _mm_mask_cmple_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
3420    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b)
3421}
3422
3423/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3424///
3425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi16_mask&expand=965)
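///
/// # Examples
///
/// An illustrative sketch, not part of the original documentation; it assumes an
/// `x86_64` CPU that actually supports `avx512bw` at runtime:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi16(-2);
///     let b = _mm512_set1_epi16(-1);
///     // Signed: -2 <= -1 in every lane, so all 32 mask bits are set.
///     assert_eq!(_mm512_cmple_epi16_mask(a, b), u32::MAX);
/// }
/// ```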
3426#[inline]
3427#[target_feature(enable = "avx512bw")]
3428#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3429#[cfg_attr(test, assert_instr(vpcmp))]
3430#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3431pub const fn _mm512_cmple_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
3432    unsafe { simd_bitmask::<i16x32, _>(simd_le(a.as_i16x32(), b.as_i16x32())) }
3433}
3434
3435/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3436///
3437/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi16_mask&expand=966)
3438#[inline]
3439#[target_feature(enable = "avx512bw")]
3440#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3441#[cfg_attr(test, assert_instr(vpcmp))]
3442#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3443pub const fn _mm512_mask_cmple_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
3444    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b)
3445}
3446
3447/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3448///
3449/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi16_mask&expand=963)
3450#[inline]
3451#[target_feature(enable = "avx512bw,avx512vl")]
3452#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3453#[cfg_attr(test, assert_instr(vpcmp))]
3454#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3455pub const fn _mm256_cmple_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
3456    unsafe { simd_bitmask::<i16x16, _>(simd_le(a.as_i16x16(), b.as_i16x16())) }
3457}
3458
3459/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3460///
3461/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi16_mask&expand=964)
3462#[inline]
3463#[target_feature(enable = "avx512bw,avx512vl")]
3464#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3465#[cfg_attr(test, assert_instr(vpcmp))]
3466#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3467pub const fn _mm256_mask_cmple_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
3468    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b)
3469}
3470
3471/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3472///
3473/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi16_mask&expand=961)
3474#[inline]
3475#[target_feature(enable = "avx512bw,avx512vl")]
3476#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3477#[cfg_attr(test, assert_instr(vpcmp))]
3478#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3479pub const fn _mm_cmple_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
3480    unsafe { simd_bitmask::<i16x8, _>(simd_le(a.as_i16x8(), b.as_i16x8())) }
3481}
3482
3483/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3484///
3485/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi16_mask&expand=962)
3486#[inline]
3487#[target_feature(enable = "avx512bw,avx512vl")]
3488#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3489#[cfg_attr(test, assert_instr(vpcmp))]
3490#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3491pub const fn _mm_mask_cmple_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
3492    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b)
3493}
3494
3495/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3496///
3497/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi8_mask&expand=983)
3498#[inline]
3499#[target_feature(enable = "avx512bw")]
3500#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3501#[cfg_attr(test, assert_instr(vpcmp))]
3502#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3503pub const fn _mm512_cmple_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
3504    unsafe { simd_bitmask::<i8x64, _>(simd_le(a.as_i8x64(), b.as_i8x64())) }
3505}
3506
3507/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3508///
3509/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi8_mask&expand=984)
3510#[inline]
3511#[target_feature(enable = "avx512bw")]
3512#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3513#[cfg_attr(test, assert_instr(vpcmp))]
3514#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3515pub const fn _mm512_mask_cmple_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
3516    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b)
3517}
3518
3519/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3520///
3521/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi8_mask&expand=981)
3522#[inline]
3523#[target_feature(enable = "avx512bw,avx512vl")]
3524#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3525#[cfg_attr(test, assert_instr(vpcmp))]
3526#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3527pub const fn _mm256_cmple_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
3528    unsafe { simd_bitmask::<i8x32, _>(simd_le(a.as_i8x32(), b.as_i8x32())) }
3529}
3530
3531/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3532///
3533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi8_mask&expand=982)
3534#[inline]
3535#[target_feature(enable = "avx512bw,avx512vl")]
3536#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3537#[cfg_attr(test, assert_instr(vpcmp))]
3538#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3539pub const fn _mm256_mask_cmple_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
3540    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b)
3541}
3542
3543/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3544///
3545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi8_mask&expand=979)
3546#[inline]
3547#[target_feature(enable = "avx512bw,avx512vl")]
3548#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3549#[cfg_attr(test, assert_instr(vpcmp))]
3550#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3551pub const fn _mm_cmple_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
3552    unsafe { simd_bitmask::<i8x16, _>(simd_le(a.as_i8x16(), b.as_i8x16())) }
3553}
3554
3555/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3556///
3557/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi8_mask&expand=980)
3558#[inline]
3559#[target_feature(enable = "avx512bw,avx512vl")]
3560#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3561#[cfg_attr(test, assert_instr(vpcmp))]
3562#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3563pub const fn _mm_mask_cmple_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
3564    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b)
3565}
3566
3567/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3568///
3569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu16_mask&expand=867)
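///
/// # Examples
///
/// An illustrative sketch, not part of the original documentation; it assumes an
/// `x86_64` CPU that actually supports `avx512bw` at runtime:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi16(-1); // 65535 when read as unsigned
///     let b = _mm512_set1_epi16(0);
///     // Unsigned: 65535 >= 0 in every lane, so all 32 mask bits are set.
///     assert_eq!(_mm512_cmpge_epu16_mask(a, b), u32::MAX);
/// }
/// ```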
3570#[inline]
3571#[target_feature(enable = "avx512bw")]
3572#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3573#[cfg_attr(test, assert_instr(vpcmp))]
3574#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3575pub const fn _mm512_cmpge_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
3576    unsafe { simd_bitmask::<u16x32, _>(simd_ge(a.as_u16x32(), b.as_u16x32())) }
3577}
3578
3579/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3580///
3581/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu16_mask&expand=868)
3582#[inline]
3583#[target_feature(enable = "avx512bw")]
3584#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3585#[cfg_attr(test, assert_instr(vpcmp))]
3586#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3587pub const fn _mm512_mask_cmpge_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
3588    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b)
3589}
3590
3591/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3592///
3593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu16_mask&expand=865)
3594#[inline]
3595#[target_feature(enable = "avx512bw,avx512vl")]
3596#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3597#[cfg_attr(test, assert_instr(vpcmp))]
3598#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3599pub const fn _mm256_cmpge_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
3600    unsafe { simd_bitmask::<u16x16, _>(simd_ge(a.as_u16x16(), b.as_u16x16())) }
3601}
3602
3603/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3604///
3605/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu16_mask&expand=866)
3606#[inline]
3607#[target_feature(enable = "avx512bw,avx512vl")]
3608#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3609#[cfg_attr(test, assert_instr(vpcmp))]
3610#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3611pub const fn _mm256_mask_cmpge_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
3612    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b)
3613}
3614
3615/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3616///
3617/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu16_mask&expand=863)
3618#[inline]
3619#[target_feature(enable = "avx512bw,avx512vl")]
3620#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3621#[cfg_attr(test, assert_instr(vpcmp))]
3622#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3623pub const fn _mm_cmpge_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
3624    unsafe { simd_bitmask::<u16x8, _>(simd_ge(a.as_u16x8(), b.as_u16x8())) }
3625}
3626
3627/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3628///
3629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu16_mask&expand=864)
3630#[inline]
3631#[target_feature(enable = "avx512bw,avx512vl")]
3632#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3633#[cfg_attr(test, assert_instr(vpcmp))]
3634#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3635pub const fn _mm_mask_cmpge_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
3636    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b)
3637}
3638
3639/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3640///
3641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu8_mask&expand=885)
3642#[inline]
3643#[target_feature(enable = "avx512bw")]
3644#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3645#[cfg_attr(test, assert_instr(vpcmp))]
3646#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3647pub const fn _mm512_cmpge_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
3648    unsafe { simd_bitmask::<u8x64, _>(simd_ge(a.as_u8x64(), b.as_u8x64())) }
3649}
3650
3651/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3652///
3653/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu8_mask&expand=886)
3654#[inline]
3655#[target_feature(enable = "avx512bw")]
3656#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3657#[cfg_attr(test, assert_instr(vpcmp))]
3658#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3659pub const fn _mm512_mask_cmpge_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
3660    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b)
3661}
3662
3663/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3664///
3665/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu8_mask&expand=883)
3666#[inline]
3667#[target_feature(enable = "avx512bw,avx512vl")]
3668#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3669#[cfg_attr(test, assert_instr(vpcmp))]
3670#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3671pub const fn _mm256_cmpge_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
3672    unsafe { simd_bitmask::<u8x32, _>(simd_ge(a.as_u8x32(), b.as_u8x32())) }
3673}
3674
3675/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3676///
3677/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu8_mask&expand=884)
3678#[inline]
3679#[target_feature(enable = "avx512bw,avx512vl")]
3680#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3681#[cfg_attr(test, assert_instr(vpcmp))]
3682#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3683pub const fn _mm256_mask_cmpge_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
3684    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b)
3685}
3686
3687/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3688///
3689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu8_mask&expand=881)
3690#[inline]
3691#[target_feature(enable = "avx512bw,avx512vl")]
3692#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3693#[cfg_attr(test, assert_instr(vpcmp))]
3694#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3695pub const fn _mm_cmpge_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
3696    unsafe { simd_bitmask::<u8x16, _>(simd_ge(a.as_u8x16(), b.as_u8x16())) }
3697}
3698
3699/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3700///
3701/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu8_mask&expand=882)
3702#[inline]
3703#[target_feature(enable = "avx512bw,avx512vl")]
3704#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3705#[cfg_attr(test, assert_instr(vpcmp))]
3706#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3707pub const fn _mm_mask_cmpge_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
3708    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b)
3709}
3710
3711/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3712///
3713/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi16_mask&expand=843)
3714#[inline]
3715#[target_feature(enable = "avx512bw")]
3716#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3717#[cfg_attr(test, assert_instr(vpcmp))]
3718#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3719pub const fn _mm512_cmpge_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
3720    unsafe { simd_bitmask::<i16x32, _>(simd_ge(a.as_i16x32(), b.as_i16x32())) }
3721}
3722
3723/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3724///
3725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi16_mask&expand=844)
3726#[inline]
3727#[target_feature(enable = "avx512bw")]
3728#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3729#[cfg_attr(test, assert_instr(vpcmp))]
3730#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3731pub const fn _mm512_mask_cmpge_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
3732    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b)
3733}
3734
3735/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3736///
3737/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi16_mask&expand=841)
3738#[inline]
3739#[target_feature(enable = "avx512bw,avx512vl")]
3740#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3741#[cfg_attr(test, assert_instr(vpcmp))]
3742#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3743pub const fn _mm256_cmpge_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
3744    unsafe { simd_bitmask::<i16x16, _>(simd_ge(a.as_i16x16(), b.as_i16x16())) }
3745}
3746
3747/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3748///
3749/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi16_mask&expand=842)
3750#[inline]
3751#[target_feature(enable = "avx512bw,avx512vl")]
3752#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3753#[cfg_attr(test, assert_instr(vpcmp))]
3754#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3755pub const fn _mm256_mask_cmpge_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
3756    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b)
3757}
3758
3759/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3760///
3761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi16_mask&expand=839)
3762#[inline]
3763#[target_feature(enable = "avx512bw,avx512vl")]
3764#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3765#[cfg_attr(test, assert_instr(vpcmp))]
3766#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3767pub const fn _mm_cmpge_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
3768    unsafe { simd_bitmask::<i16x8, _>(simd_ge(a.as_i16x8(), b.as_i16x8())) }
3769}
3770
3771/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3772///
3773/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi16_mask&expand=840)
3774#[inline]
3775#[target_feature(enable = "avx512bw,avx512vl")]
3776#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3777#[cfg_attr(test, assert_instr(vpcmp))]
3778#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3779pub const fn _mm_mask_cmpge_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
3780    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b)
3781}
3782
3783/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3784///
3785/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi8_mask&expand=861)
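///
/// # Examples
///
/// An illustrative sketch, not part of the original documentation; it assumes an
/// `x86_64` CPU that actually supports `avx512bw` at runtime:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi8(0);
///     let b = _mm512_set1_epi8(-1);
///     // Signed: 0 >= -1 in every lane, so all 64 mask bits are set.
///     assert_eq!(_mm512_cmpge_epi8_mask(a, b), u64::MAX);
/// }
/// ```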
3786#[inline]
3787#[target_feature(enable = "avx512bw")]
3788#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3789#[cfg_attr(test, assert_instr(vpcmp))]
3790#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3791pub const fn _mm512_cmpge_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
3792    unsafe { simd_bitmask::<i8x64, _>(simd_ge(a.as_i8x64(), b.as_i8x64())) }
3793}
3794
3795/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3796///
3797/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi8_mask&expand=862)
3798#[inline]
3799#[target_feature(enable = "avx512bw")]
3800#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3801#[cfg_attr(test, assert_instr(vpcmp))]
3802#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3803pub const fn _mm512_mask_cmpge_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
3804    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b)
3805}
3806
3807/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3808///
3809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi8_mask&expand=859)
3810#[inline]
3811#[target_feature(enable = "avx512bw,avx512vl")]
3812#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3813#[cfg_attr(test, assert_instr(vpcmp))]
3814#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3815pub const fn _mm256_cmpge_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
3816    unsafe { simd_bitmask::<i8x32, _>(simd_ge(a.as_i8x32(), b.as_i8x32())) }
3817}
3818
3819/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3820///
3821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi8_mask&expand=860)
3822#[inline]
3823#[target_feature(enable = "avx512bw,avx512vl")]
3824#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3825#[cfg_attr(test, assert_instr(vpcmp))]
3826#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3827pub const fn _mm256_mask_cmpge_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
3828    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b)
3829}
3830
3831/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3832///
3833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi8_mask&expand=857)
3834#[inline]
3835#[target_feature(enable = "avx512bw,avx512vl")]
3836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3837#[cfg_attr(test, assert_instr(vpcmp))]
3838#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3839pub const fn _mm_cmpge_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
3840    unsafe { simd_bitmask::<i8x16, _>(simd_ge(a.as_i8x16(), b.as_i8x16())) }
3841}
3842
3843/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3844///
3845/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi8_mask&expand=858)
3846#[inline]
3847#[target_feature(enable = "avx512bw,avx512vl")]
3848#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3849#[cfg_attr(test, assert_instr(vpcmp))]
3850#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3851pub const fn _mm_mask_cmpge_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
3852    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b)
3853}
3854
3855/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k.
3856///
3857/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu16_mask&expand=801)
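///
/// # Examples
///
/// An illustrative sketch, not part of the original documentation; it assumes an
/// `x86_64` CPU that actually supports `avx512bw` at runtime:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi16(7);
///     let b = _mm512_set1_epi16(7);
///     // All lanes compare equal, so all 32 mask bits are set.
///     assert_eq!(_mm512_cmpeq_epu16_mask(a, b), u32::MAX);
/// }
/// ```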
3858#[inline]
3859#[target_feature(enable = "avx512bw")]
3860#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3861#[cfg_attr(test, assert_instr(vpcmp))]
3862#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3863pub const fn _mm512_cmpeq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
3864    unsafe { simd_bitmask::<u16x32, _>(simd_eq(a.as_u16x32(), b.as_u16x32())) }
3865}
3866
3867/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3868///
3869/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu16_mask&expand=802)
3870#[inline]
3871#[target_feature(enable = "avx512bw")]
3872#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3873#[cfg_attr(test, assert_instr(vpcmp))]
3874#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3875pub const fn _mm512_mask_cmpeq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
3876    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b)
3877}
3878
3879/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k.
3880///
3881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu16_mask&expand=799)
3882#[inline]
3883#[target_feature(enable = "avx512bw,avx512vl")]
3884#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3885#[cfg_attr(test, assert_instr(vpcmp))]
3886#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3887pub const fn _mm256_cmpeq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
3888    unsafe { simd_bitmask::<u16x16, _>(simd_eq(a.as_u16x16(), b.as_u16x16())) }
3889}
3890
3891/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3892///
3893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu16_mask&expand=800)
3894#[inline]
3895#[target_feature(enable = "avx512bw,avx512vl")]
3896#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3897#[cfg_attr(test, assert_instr(vpcmp))]
3898#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3899pub const fn _mm256_mask_cmpeq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
3900    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b)
3901}
3902
3903/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k.
3904///
3905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu16_mask&expand=797)
3906#[inline]
3907#[target_feature(enable = "avx512bw,avx512vl")]
3908#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3909#[cfg_attr(test, assert_instr(vpcmp))]
3910#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3911pub const fn _mm_cmpeq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
3912    unsafe { simd_bitmask::<u16x8, _>(simd_eq(a.as_u16x8(), b.as_u16x8())) }
3913}
3914
3915/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3916///
3917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu16_mask&expand=798)
3918#[inline]
3919#[target_feature(enable = "avx512bw,avx512vl")]
3920#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3921#[cfg_attr(test, assert_instr(vpcmp))]
3922#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3923pub const fn _mm_mask_cmpeq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
3924    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b)
3925}
3926
3927/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k.
3928///
3929/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu8_mask&expand=819)
3930#[inline]
3931#[target_feature(enable = "avx512bw")]
3932#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3933#[cfg_attr(test, assert_instr(vpcmp))]
3934#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3935pub const fn _mm512_cmpeq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
3936    unsafe { simd_bitmask::<u8x64, _>(simd_eq(a.as_u8x64(), b.as_u8x64())) }
3937}
3938
3939/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3940///
3941/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu8_mask&expand=820)
3942#[inline]
3943#[target_feature(enable = "avx512bw")]
3944#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3945#[cfg_attr(test, assert_instr(vpcmp))]
3946#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3947pub const fn _mm512_mask_cmpeq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
3948    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b)
3949}
3950
3951/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k.
3952///
3953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu8_mask&expand=817)
3954#[inline]
3955#[target_feature(enable = "avx512bw,avx512vl")]
3956#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3957#[cfg_attr(test, assert_instr(vpcmp))]
3958#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3959pub const fn _mm256_cmpeq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
3960    unsafe { simd_bitmask::<u8x32, _>(simd_eq(a.as_u8x32(), b.as_u8x32())) }
3961}
3962
3963/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3964///
3965/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu8_mask&expand=818)
3966#[inline]
3967#[target_feature(enable = "avx512bw,avx512vl")]
3968#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3969#[cfg_attr(test, assert_instr(vpcmp))]
3970#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3971pub const fn _mm256_mask_cmpeq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
3972    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b)
3973}
3974
3975/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k.
3976///
3977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu8_mask&expand=815)
3978#[inline]
3979#[target_feature(enable = "avx512bw,avx512vl")]
3980#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3981#[cfg_attr(test, assert_instr(vpcmp))]
3982#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3983pub const fn _mm_cmpeq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
3984    unsafe { simd_bitmask::<u8x16, _>(simd_eq(a.as_u8x16(), b.as_u8x16())) }
3985}
3986
3987/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3988///
3989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu8_mask&expand=816)
3990#[inline]
3991#[target_feature(enable = "avx512bw,avx512vl")]
3992#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3993#[cfg_attr(test, assert_instr(vpcmp))]
3994#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3995pub const fn _mm_mask_cmpeq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
3996    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b)
3997}
3998
3999/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k.
4000///
4001/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi16_mask&expand=771)
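///
/// # Examples
///
/// A minimal sketch, assuming `std` is available for feature detection; the
/// inputs are arbitrary:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512bw") {
///         // SAFETY: the required CPU features were detected at runtime.
///         unsafe {
///             let a = _mm512_set1_epi16(-5);
///             let b = _mm512_set1_epi16(-5);
///             // Identical vectors: all 32 mask bits are set.
///             assert_eq!(_mm512_cmpeq_epi16_mask(a, b), u32::MAX);
///             // No lane of `a` equals 4, so the mask is empty.
///             assert_eq!(_mm512_cmpeq_epi16_mask(a, _mm512_set1_epi16(4)), 0);
///         }
///     }
/// }
/// ```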
4002#[inline]
4003#[target_feature(enable = "avx512bw")]
4004#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4005#[cfg_attr(test, assert_instr(vpcmp))]
4006#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4007pub const fn _mm512_cmpeq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
4008    unsafe { simd_bitmask::<i16x32, _>(simd_eq(a.as_i16x32(), b.as_i16x32())) }
4009}
4010
4011/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4012///
4013/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi16_mask&expand=772)
4014#[inline]
4015#[target_feature(enable = "avx512bw")]
4016#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4017#[cfg_attr(test, assert_instr(vpcmp))]
4018#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4019pub const fn _mm512_mask_cmpeq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
4020    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b)
4021}
4022
4023/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k.
4024///
4025/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi16_mask&expand=769)
4026#[inline]
4027#[target_feature(enable = "avx512bw,avx512vl")]
4028#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4029#[cfg_attr(test, assert_instr(vpcmp))]
4030#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4031pub const fn _mm256_cmpeq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
4032    unsafe { simd_bitmask::<i16x16, _>(simd_eq(a.as_i16x16(), b.as_i16x16())) }
4033}
4034
4035/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4036///
4037/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi16_mask&expand=770)
4038#[inline]
4039#[target_feature(enable = "avx512bw,avx512vl")]
4040#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4041#[cfg_attr(test, assert_instr(vpcmp))]
4042#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4043pub const fn _mm256_mask_cmpeq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
4044    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b)
4045}
4046
4047/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k.
4048///
4049/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi16_mask&expand=767)
4050#[inline]
4051#[target_feature(enable = "avx512bw,avx512vl")]
4052#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4053#[cfg_attr(test, assert_instr(vpcmp))]
4054#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4055pub const fn _mm_cmpeq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
4056    unsafe { simd_bitmask::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8())) }
4057}
4058
4059/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4060///
4061/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi16_mask&expand=768)
4062#[inline]
4063#[target_feature(enable = "avx512bw,avx512vl")]
4064#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4065#[cfg_attr(test, assert_instr(vpcmp))]
4066#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4067pub const fn _mm_mask_cmpeq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
4068    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b)
4069}
4070
4071/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k.
4072///
4073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi8_mask&expand=795)
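///
/// # Examples
///
/// An illustrative sketch (assuming `std` for feature detection) that also
/// shows the masked sibling `_mm512_mask_cmpeq_epi8_mask`:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512bw") {
///         // SAFETY: the required CPU features were detected at runtime.
///         unsafe {
///             let a = _mm512_set1_epi8(1);
///             let b = _mm512_set1_epi8(1);
///             assert_eq!(_mm512_cmpeq_epi8_mask(a, b), u64::MAX);
///             // The masked form can only report lanes whose bit is set in `k1`.
///             let k1: __mmask64 = 0x0000_0000_FFFF_FFFF;
///             assert_eq!(_mm512_mask_cmpeq_epi8_mask(k1, a, b), k1);
///         }
///     }
/// }
/// ```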
4074#[inline]
4075#[target_feature(enable = "avx512bw")]
4076#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4077#[cfg_attr(test, assert_instr(vpcmp))]
4078#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4079pub const fn _mm512_cmpeq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
4080    unsafe { simd_bitmask::<i8x64, _>(simd_eq(a.as_i8x64(), b.as_i8x64())) }
4081}
4082
4083/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4084///
4085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi8_mask&expand=796)
4086#[inline]
4087#[target_feature(enable = "avx512bw")]
4088#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4089#[cfg_attr(test, assert_instr(vpcmp))]
4090#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4091pub const fn _mm512_mask_cmpeq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
4092    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b)
4093}
4094
4095/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k.
4096///
4097/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi8_mask&expand=793)
4098#[inline]
4099#[target_feature(enable = "avx512bw,avx512vl")]
4100#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4101#[cfg_attr(test, assert_instr(vpcmp))]
4102#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4103pub const fn _mm256_cmpeq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
4104    unsafe { simd_bitmask::<i8x32, _>(simd_eq(a.as_i8x32(), b.as_i8x32())) }
4105}
4106
4107/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4108///
4109/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi8_mask&expand=794)
4110#[inline]
4111#[target_feature(enable = "avx512bw,avx512vl")]
4112#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4113#[cfg_attr(test, assert_instr(vpcmp))]
4114#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4115pub const fn _mm256_mask_cmpeq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
4116    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b)
4117}
4118
4119/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k.
4120///
4121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi8_mask&expand=791)
4122#[inline]
4123#[target_feature(enable = "avx512bw,avx512vl")]
4124#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4125#[cfg_attr(test, assert_instr(vpcmp))]
4126#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4127pub const fn _mm_cmpeq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
4128    unsafe { simd_bitmask::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16())) }
4129}
4130
4131/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4132///
4133/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi8_mask&expand=792)
4134#[inline]
4135#[target_feature(enable = "avx512bw,avx512vl")]
4136#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4137#[cfg_attr(test, assert_instr(vpcmp))]
4138#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4139pub const fn _mm_mask_cmpeq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
4140    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b)
4141}
4142
4143/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k.
4144///
4145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu16_mask&expand=1106)
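///
/// # Examples
///
/// A short sketch, assuming runtime feature detection via `std`; the inputs
/// are arbitrary:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512bw") {
///         // SAFETY: the required CPU features were detected at runtime.
///         unsafe {
///             let a = _mm512_set1_epi16(1);
///             let b = _mm512_set1_epi16(2);
///             // Every 16-bit lane differs, so all 32 mask bits are set.
///             assert_eq!(_mm512_cmpneq_epu16_mask(a, b), u32::MAX);
///             // Comparing a vector with itself yields an empty mask.
///             assert_eq!(_mm512_cmpneq_epu16_mask(a, a), 0);
///         }
///     }
/// }
/// ```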
4146#[inline]
4147#[target_feature(enable = "avx512bw")]
4148#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4149#[cfg_attr(test, assert_instr(vpcmp))]
4150#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4151pub const fn _mm512_cmpneq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
4152    unsafe { simd_bitmask::<u16x32, _>(simd_ne(a.as_u16x32(), b.as_u16x32())) }
4153}
4154
4155/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4156///
4157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu16_mask&expand=1107)
4158#[inline]
4159#[target_feature(enable = "avx512bw")]
4160#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4161#[cfg_attr(test, assert_instr(vpcmp))]
4162#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4163pub const fn _mm512_mask_cmpneq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
4164    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b)
4165}
4166
4167/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k.
4168///
4169/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu16_mask&expand=1104)
4170#[inline]
4171#[target_feature(enable = "avx512bw,avx512vl")]
4172#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4173#[cfg_attr(test, assert_instr(vpcmp))]
4174#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4175pub const fn _mm256_cmpneq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
4176    unsafe { simd_bitmask::<u16x16, _>(simd_ne(a.as_u16x16(), b.as_u16x16())) }
4177}
4178
4179/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4180///
4181/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu16_mask&expand=1105)
4182#[inline]
4183#[target_feature(enable = "avx512bw,avx512vl")]
4184#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4185#[cfg_attr(test, assert_instr(vpcmp))]
4186#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4187pub const fn _mm256_mask_cmpneq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
4188    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b)
4189}
4190
4191/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k.
4192///
4193/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu16_mask&expand=1102)
4194#[inline]
4195#[target_feature(enable = "avx512bw,avx512vl")]
4196#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4197#[cfg_attr(test, assert_instr(vpcmp))]
4198#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4199pub const fn _mm_cmpneq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
4200    unsafe { simd_bitmask::<u16x8, _>(simd_ne(a.as_u16x8(), b.as_u16x8())) }
4201}
4202
4203/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4204///
4205/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu16_mask&expand=1103)
4206#[inline]
4207#[target_feature(enable = "avx512bw,avx512vl")]
4208#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4209#[cfg_attr(test, assert_instr(vpcmp))]
4210#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4211pub const fn _mm_mask_cmpneq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
4212    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b)
4213}
4214
4215/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k.
4216///
4217/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu8_mask&expand=1124)
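///
/// # Examples
///
/// A minimal sketch under the same assumptions as the other examples in this
/// module (`std` available for feature detection, arbitrary inputs):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512bw") {
///         // SAFETY: the required CPU features were detected at runtime.
///         unsafe {
///             let a = _mm512_set1_epi8(0);
///             let b = _mm512_set1_epi8(1);
///             // All 64 byte lanes differ.
///             assert_eq!(_mm512_cmpneq_epu8_mask(a, b), u64::MAX);
///         }
///     }
/// }
/// ```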
4218#[inline]
4219#[target_feature(enable = "avx512bw")]
4220#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4221#[cfg_attr(test, assert_instr(vpcmp))]
4222#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4223pub const fn _mm512_cmpneq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
4224    unsafe { simd_bitmask::<u8x64, _>(simd_ne(a.as_u8x64(), b.as_u8x64())) }
4225}
4226
4227/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4228///
4229/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu8_mask&expand=1125)
4230#[inline]
4231#[target_feature(enable = "avx512bw")]
4232#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4233#[cfg_attr(test, assert_instr(vpcmp))]
4234#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4235pub const fn _mm512_mask_cmpneq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
4236    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b)
4237}
4238
4239/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k.
4240///
4241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu8_mask&expand=1122)
4242#[inline]
4243#[target_feature(enable = "avx512bw,avx512vl")]
4244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4245#[cfg_attr(test, assert_instr(vpcmp))]
4246#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4247pub const fn _mm256_cmpneq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
4248    unsafe { simd_bitmask::<u8x32, _>(simd_ne(a.as_u8x32(), b.as_u8x32())) }
4249}
4250
4251/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4252///
4253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu8_mask&expand=1123)
4254#[inline]
4255#[target_feature(enable = "avx512bw,avx512vl")]
4256#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4257#[cfg_attr(test, assert_instr(vpcmp))]
4258#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4259pub const fn _mm256_mask_cmpneq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
4260    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b)
4261}
4262
4263/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k.
4264///
4265/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu8_mask&expand=1120)
4266#[inline]
4267#[target_feature(enable = "avx512bw,avx512vl")]
4268#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4269#[cfg_attr(test, assert_instr(vpcmp))]
4270#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4271pub const fn _mm_cmpneq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
4272    unsafe { simd_bitmask::<u8x16, _>(simd_ne(a.as_u8x16(), b.as_u8x16())) }
4273}
4274
4275/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4276///
4277/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu8_mask&expand=1121)
4278#[inline]
4279#[target_feature(enable = "avx512bw,avx512vl")]
4280#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4281#[cfg_attr(test, assert_instr(vpcmp))]
4282#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4283pub const fn _mm_mask_cmpneq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
4284    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b)
4285}
4286
4287/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k.
4288///
4289/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi16_mask&expand=1082)
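///
/// # Examples
///
/// An illustrative sketch, assuming `std` is available for feature detection:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512bw") {
///         // SAFETY: the required CPU features were detected at runtime.
///         unsafe {
///             let a = _mm512_set1_epi16(-1);
///             let b = _mm512_set1_epi16(1);
///             // Every signed 16-bit lane differs.
///             assert_eq!(_mm512_cmpneq_epi16_mask(a, b), u32::MAX);
///             // A vector never differs from itself.
///             assert_eq!(_mm512_cmpneq_epi16_mask(b, b), 0);
///         }
///     }
/// }
/// ```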
4290#[inline]
4291#[target_feature(enable = "avx512bw")]
4292#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4293#[cfg_attr(test, assert_instr(vpcmp))]
4294#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4295pub const fn _mm512_cmpneq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
4296    unsafe { simd_bitmask::<i16x32, _>(simd_ne(a.as_i16x32(), b.as_i16x32())) }
4297}
4298
4299/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4300///
4301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi16_mask&expand=1083)
4302#[inline]
4303#[target_feature(enable = "avx512bw")]
4304#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4305#[cfg_attr(test, assert_instr(vpcmp))]
4306#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4307pub const fn _mm512_mask_cmpneq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
4308    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b)
4309}
4310
4311/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k.
4312///
4313/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi16_mask&expand=1080)
4314#[inline]
4315#[target_feature(enable = "avx512bw,avx512vl")]
4316#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4317#[cfg_attr(test, assert_instr(vpcmp))]
4318#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4319pub const fn _mm256_cmpneq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
4320    unsafe { simd_bitmask::<i16x16, _>(simd_ne(a.as_i16x16(), b.as_i16x16())) }
4321}
4322
4323/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4324///
4325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi16_mask&expand=1081)
4326#[inline]
4327#[target_feature(enable = "avx512bw,avx512vl")]
4328#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4329#[cfg_attr(test, assert_instr(vpcmp))]
4330#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4331pub const fn _mm256_mask_cmpneq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
4332    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b)
4333}
4334
4335/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k.
4336///
4337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi16_mask&expand=1078)
4338#[inline]
4339#[target_feature(enable = "avx512bw,avx512vl")]
4340#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4341#[cfg_attr(test, assert_instr(vpcmp))]
4342#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4343pub const fn _mm_cmpneq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
4344    unsafe { simd_bitmask::<i16x8, _>(simd_ne(a.as_i16x8(), b.as_i16x8())) }
4345}
4346
4347/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4348///
4349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi16_mask&expand=1079)
4350#[inline]
4351#[target_feature(enable = "avx512bw,avx512vl")]
4352#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4353#[cfg_attr(test, assert_instr(vpcmp))]
4354#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4355pub const fn _mm_mask_cmpneq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
4356    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b)
4357}
4358
4359/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k.
4360///
4361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi8_mask&expand=1100)
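///
/// # Examples
///
/// A brief sketch (assuming `std` for feature detection) that also shows the
/// zeromasked sibling `_mm512_mask_cmpneq_epi8_mask`:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512bw") {
///         // SAFETY: the required CPU features were detected at runtime.
///         unsafe {
///             let a = _mm512_set1_epi8(3);
///             let b = _mm512_set1_epi8(4);
///             assert_eq!(_mm512_cmpneq_epi8_mask(a, b), u64::MAX);
///             // Only the lanes selected by `k1` can be reported by the masked form.
///             let k1: __mmask64 = 0xFF;
///             assert_eq!(_mm512_mask_cmpneq_epi8_mask(k1, a, b), k1);
///         }
///     }
/// }
/// ```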
4362#[inline]
4363#[target_feature(enable = "avx512bw")]
4364#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4365#[cfg_attr(test, assert_instr(vpcmp))]
4366#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4367pub const fn _mm512_cmpneq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
4368    unsafe { simd_bitmask::<i8x64, _>(simd_ne(a.as_i8x64(), b.as_i8x64())) }
4369}
4370
4371/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4372///
4373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi8_mask&expand=1101)
4374#[inline]
4375#[target_feature(enable = "avx512bw")]
4376#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4377#[cfg_attr(test, assert_instr(vpcmp))]
4378#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4379pub const fn _mm512_mask_cmpneq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
4380    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b)
4381}
4382
4383/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k.
4384///
4385/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi8_mask&expand=1098)
4386#[inline]
4387#[target_feature(enable = "avx512bw,avx512vl")]
4388#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4389#[cfg_attr(test, assert_instr(vpcmp))]
4390#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4391pub const fn _mm256_cmpneq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
4392    unsafe { simd_bitmask::<i8x32, _>(simd_ne(a.as_i8x32(), b.as_i8x32())) }
4393}
4394
4395/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4396///
4397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi8_mask&expand=1099)
4398#[inline]
4399#[target_feature(enable = "avx512bw,avx512vl")]
4400#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4401#[cfg_attr(test, assert_instr(vpcmp))]
4402#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4403pub const fn _mm256_mask_cmpneq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
4404    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b)
4405}
4406
4407/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k.
4408///
4409/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi8_mask&expand=1096)
4410#[inline]
4411#[target_feature(enable = "avx512bw,avx512vl")]
4412#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4413#[cfg_attr(test, assert_instr(vpcmp))]
4414#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4415pub const fn _mm_cmpneq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
4416    unsafe { simd_bitmask::<i8x16, _>(simd_ne(a.as_i8x16(), b.as_i8x16())) }
4417}
4418
4419/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4420///
4421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi8_mask&expand=1097)
4422#[inline]
4423#[target_feature(enable = "avx512bw,avx512vl")]
4424#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4425#[cfg_attr(test, assert_instr(vpcmp))]
4426#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4427pub const fn _mm_mask_cmpneq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
4428    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b)
4429}
4430
4431/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4432///
4433/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu16_mask&expand=715)
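///
/// # Examples
///
/// A minimal sketch of the `_MM_CMPINT_*` predicates (assuming `std` is
/// available for feature detection; the values are arbitrary):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512bw") {
///         // SAFETY: the required CPU features were detected at runtime.
///         unsafe {
///             let a = _mm512_set1_epi16(1);
///             let b = _mm512_set1_epi16(2);
///             // 1 < 2 holds in every unsigned 16-bit lane.
///             assert_eq!(_mm512_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b), u32::MAX);
///             // No lane compares equal.
///             assert_eq!(_mm512_cmp_epu16_mask::<_MM_CMPINT_EQ>(a, b), 0);
///             // The masked form restricts the result to the lanes selected by `k1`.
///             let k1: __mmask32 = 0x0000_FFFF;
///             assert_eq!(_mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b), k1);
///         }
///     }
/// }
/// ```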
4434#[inline]
4435#[target_feature(enable = "avx512bw")]
4436#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4437#[rustc_legacy_const_generics(2)]
4438#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4439#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4440pub const fn _mm512_cmp_epu16_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask32 {
4441    unsafe {
4442        static_assert_uimm_bits!(IMM8, 3);
4443        let a = a.as_u16x32();
4444        let b = b.as_u16x32();
4445        let r = match IMM8 {
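            // IMM8 follows the _MM_CMPINT_* encoding:
            // 0 = EQ, 1 = LT, 2 = LE, 3 = FALSE, 4 = NE, 5 = NLT (>=), 6 = NLE (>), 7 = TRUE.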
4446            0 => simd_eq(a, b),
4447            1 => simd_lt(a, b),
4448            2 => simd_le(a, b),
4449            3 => i16x32::ZERO,
4450            4 => simd_ne(a, b),
4451            5 => simd_ge(a, b),
4452            6 => simd_gt(a, b),
4453            _ => i16x32::splat(-1),
4454        };
4455        simd_bitmask(r)
4456    }
4457}
4458
4459/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4460///
4461/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu16_mask&expand=716)
4462#[inline]
4463#[target_feature(enable = "avx512bw")]
4464#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4465#[rustc_legacy_const_generics(3)]
4466#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4467#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4468pub const fn _mm512_mask_cmp_epu16_mask<const IMM8: i32>(
4469    k1: __mmask32,
4470    a: __m512i,
4471    b: __m512i,
4472) -> __mmask32 {
4473    unsafe {
4474        static_assert_uimm_bits!(IMM8, 3);
4475        let a = a.as_u16x32();
4476        let b = b.as_u16x32();
4477        let k1 = simd_select_bitmask(k1, i16x32::splat(-1), i16x32::ZERO);
4478        let r = match IMM8 {
4479            0 => simd_and(k1, simd_eq(a, b)),
4480            1 => simd_and(k1, simd_lt(a, b)),
4481            2 => simd_and(k1, simd_le(a, b)),
4482            3 => i16x32::ZERO,
4483            4 => simd_and(k1, simd_ne(a, b)),
4484            5 => simd_and(k1, simd_ge(a, b)),
4485            6 => simd_and(k1, simd_gt(a, b)),
4486            _ => k1,
4487        };
4488        simd_bitmask(r)
4489    }
4490}
4491
4492/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4493///
4494/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu16_mask&expand=713)
4495#[inline]
4496#[target_feature(enable = "avx512bw,avx512vl")]
4497#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4498#[rustc_legacy_const_generics(2)]
4499#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4500#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4501pub const fn _mm256_cmp_epu16_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask16 {
4502    unsafe {
4503        static_assert_uimm_bits!(IMM8, 3);
4504        let a = a.as_u16x16();
4505        let b = b.as_u16x16();
4506        let r = match IMM8 {
4507            0 => simd_eq(a, b),
4508            1 => simd_lt(a, b),
4509            2 => simd_le(a, b),
4510            3 => i16x16::ZERO,
4511            4 => simd_ne(a, b),
4512            5 => simd_ge(a, b),
4513            6 => simd_gt(a, b),
4514            _ => i16x16::splat(-1),
4515        };
4516        simd_bitmask(r)
4517    }
4518}
4519
4520/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4521///
4522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu16_mask&expand=714)
4523#[inline]
4524#[target_feature(enable = "avx512bw,avx512vl")]
4525#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4526#[rustc_legacy_const_generics(3)]
4527#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4528#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4529pub const fn _mm256_mask_cmp_epu16_mask<const IMM8: i32>(
4530    k1: __mmask16,
4531    a: __m256i,
4532    b: __m256i,
4533) -> __mmask16 {
4534    unsafe {
4535        static_assert_uimm_bits!(IMM8, 3);
4536        let a = a.as_u16x16();
4537        let b = b.as_u16x16();
4538        let k1 = simd_select_bitmask(k1, i16x16::splat(-1), i16x16::ZERO);
4539        let r = match IMM8 {
4540            0 => simd_and(k1, simd_eq(a, b)),
4541            1 => simd_and(k1, simd_lt(a, b)),
4542            2 => simd_and(k1, simd_le(a, b)),
4543            3 => i16x16::ZERO,
4544            4 => simd_and(k1, simd_ne(a, b)),
4545            5 => simd_and(k1, simd_ge(a, b)),
4546            6 => simd_and(k1, simd_gt(a, b)),
4547            _ => k1,
4548        };
4549        simd_bitmask(r)
4550    }
4551}
4552
4553/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4554///
4555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu16_mask&expand=711)
4556#[inline]
4557#[target_feature(enable = "avx512bw,avx512vl")]
4558#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4559#[rustc_legacy_const_generics(2)]
4560#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4561#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4562pub const fn _mm_cmp_epu16_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask8 {
4563    unsafe {
4564        static_assert_uimm_bits!(IMM8, 3);
4565        let a = a.as_u16x8();
4566        let b = b.as_u16x8();
4567        let r = match IMM8 {
4568            0 => simd_eq(a, b),
4569            1 => simd_lt(a, b),
4570            2 => simd_le(a, b),
4571            3 => i16x8::ZERO,
4572            4 => simd_ne(a, b),
4573            5 => simd_ge(a, b),
4574            6 => simd_gt(a, b),
4575            _ => i16x8::splat(-1),
4576        };
4577        simd_bitmask(r)
4578    }
4579}
4580
4581/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4582///
4583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu16_mask&expand=712)
4584#[inline]
4585#[target_feature(enable = "avx512bw,avx512vl")]
4586#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4587#[rustc_legacy_const_generics(3)]
4588#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4589#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4590pub const fn _mm_mask_cmp_epu16_mask<const IMM8: i32>(
4591    k1: __mmask8,
4592    a: __m128i,
4593    b: __m128i,
4594) -> __mmask8 {
4595    unsafe {
4596        static_assert_uimm_bits!(IMM8, 3);
4597        let a = a.as_u16x8();
4598        let b = b.as_u16x8();
4599        let k1 = simd_select_bitmask(k1, i16x8::splat(-1), i16x8::ZERO);
4600        let r = match IMM8 {
4601            0 => simd_and(k1, simd_eq(a, b)),
4602            1 => simd_and(k1, simd_lt(a, b)),
4603            2 => simd_and(k1, simd_le(a, b)),
4604            3 => i16x8::ZERO,
4605            4 => simd_and(k1, simd_ne(a, b)),
4606            5 => simd_and(k1, simd_ge(a, b)),
4607            6 => simd_and(k1, simd_gt(a, b)),
4608            _ => k1,
4609        };
4610        simd_bitmask(r)
4611    }
4612}
4613
4614/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4615///
4616/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu8_mask&expand=733)
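///
/// # Examples
///
/// An illustrative sketch highlighting the unsigned interpretation of the
/// lanes (assumes `std` is available for feature detection):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512bw") {
///         // SAFETY: the required CPU features were detected at runtime.
///         unsafe {
///             // As unsigned bytes, the bit pattern of -1 is 255.
///             let a = _mm512_set1_epi8(-1);
///             let b = _mm512_set1_epi8(1);
///             // 255 is not less than 1 in any lane...
///             assert_eq!(_mm512_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b), 0);
///             // ...but it is greater ("not less than or equal") in every lane.
///             assert_eq!(_mm512_cmp_epu8_mask::<_MM_CMPINT_NLE>(a, b), u64::MAX);
///         }
///     }
/// }
/// ```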
4617#[inline]
4618#[target_feature(enable = "avx512bw")]
4619#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4620#[rustc_legacy_const_generics(2)]
4621#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4622#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4623pub const fn _mm512_cmp_epu8_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask64 {
4624    unsafe {
4625        static_assert_uimm_bits!(IMM8, 3);
4626        let a = a.as_u8x64();
4627        let b = b.as_u8x64();
4628        let r = match IMM8 {
4629            0 => simd_eq(a, b),
4630            1 => simd_lt(a, b),
4631            2 => simd_le(a, b),
4632            3 => i8x64::ZERO,
4633            4 => simd_ne(a, b),
4634            5 => simd_ge(a, b),
4635            6 => simd_gt(a, b),
4636            _ => i8x64::splat(-1),
4637        };
4638        simd_bitmask(r)
4639    }
4640}
4641
4642/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4643///
4644/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu8_mask&expand=734)
4645#[inline]
4646#[target_feature(enable = "avx512bw")]
4647#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4648#[rustc_legacy_const_generics(3)]
4649#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4650#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4651pub const fn _mm512_mask_cmp_epu8_mask<const IMM8: i32>(
4652    k1: __mmask64,
4653    a: __m512i,
4654    b: __m512i,
4655) -> __mmask64 {
4656    unsafe {
4657        static_assert_uimm_bits!(IMM8, 3);
4658        let a = a.as_u8x64();
4659        let b = b.as_u8x64();
4660        let k1 = simd_select_bitmask(k1, i8x64::splat(-1), i8x64::ZERO);
4661        let r = match IMM8 {
4662            0 => simd_and(k1, simd_eq(a, b)),
4663            1 => simd_and(k1, simd_lt(a, b)),
4664            2 => simd_and(k1, simd_le(a, b)),
4665            3 => i8x64::ZERO,
4666            4 => simd_and(k1, simd_ne(a, b)),
4667            5 => simd_and(k1, simd_ge(a, b)),
4668            6 => simd_and(k1, simd_gt(a, b)),
4669            _ => k1,
4670        };
4671        simd_bitmask(r)
4672    }
4673}
4674
4675/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4676///
4677/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu8_mask&expand=731)
4678#[inline]
4679#[target_feature(enable = "avx512bw,avx512vl")]
4680#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4681#[rustc_legacy_const_generics(2)]
4682#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4683#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4684pub const fn _mm256_cmp_epu8_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask32 {
4685    unsafe {
4686        static_assert_uimm_bits!(IMM8, 3);
4687        let a = a.as_u8x32();
4688        let b = b.as_u8x32();
4689        let r = match IMM8 {
4690            0 => simd_eq(a, b),
4691            1 => simd_lt(a, b),
4692            2 => simd_le(a, b),
4693            3 => i8x32::ZERO,
4694            4 => simd_ne(a, b),
4695            5 => simd_ge(a, b),
4696            6 => simd_gt(a, b),
4697            _ => i8x32::splat(-1),
4698        };
4699        simd_bitmask(r)
4700    }
4701}
4702
4703/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4704///
4705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu8_mask&expand=732)
4706#[inline]
4707#[target_feature(enable = "avx512bw,avx512vl")]
4708#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4709#[rustc_legacy_const_generics(3)]
4710#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4711#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4712pub const fn _mm256_mask_cmp_epu8_mask<const IMM8: i32>(
4713    k1: __mmask32,
4714    a: __m256i,
4715    b: __m256i,
4716) -> __mmask32 {
4717    unsafe {
4718        static_assert_uimm_bits!(IMM8, 3);
4719        let a = a.as_u8x32();
4720        let b = b.as_u8x32();
4721        let k1 = simd_select_bitmask(k1, i8x32::splat(-1), i8x32::ZERO);
4722        let r = match IMM8 {
4723            0 => simd_and(k1, simd_eq(a, b)),
4724            1 => simd_and(k1, simd_lt(a, b)),
4725            2 => simd_and(k1, simd_le(a, b)),
4726            3 => i8x32::ZERO,
4727            4 => simd_and(k1, simd_ne(a, b)),
4728            5 => simd_and(k1, simd_ge(a, b)),
4729            6 => simd_and(k1, simd_gt(a, b)),
4730            _ => k1,
4731        };
4732        simd_bitmask(r)
4733    }
4734}
4735
4736/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4737///
4738/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu8_mask&expand=729)
4739#[inline]
4740#[target_feature(enable = "avx512bw,avx512vl")]
4741#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4742#[rustc_legacy_const_generics(2)]
4743#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4744#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4745pub const fn _mm_cmp_epu8_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask16 {
4746    unsafe {
4747        static_assert_uimm_bits!(IMM8, 3);
4748        let a = a.as_u8x16();
4749        let b = b.as_u8x16();
4750        let r = match IMM8 {
4751            0 => simd_eq(a, b),
4752            1 => simd_lt(a, b),
4753            2 => simd_le(a, b),
4754            3 => i8x16::ZERO,
4755            4 => simd_ne(a, b),
4756            5 => simd_ge(a, b),
4757            6 => simd_gt(a, b),
4758            _ => i8x16::splat(-1),
4759        };
4760        simd_bitmask(r)
4761    }
4762}
4763
4764/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4765///
4766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu8_mask&expand=730)
4767#[inline]
4768#[target_feature(enable = "avx512bw,avx512vl")]
4769#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4770#[rustc_legacy_const_generics(3)]
4771#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4772#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4773pub const fn _mm_mask_cmp_epu8_mask<const IMM8: i32>(
4774    k1: __mmask16,
4775    a: __m128i,
4776    b: __m128i,
4777) -> __mmask16 {
4778    unsafe {
4779        static_assert_uimm_bits!(IMM8, 3);
4780        let a = a.as_u8x16();
4781        let b = b.as_u8x16();
4782        let k1 = simd_select_bitmask(k1, i8x16::splat(-1), i8x16::ZERO);
4783        let r = match IMM8 {
4784            0 => simd_and(k1, simd_eq(a, b)),
4785            1 => simd_and(k1, simd_lt(a, b)),
4786            2 => simd_and(k1, simd_le(a, b)),
4787            3 => i8x16::ZERO,
4788            4 => simd_and(k1, simd_ne(a, b)),
4789            5 => simd_and(k1, simd_ge(a, b)),
4790            6 => simd_and(k1, simd_gt(a, b)),
4791            _ => k1,
4792        };
4793        simd_bitmask(r)
4794    }
4795}
4796
4797/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4798///
4799/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi16_mask&expand=691)
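///
/// # Examples
///
/// A short sketch showing the signed interpretation (assuming `std` for
/// feature detection; contrast with the `epu16` variant above):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512bw") {
///         // SAFETY: the required CPU features were detected at runtime.
///         unsafe {
///             let a = _mm512_set1_epi16(-1);
///             let b = _mm512_set1_epi16(1);
///             // As signed words, -1 < 1 holds in every lane.
///             assert_eq!(_mm512_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b), u32::MAX);
///             // "Not less than" therefore matches no lane.
///             assert_eq!(_mm512_cmp_epi16_mask::<_MM_CMPINT_NLT>(a, b), 0);
///         }
///     }
/// }
/// ```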
4800#[inline]
4801#[target_feature(enable = "avx512bw")]
4802#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4803#[rustc_legacy_const_generics(2)]
4804#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4805#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4806pub const fn _mm512_cmp_epi16_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask32 {
4807    unsafe {
4808        static_assert_uimm_bits!(IMM8, 3);
4809        let a = a.as_i16x32();
4810        let b = b.as_i16x32();
4811        let r = match IMM8 {
4812            0 => simd_eq(a, b),
4813            1 => simd_lt(a, b),
4814            2 => simd_le(a, b),
4815            3 => i16x32::ZERO,
4816            4 => simd_ne(a, b),
4817            5 => simd_ge(a, b),
4818            6 => simd_gt(a, b),
4819            _ => i16x32::splat(-1),
4820        };
4821        simd_bitmask(r)
4822    }
4823}
4824
4825/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4826///
4827/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi16_mask&expand=692)
4828#[inline]
4829#[target_feature(enable = "avx512bw")]
4830#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4831#[rustc_legacy_const_generics(3)]
4832#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4833#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4834pub const fn _mm512_mask_cmp_epi16_mask<const IMM8: i32>(
4835    k1: __mmask32,
4836    a: __m512i,
4837    b: __m512i,
4838) -> __mmask32 {
4839    unsafe {
4840        static_assert_uimm_bits!(IMM8, 3);
4841        let a = a.as_i16x32();
4842        let b = b.as_i16x32();
4843        let k1 = simd_select_bitmask(k1, i16x32::splat(-1), i16x32::ZERO);
4844        let r = match IMM8 {
4845            0 => simd_and(k1, simd_eq(a, b)),
4846            1 => simd_and(k1, simd_lt(a, b)),
4847            2 => simd_and(k1, simd_le(a, b)),
4848            3 => i16x32::ZERO,
4849            4 => simd_and(k1, simd_ne(a, b)),
4850            5 => simd_and(k1, simd_ge(a, b)),
4851            6 => simd_and(k1, simd_gt(a, b)),
4852            _ => k1,
4853        };
4854        simd_bitmask(r)
4855    }
4856}
4857
4858/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4859///
4860/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi16_mask&expand=689)
4861#[inline]
4862#[target_feature(enable = "avx512bw,avx512vl")]
4863#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4864#[rustc_legacy_const_generics(2)]
4865#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4866#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4867pub const fn _mm256_cmp_epi16_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask16 {
4868    unsafe {
4869        static_assert_uimm_bits!(IMM8, 3);
4870        let a = a.as_i16x16();
4871        let b = b.as_i16x16();
4872        let r = match IMM8 {
4873            0 => simd_eq(a, b),
4874            1 => simd_lt(a, b),
4875            2 => simd_le(a, b),
4876            3 => i16x16::ZERO,
4877            4 => simd_ne(a, b),
4878            5 => simd_ge(a, b),
4879            6 => simd_gt(a, b),
4880            _ => i16x16::splat(-1),
4881        };
4882        simd_bitmask(r)
4883    }
4884}
4885
4886/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4887///
4888/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi16_mask&expand=690)
4889#[inline]
4890#[target_feature(enable = "avx512bw,avx512vl")]
4891#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4892#[rustc_legacy_const_generics(3)]
4893#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4894#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4895pub const fn _mm256_mask_cmp_epi16_mask<const IMM8: i32>(
4896    k1: __mmask16,
4897    a: __m256i,
4898    b: __m256i,
4899) -> __mmask16 {
4900    unsafe {
4901        static_assert_uimm_bits!(IMM8, 3);
4902        let a = a.as_i16x16();
4903        let b = b.as_i16x16();
4904        let k1 = simd_select_bitmask(k1, i16x16::splat(-1), i16x16::ZERO);
4905        let r = match IMM8 {
4906            0 => simd_and(k1, simd_eq(a, b)),
4907            1 => simd_and(k1, simd_lt(a, b)),
4908            2 => simd_and(k1, simd_le(a, b)),
4909            3 => i16x16::ZERO,
4910            4 => simd_and(k1, simd_ne(a, b)),
4911            5 => simd_and(k1, simd_ge(a, b)),
4912            6 => simd_and(k1, simd_gt(a, b)),
4913            _ => k1,
4914        };
4915        simd_bitmask(r)
4916    }
4917}
4918
4919/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4920///
4921/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi16_mask&expand=687)
4922#[inline]
4923#[target_feature(enable = "avx512bw,avx512vl")]
4924#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4925#[rustc_legacy_const_generics(2)]
4926#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4927#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4928pub const fn _mm_cmp_epi16_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask8 {
4929    unsafe {
4930        static_assert_uimm_bits!(IMM8, 3);
4931        let a = a.as_i16x8();
4932        let b = b.as_i16x8();
4933        let r = match IMM8 {
4934            0 => simd_eq(a, b),
4935            1 => simd_lt(a, b),
4936            2 => simd_le(a, b),
4937            3 => i16x8::ZERO,
4938            4 => simd_ne(a, b),
4939            5 => simd_ge(a, b),
4940            6 => simd_gt(a, b),
4941            _ => i16x8::splat(-1),
4942        };
4943        simd_bitmask(r)
4944    }
4945}
4946
4947/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4948///
4949/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi16_mask&expand=688)
4950#[inline]
4951#[target_feature(enable = "avx512bw,avx512vl")]
4952#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4953#[rustc_legacy_const_generics(3)]
4954#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4955#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4956pub const fn _mm_mask_cmp_epi16_mask<const IMM8: i32>(
4957    k1: __mmask8,
4958    a: __m128i,
4959    b: __m128i,
4960) -> __mmask8 {
4961    unsafe {
4962        static_assert_uimm_bits!(IMM8, 3);
4963        let a = a.as_i16x8();
4964        let b = b.as_i16x8();
4965        let k1 = simd_select_bitmask(k1, i16x8::splat(-1), i16x8::ZERO);
4966        let r = match IMM8 {
4967            0 => simd_and(k1, simd_eq(a, b)),
4968            1 => simd_and(k1, simd_lt(a, b)),
4969            2 => simd_and(k1, simd_le(a, b)),
4970            3 => i16x8::ZERO,
4971            4 => simd_and(k1, simd_ne(a, b)),
4972            5 => simd_and(k1, simd_ge(a, b)),
4973            6 => simd_and(k1, simd_gt(a, b)),
4974            _ => k1,
4975        };
4976        simd_bitmask(r)
4977    }
4978}
4979
4980/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4981///
4982/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi8_mask&expand=709)
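///
/// # Examples
///
/// A minimal sketch of the constant predicates (assuming `std` is available
/// for runtime feature detection):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512bw") {
///         // SAFETY: the required CPU features were detected at runtime.
///         unsafe {
///             let a = _mm512_set1_epi8(5);
///             let b = _mm512_set1_epi8(9);
///             // `_MM_CMPINT_TRUE` and `_MM_CMPINT_FALSE` ignore the inputs entirely.
///             assert_eq!(_mm512_cmp_epi8_mask::<_MM_CMPINT_TRUE>(a, b), u64::MAX);
///             assert_eq!(_mm512_cmp_epi8_mask::<_MM_CMPINT_FALSE>(a, b), 0);
///         }
///     }
/// }
/// ```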
4983#[inline]
4984#[target_feature(enable = "avx512bw")]
4985#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4986#[rustc_legacy_const_generics(2)]
4987#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4988#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4989pub const fn _mm512_cmp_epi8_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask64 {
4990    unsafe {
4991        static_assert_uimm_bits!(IMM8, 3);
4992        let a = a.as_i8x64();
4993        let b = b.as_i8x64();
4994        let r = match IMM8 {
4995            0 => simd_eq(a, b),
4996            1 => simd_lt(a, b),
4997            2 => simd_le(a, b),
4998            3 => i8x64::ZERO,
4999            4 => simd_ne(a, b),
5000            5 => simd_ge(a, b),
5001            6 => simd_gt(a, b),
5002            _ => i8x64::splat(-1),
5003        };
5004        simd_bitmask(r)
5005    }
5006}
5007
5008/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
5009///
5010/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi8_mask&expand=710)
5011#[inline]
5012#[target_feature(enable = "avx512bw")]
5013#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5014#[rustc_legacy_const_generics(3)]
5015#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
5016#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5017pub const fn _mm512_mask_cmp_epi8_mask<const IMM8: i32>(
5018    k1: __mmask64,
5019    a: __m512i,
5020    b: __m512i,
5021) -> __mmask64 {
5022    unsafe {
5023        static_assert_uimm_bits!(IMM8, 3);
5024        let a = a.as_i8x64();
5025        let b = b.as_i8x64();
5026        let k1 = simd_select_bitmask(k1, i8x64::splat(-1), i8x64::ZERO);
5027        let r = match IMM8 {
5028            0 => simd_and(k1, simd_eq(a, b)),
5029            1 => simd_and(k1, simd_lt(a, b)),
5030            2 => simd_and(k1, simd_le(a, b)),
5031            3 => i8x64::ZERO,
5032            4 => simd_and(k1, simd_ne(a, b)),
5033            5 => simd_and(k1, simd_ge(a, b)),
5034            6 => simd_and(k1, simd_gt(a, b)),
5035            _ => k1,
5036        };
5037        simd_bitmask(r)
5038    }
5039}
5040
5041/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
5042///
5043/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi8_mask&expand=707)
5044#[inline]
5045#[target_feature(enable = "avx512bw,avx512vl")]
5046#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5047#[rustc_legacy_const_generics(2)]
5048#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
5049#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5050pub const fn _mm256_cmp_epi8_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask32 {
5051    unsafe {
5052        static_assert_uimm_bits!(IMM8, 3);
5053        let a = a.as_i8x32();
5054        let b = b.as_i8x32();
5055        let r = match IMM8 {
5056            0 => simd_eq(a, b),
5057            1 => simd_lt(a, b),
5058            2 => simd_le(a, b),
5059            3 => i8x32::ZERO,
5060            4 => simd_ne(a, b),
5061            5 => simd_ge(a, b),
5062            6 => simd_gt(a, b),
5063            _ => i8x32::splat(-1),
5064        };
5065        simd_bitmask(r)
5066    }
5067}
5068
5069/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
5070///
5071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi8_mask&expand=708)
5072#[inline]
5073#[target_feature(enable = "avx512bw,avx512vl")]
5074#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5075#[rustc_legacy_const_generics(3)]
5076#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
5077#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5078pub const fn _mm256_mask_cmp_epi8_mask<const IMM8: i32>(
5079    k1: __mmask32,
5080    a: __m256i,
5081    b: __m256i,
5082) -> __mmask32 {
5083    unsafe {
5084        static_assert_uimm_bits!(IMM8, 3);
5085        let a = a.as_i8x32();
5086        let b = b.as_i8x32();
5087        let k1 = simd_select_bitmask(k1, i8x32::splat(-1), i8x32::ZERO);
5088        let r = match IMM8 {
5089            0 => simd_and(k1, simd_eq(a, b)),
5090            1 => simd_and(k1, simd_lt(a, b)),
5091            2 => simd_and(k1, simd_le(a, b)),
5092            3 => i8x32::ZERO,
5093            4 => simd_and(k1, simd_ne(a, b)),
5094            5 => simd_and(k1, simd_ge(a, b)),
5095            6 => simd_and(k1, simd_gt(a, b)),
5096            _ => k1,
5097        };
5098        simd_bitmask(r)
5099    }
5100}
5101
5102/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
5103///
5104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi8_mask&expand=705)
5105#[inline]
5106#[target_feature(enable = "avx512bw,avx512vl")]
5107#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5108#[rustc_legacy_const_generics(2)]
5109#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
5110#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5111pub const fn _mm_cmp_epi8_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask16 {
5112    unsafe {
5113        static_assert_uimm_bits!(IMM8, 3);
5114        let a = a.as_i8x16();
5115        let b = b.as_i8x16();
5116        let r = match IMM8 {
5117            0 => simd_eq(a, b),
5118            1 => simd_lt(a, b),
5119            2 => simd_le(a, b),
5120            3 => i8x16::ZERO,
5121            4 => simd_ne(a, b),
5122            5 => simd_ge(a, b),
5123            6 => simd_gt(a, b),
5124            _ => i8x16::splat(-1),
5125        };
5126        simd_bitmask(r)
5127    }
5128}
5129
5130/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
5131///
5132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi8_mask&expand=706)
5133#[inline]
5134#[target_feature(enable = "avx512bw,avx512vl")]
5135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5136#[rustc_legacy_const_generics(3)]
5137#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
5138#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5139pub const fn _mm_mask_cmp_epi8_mask<const IMM8: i32>(
5140    k1: __mmask16,
5141    a: __m128i,
5142    b: __m128i,
5143) -> __mmask16 {
5144    unsafe {
5145        static_assert_uimm_bits!(IMM8, 3);
5146        let a = a.as_i8x16();
5147        let b = b.as_i8x16();
5148        let k1 = simd_select_bitmask(k1, i8x16::splat(-1), i8x16::ZERO);
5149        let r = match IMM8 {
5150            0 => simd_and(k1, simd_eq(a, b)),
5151            1 => simd_and(k1, simd_lt(a, b)),
5152            2 => simd_and(k1, simd_le(a, b)),
5153            3 => i8x16::ZERO,
5154            4 => simd_and(k1, simd_ne(a, b)),
5155            5 => simd_and(k1, simd_ge(a, b)),
5156            6 => simd_and(k1, simd_gt(a, b)),
5157            _ => k1,
5158        };
5159        simd_bitmask(r)
5160    }
5161}
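
// Editorial sketch (not part of the upstream source): combining an existing
// mask with a predicate compare, as the `k1` forms above do. The helper name
// `masked_bytes_eq` is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
fn masked_bytes_eq(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Only lanes whose bit is already set in `k1` can survive into the
    // result; every other lane's bit in the returned mask is zero.
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}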
5162
5163/// Reduce the packed 16-bit integers in a by addition. Returns the sum of all elements in a.
5164///
5165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_add_epi16)
5166#[inline]
5167#[target_feature(enable = "avx512bw,avx512vl")]
5168#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5169#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5170pub const fn _mm256_reduce_add_epi16(a: __m256i) -> i16 {
5171    unsafe { simd_reduce_add_ordered(a.as_i16x16(), 0) }
5172}
5173
5174/// Reduce the packed 16-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
5175///
5176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_add_epi16)
5177#[inline]
5178#[target_feature(enable = "avx512bw,avx512vl")]
5179#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5180#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5181pub const fn _mm256_mask_reduce_add_epi16(k: __mmask16, a: __m256i) -> i16 {
5182    unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::ZERO), 0) }
5183}
5184
5185/// Reduce the packed 16-bit integers in a by addition. Returns the sum of all elements in a.
5186///
5187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_add_epi16)
5188#[inline]
5189#[target_feature(enable = "avx512bw,avx512vl")]
5190#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5191#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5192pub const fn _mm_reduce_add_epi16(a: __m128i) -> i16 {
5193    unsafe { simd_reduce_add_ordered(a.as_i16x8(), 0) }
5194}
5195
5196/// Reduce the packed 16-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
5197///
5198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_add_epi16)
5199#[inline]
5200#[target_feature(enable = "avx512bw,avx512vl")]
5201#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5202#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5203pub const fn _mm_mask_reduce_add_epi16(k: __mmask8, a: __m128i) -> i16 {
5204    unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::ZERO), 0) }
5205}
5206
5207/// Reduce the packed 8-bit integers in a by addition. Returns the sum of all elements in a.
5208///
5209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_add_epi8)
5210#[inline]
5211#[target_feature(enable = "avx512bw,avx512vl")]
5212#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5213#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5214pub const fn _mm256_reduce_add_epi8(a: __m256i) -> i8 {
5215    unsafe { simd_reduce_add_ordered(a.as_i8x32(), 0) }
5216}
5217
5218/// Reduce the packed 8-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
5219///
5220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_add_epi8)
5221#[inline]
5222#[target_feature(enable = "avx512bw,avx512vl")]
5223#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5224#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5225pub const fn _mm256_mask_reduce_add_epi8(k: __mmask32, a: __m256i) -> i8 {
5226    unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::ZERO), 0) }
5227}
5228
5229/// Reduce the packed 8-bit integers in a by addition. Returns the sum of all elements in a.
5230///
5231/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_add_epi8)
5232#[inline]
5233#[target_feature(enable = "avx512bw,avx512vl")]
5234#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5235#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5236pub const fn _mm_reduce_add_epi8(a: __m128i) -> i8 {
5237    unsafe { simd_reduce_add_ordered(a.as_i8x16(), 0) }
5238}
5239
5240/// Reduce the packed 8-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
5241///
5242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_add_epi8)
5243#[inline]
5244#[target_feature(enable = "avx512bw,avx512vl")]
5245#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5246#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5247pub const fn _mm_mask_reduce_add_epi8(k: __mmask16, a: __m128i) -> i8 {
5248    unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::ZERO), 0) }
5249}
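
// Editorial sketch (not part of the upstream source): summing only the active
// bytes of a vector. The reduction is performed in the element type (i8 here),
// so it can wrap on overflow. `sum_low_half` is a hypothetical helper name.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
fn sum_low_half(a: __m128i) -> i8 {
    // Mask 0x00ff keeps lanes 0..=7; lanes 8..=15 contribute 0 to the sum.
    _mm_mask_reduce_add_epi8(0x00ff, a)
}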
5250
5251/// Reduce the packed 16-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
5252///
5253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_and_epi16)
5254#[inline]
5255#[target_feature(enable = "avx512bw,avx512vl")]
5256#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5257#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5258pub const fn _mm256_reduce_and_epi16(a: __m256i) -> i16 {
5259    unsafe { simd_reduce_and(a.as_i16x16()) }
5260}
5261
5262/// Reduce the packed 16-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
5263///
5264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_and_epi16)
5265#[inline]
5266#[target_feature(enable = "avx512bw,avx512vl")]
5267#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5268#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5269pub const fn _mm256_mask_reduce_and_epi16(k: __mmask16, a: __m256i) -> i16 {
5270    unsafe {
5271        simd_reduce_and(simd_select_bitmask(
5272            k,
5273            a.as_i16x16(),
5274            _mm256_set1_epi64x(-1).as_i16x16(),
5275        ))
5276    }
5277}
5278
5279/// Reduce the packed 16-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
5280///
5281/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_and_epi16)
5282#[inline]
5283#[target_feature(enable = "avx512bw,avx512vl")]
5284#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5285#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5286pub const fn _mm_reduce_and_epi16(a: __m128i) -> i16 {
5287    unsafe { simd_reduce_and(a.as_i16x8()) }
5288}
5289
5290/// Reduce the packed 16-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
5291///
5292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_and_epi16)
5293#[inline]
5294#[target_feature(enable = "avx512bw,avx512vl")]
5295#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5296#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5297pub const fn _mm_mask_reduce_and_epi16(k: __mmask8, a: __m128i) -> i16 {
5298    unsafe {
5299        simd_reduce_and(simd_select_bitmask(
5300            k,
5301            a.as_i16x8(),
5302            _mm_set1_epi64x(-1).as_i16x8(),
5303        ))
5304    }
5305}
5306
5307/// Reduce the packed 8-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
5308///
5309/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_and_epi8)
5310#[inline]
5311#[target_feature(enable = "avx512bw,avx512vl")]
5312#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5313#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5314pub const fn _mm256_reduce_and_epi8(a: __m256i) -> i8 {
5315    unsafe { simd_reduce_and(a.as_i8x32()) }
5316}
5317
5318/// Reduce the packed 8-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
5319///
5320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_and_epi8)
5321#[inline]
5322#[target_feature(enable = "avx512bw,avx512vl")]
5323#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5324#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5325pub const fn _mm256_mask_reduce_and_epi8(k: __mmask32, a: __m256i) -> i8 {
5326    unsafe {
5327        simd_reduce_and(simd_select_bitmask(
5328            k,
5329            a.as_i8x32(),
5330            _mm256_set1_epi64x(-1).as_i8x32(),
5331        ))
5332    }
5333}
5334
5335/// Reduce the packed 8-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
5336///
5337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_and_epi8)
5338#[inline]
5339#[target_feature(enable = "avx512bw,avx512vl")]
5340#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5341#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5342pub const fn _mm_reduce_and_epi8(a: __m128i) -> i8 {
5343    unsafe { simd_reduce_and(a.as_i8x16()) }
5344}
5345
5346/// Reduce the packed 8-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
5347///
5348/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_and_epi8)
5349#[inline]
5350#[target_feature(enable = "avx512bw,avx512vl")]
5351#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5352#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5353pub const fn _mm_mask_reduce_and_epi8(k: __mmask16, a: __m128i) -> i8 {
5354    unsafe {
5355        simd_reduce_and(simd_select_bitmask(
5356            k,
5357            a.as_i8x16(),
5358            _mm_set1_epi64x(-1).as_i8x16(),
5359        ))
5360    }
5361}
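
// Editorial sketch (not part of the upstream source): AND-reducing only the
// active lanes. The masked forms above substitute all-ones for inactive
// lanes, the identity for AND. `common_bits` is a hypothetical helper name.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
fn common_bits(k: __mmask8, a: __m128i) -> i16 {
    // Bits set in every selected 16-bit lane remain set in the result.
    _mm_mask_reduce_and_epi16(k, a)
}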
5362
5363/// Reduce the packed 16-bit integers in a by maximum. Returns the maximum of all elements in a.
5364///
5365/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epi16)
5366#[inline]
5367#[target_feature(enable = "avx512bw,avx512vl")]
5368#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5369#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5370pub const fn _mm256_reduce_max_epi16(a: __m256i) -> i16 {
5371    unsafe { simd_reduce_max(a.as_i16x16()) }
5372}
5373
5374/// Reduce the packed 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5375///
5376/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epi16)
5377#[inline]
5378#[target_feature(enable = "avx512bw,avx512vl")]
5379#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5380#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5381pub const fn _mm256_mask_reduce_max_epi16(k: __mmask16, a: __m256i) -> i16 {
5382    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(-32768))) }
5383}
5384
5385/// Reduce the packed 16-bit integers in a by maximum. Returns the maximum of all elements in a.
5386///
5387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epi16)
5388#[inline]
5389#[target_feature(enable = "avx512bw,avx512vl")]
5390#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5391#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5392pub const fn _mm_reduce_max_epi16(a: __m128i) -> i16 {
5393    unsafe { simd_reduce_max(a.as_i16x8()) }
5394}
5395
5396/// Reduce the packed 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5397///
5398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epi16)
5399#[inline]
5400#[target_feature(enable = "avx512bw,avx512vl")]
5401#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5402#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5403pub const fn _mm_mask_reduce_max_epi16(k: __mmask8, a: __m128i) -> i16 {
5404    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(-32768))) }
5405}
5406
5407/// Reduce the packed 8-bit integers in a by maximum. Returns the maximum of all elements in a.
5408///
5409/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epi8)
5410#[inline]
5411#[target_feature(enable = "avx512bw,avx512vl")]
5412#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5413#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5414pub const fn _mm256_reduce_max_epi8(a: __m256i) -> i8 {
5415    unsafe { simd_reduce_max(a.as_i8x32()) }
5416}
5417
5418/// Reduce the packed 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5419///
5420/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epi8)
5421#[inline]
5422#[target_feature(enable = "avx512bw,avx512vl")]
5423#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5424#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5425pub const fn _mm256_mask_reduce_max_epi8(k: __mmask32, a: __m256i) -> i8 {
5426    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(-128))) }
5427}
5428
5429/// Reduce the packed 8-bit integers in a by maximum. Returns the maximum of all elements in a.
5430///
5431/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epi8)
5432#[inline]
5433#[target_feature(enable = "avx512bw,avx512vl")]
5434#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5435#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5436pub const fn _mm_reduce_max_epi8(a: __m128i) -> i8 {
5437    unsafe { simd_reduce_max(a.as_i8x16()) }
5438}
5439
5440/// Reduce the packed 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5441///
5442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epi8)
5443#[inline]
5444#[target_feature(enable = "avx512bw,avx512vl")]
5445#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5446#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5447pub const fn _mm_mask_reduce_max_epi8(k: __mmask16, a: __m128i) -> i8 {
5448    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(-128))) }
5449}
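
// Editorial sketch (not part of the upstream source): maximum of the selected
// signed lanes. Inactive lanes are replaced with i16::MIN / i8::MIN above,
// the identity for a signed maximum. `max_active_i16` is a hypothetical name.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
fn max_active_i16(k: __mmask16, a: __m256i) -> i16 {
    // With k == 0 every lane is the identity, so the result is i16::MIN.
    _mm256_mask_reduce_max_epi16(k, a)
}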
5450
5451/// Reduce the packed unsigned 16-bit integers in a by maximum. Returns the maximum of all elements in a.
5452///
5453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epu16)
5454#[inline]
5455#[target_feature(enable = "avx512bw,avx512vl")]
5456#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5457#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5458pub const fn _mm256_reduce_max_epu16(a: __m256i) -> u16 {
5459    unsafe { simd_reduce_max(a.as_u16x16()) }
5460}
5461
5462/// Reduce the packed unsigned 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5463///
5464/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epu16)
5465#[inline]
5466#[target_feature(enable = "avx512bw,avx512vl")]
5467#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5468#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5469pub const fn _mm256_mask_reduce_max_epu16(k: __mmask16, a: __m256i) -> u16 {
5470    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u16x16(), u16x16::ZERO)) }
5471}
5472
5473/// Reduce the packed unsigned 16-bit integers in a by maximum. Returns the maximum of all elements in a.
5474///
5475/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epu16)
5476#[inline]
5477#[target_feature(enable = "avx512bw,avx512vl")]
5478#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5479#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5480pub const fn _mm_reduce_max_epu16(a: __m128i) -> u16 {
5481    unsafe { simd_reduce_max(a.as_u16x8()) }
5482}
5483
5484/// Reduce the packed unsigned 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5485///
5486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epu16)
5487#[inline]
5488#[target_feature(enable = "avx512bw,avx512vl")]
5489#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5490#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5491pub const fn _mm_mask_reduce_max_epu16(k: __mmask8, a: __m128i) -> u16 {
5492    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u16x8(), u16x8::ZERO)) }
5493}
5494
5495/// Reduce the packed unsigned 8-bit integers in a by maximum. Returns the maximum of all elements in a.
5496///
5497/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epu8)
5498#[inline]
5499#[target_feature(enable = "avx512bw,avx512vl")]
5500#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5501#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5502pub const fn _mm256_reduce_max_epu8(a: __m256i) -> u8 {
5503    unsafe { simd_reduce_max(a.as_u8x32()) }
5504}
5505
5506/// Reduce the packed unsigned 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5507///
5508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epu8)
5509#[inline]
5510#[target_feature(enable = "avx512bw,avx512vl")]
5511#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5512#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5513pub const fn _mm256_mask_reduce_max_epu8(k: __mmask32, a: __m256i) -> u8 {
5514    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u8x32(), u8x32::ZERO)) }
5515}
5516
5517/// Reduce the packed unsigned 8-bit integers in a by maximum. Returns the maximum of all elements in a.
5518///
5519/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epu8)
5520#[inline]
5521#[target_feature(enable = "avx512bw,avx512vl")]
5522#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5523#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5524pub const fn _mm_reduce_max_epu8(a: __m128i) -> u8 {
5525    unsafe { simd_reduce_max(a.as_u8x16()) }
5526}
5527
5528/// Reduce the packed unsigned 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5529///
5530/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epu8)
5531#[inline]
5532#[target_feature(enable = "avx512bw,avx512vl")]
5533#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5534#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5535pub const fn _mm_mask_reduce_max_epu8(k: __mmask16, a: __m128i) -> u8 {
5536    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u8x16(), u8x16::ZERO)) }
5537}
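
// Editorial sketch (not part of the upstream source): unsigned maximum over
// the selected byte lanes. Inactive lanes become 0, the identity for an
// unsigned maximum, so an empty mask yields 0. `max_active_u8` is a
// hypothetical helper name.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
fn max_active_u8(k: __mmask16, a: __m128i) -> u8 {
    _mm_mask_reduce_max_epu8(k, a)
}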
5538
5539/// Reduce the packed 16-bit integers in a by minimum. Returns the minimum of all elements in a.
5540///
5541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epi16)
5542#[inline]
5543#[target_feature(enable = "avx512bw,avx512vl")]
5544#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5545#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5546pub const fn _mm256_reduce_min_epi16(a: __m256i) -> i16 {
5547    unsafe { simd_reduce_min(a.as_i16x16()) }
5548}
5549
5550/// Reduce the packed 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5551///
5552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epi16)
5553#[inline]
5554#[target_feature(enable = "avx512bw,avx512vl")]
5555#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5556#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5557pub const fn _mm256_mask_reduce_min_epi16(k: __mmask16, a: __m256i) -> i16 {
5558    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(0x7fff))) }
5559}
5560
5561/// Reduce the packed 16-bit integers in a by minimum. Returns the minimum of all elements in a.
5562///
5563/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epi16)
5564#[inline]
5565#[target_feature(enable = "avx512bw,avx512vl")]
5566#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5567#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5568pub const fn _mm_reduce_min_epi16(a: __m128i) -> i16 {
5569    unsafe { simd_reduce_min(a.as_i16x8()) }
5570}
5571
5572/// Reduce the packed 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5573///
5574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epi16)
5575#[inline]
5576#[target_feature(enable = "avx512bw,avx512vl")]
5577#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5578#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5579pub const fn _mm_mask_reduce_min_epi16(k: __mmask8, a: __m128i) -> i16 {
5580    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(0x7fff))) }
5581}
5582
5583/// Reduce the packed 8-bit integers in a by minimum. Returns the minimum of all elements in a.
5584///
5585/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epi8)
5586#[inline]
5587#[target_feature(enable = "avx512bw,avx512vl")]
5588#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5589#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5590pub const fn _mm256_reduce_min_epi8(a: __m256i) -> i8 {
5591    unsafe { simd_reduce_min(a.as_i8x32()) }
5592}
5593
5594/// Reduce the packed 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5595///
5596/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epi8)
5597#[inline]
5598#[target_feature(enable = "avx512bw,avx512vl")]
5599#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5600#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5601pub const fn _mm256_mask_reduce_min_epi8(k: __mmask32, a: __m256i) -> i8 {
5602    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(0x7f))) }
5603}
5604
5605/// Reduce the packed 8-bit integers in a by minimum. Returns the minimum of all elements in a.
5606///
5607/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epi8)
5608#[inline]
5609#[target_feature(enable = "avx512bw,avx512vl")]
5610#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5611#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5612pub const fn _mm_reduce_min_epi8(a: __m128i) -> i8 {
5613    unsafe { simd_reduce_min(a.as_i8x16()) }
5614}
5615
5616/// Reduce the packed 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5617///
5618/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epi8)
5619#[inline]
5620#[target_feature(enable = "avx512bw,avx512vl")]
5621#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5622#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5623pub const fn _mm_mask_reduce_min_epi8(k: __mmask16, a: __m128i) -> i8 {
5624    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(0x7f))) }
5625}
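
// Editorial sketch (not part of the upstream source): signed minimum over the
// selected lanes. Inactive lanes are filled with i16::MAX / i8::MAX, the
// identity for a signed minimum. `min_active_i8` is a hypothetical name.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
fn min_active_i8(k: __mmask32, a: __m256i) -> i8 {
    // With an all-zero mask the result is i8::MAX (0x7f).
    _mm256_mask_reduce_min_epi8(k, a)
}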
5626
5627/// Reduce the packed unsigned 16-bit integers in a by minimum. Returns the minimum of all elements in a.
5628///
5629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epu16)
5630#[inline]
5631#[target_feature(enable = "avx512bw,avx512vl")]
5632#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5633#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5634pub const fn _mm256_reduce_min_epu16(a: __m256i) -> u16 {
5635    unsafe { simd_reduce_min(a.as_u16x16()) }
5636}
5637
5638/// Reduce the packed unsigned 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5639///
5640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epu16)
5641#[inline]
5642#[target_feature(enable = "avx512bw,avx512vl")]
5643#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5644#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5645pub const fn _mm256_mask_reduce_min_epu16(k: __mmask16, a: __m256i) -> u16 {
5646    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u16x16(), u16x16::splat(0xffff))) }
5647}
5648
5649/// Reduce the packed unsigned 16-bit integers in a by minimum. Returns the minimum of all elements in a.
5650///
5651/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epu16)
5652#[inline]
5653#[target_feature(enable = "avx512bw,avx512vl")]
5654#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5655#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5656pub const fn _mm_reduce_min_epu16(a: __m128i) -> u16 {
5657    unsafe { simd_reduce_min(a.as_u16x8()) }
5658}
5659
5660/// Reduce the packed unsigned 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5661///
5662/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epu16)
5663#[inline]
5664#[target_feature(enable = "avx512bw,avx512vl")]
5665#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5666#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5667pub const fn _mm_mask_reduce_min_epu16(k: __mmask8, a: __m128i) -> u16 {
5668    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u16x8(), u16x8::splat(0xffff))) }
5669}
5670
5671/// Reduce the packed unsigned 8-bit integers in a by minimum. Returns the minimum of all elements in a.
5672///
5673/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epu8)
5674#[inline]
5675#[target_feature(enable = "avx512bw,avx512vl")]
5676#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5677#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5678pub const fn _mm256_reduce_min_epu8(a: __m256i) -> u8 {
5679    unsafe { simd_reduce_min(a.as_u8x32()) }
5680}
5681
5682/// Reduce the packed unsigned 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5683///
5684/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epu8)
5685#[inline]
5686#[target_feature(enable = "avx512bw,avx512vl")]
5687#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5688#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5689pub const fn _mm256_mask_reduce_min_epu8(k: __mmask32, a: __m256i) -> u8 {
5690    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u8x32(), u8x32::splat(0xff))) }
5691}
5692
5693/// Reduce the packed unsigned 8-bit integers in a by minimum. Returns the minimum of all elements in a.
5694///
5695/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epu8)
5696#[inline]
5697#[target_feature(enable = "avx512bw,avx512vl")]
5698#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5699#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5700pub const fn _mm_reduce_min_epu8(a: __m128i) -> u8 {
5701    unsafe { simd_reduce_min(a.as_u8x16()) }
5702}
5703
5704/// Reduce the packed unsigned 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5705///
5706/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epu8)
5707#[inline]
5708#[target_feature(enable = "avx512bw,avx512vl")]
5709#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5710#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5711pub const fn _mm_mask_reduce_min_epu8(k: __mmask16, a: __m128i) -> u8 {
5712    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u8x16(), u8x16::splat(0xff))) }
5713}
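
// Editorial sketch (not part of the upstream source): unsigned minimum over
// the selected lanes. Inactive lanes are filled with the all-ones value
// (u16::MAX / u8::MAX), the identity for an unsigned minimum.
// `min_active_u16` is a hypothetical helper name.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
fn min_active_u16(k: __mmask8, a: __m128i) -> u16 {
    _mm_mask_reduce_min_epu16(k, a)
}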
5714
5715/// Reduce the packed 16-bit integers in a by multiplication. Returns the product of all elements in a.
5716///
5717/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_mul_epi16)
5718#[inline]
5719#[target_feature(enable = "avx512bw,avx512vl")]
5720#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5721#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5722pub const fn _mm256_reduce_mul_epi16(a: __m256i) -> i16 {
5723    unsafe { simd_reduce_mul_ordered(a.as_i16x16(), 1) }
5724}
5725
5726/// Reduce the packed 16-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
5727///
5728/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_mul_epi16)
5729#[inline]
5730#[target_feature(enable = "avx512bw,avx512vl")]
5731#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5732#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5733pub const fn _mm256_mask_reduce_mul_epi16(k: __mmask16, a: __m256i) -> i16 {
5734    unsafe { simd_reduce_mul_ordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(1)), 1) }
5735}
5736
5737/// Reduce the packed 16-bit integers in a by multiplication. Returns the product of all elements in a.
5738///
5739/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_mul_epi16)
5740#[inline]
5741#[target_feature(enable = "avx512bw,avx512vl")]
5742#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5743#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5744pub const fn _mm_reduce_mul_epi16(a: __m128i) -> i16 {
5745    unsafe { simd_reduce_mul_ordered(a.as_i16x8(), 1) }
5746}
5747
5748/// Reduce the packed 16-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
5749///
5750/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_mul_epi16)
5751#[inline]
5752#[target_feature(enable = "avx512bw,avx512vl")]
5753#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5754#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5755pub const fn _mm_mask_reduce_mul_epi16(k: __mmask8, a: __m128i) -> i16 {
5756    unsafe { simd_reduce_mul_ordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(1)), 1) }
5757}
5758
5759/// Reduce the packed 8-bit integers in a by multiplication. Returns the product of all elements in a.
5760///
5761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_mul_epi8)
5762#[inline]
5763#[target_feature(enable = "avx512bw,avx512vl")]
5764#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5765#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5766pub const fn _mm256_reduce_mul_epi8(a: __m256i) -> i8 {
5767    unsafe { simd_reduce_mul_ordered(a.as_i8x32(), 1) }
5768}
5769
5770/// Reduce the packed 8-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
5771///
5772/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_mul_epi8)
5773#[inline]
5774#[target_feature(enable = "avx512bw,avx512vl")]
5775#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5776#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5777pub const fn _mm256_mask_reduce_mul_epi8(k: __mmask32, a: __m256i) -> i8 {
5778    unsafe { simd_reduce_mul_ordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(1)), 1) }
5779}
5780
5781/// Reduce the packed 8-bit integers in a by multiplication. Returns the product of all elements in a.
5782///
5783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_mul_epi8)
5784#[inline]
5785#[target_feature(enable = "avx512bw,avx512vl")]
5786#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5787#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5788pub const fn _mm_reduce_mul_epi8(a: __m128i) -> i8 {
5789    unsafe { simd_reduce_mul_ordered(a.as_i8x16(), 1) }
5790}
5791
5792/// Reduce the packed 8-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
5793///
5794/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_mul_epi8)
5795#[inline]
5796#[target_feature(enable = "avx512bw,avx512vl")]
5797#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5798#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5799pub const fn _mm_mask_reduce_mul_epi8(k: __mmask16, a: __m128i) -> i8 {
5800    unsafe { simd_reduce_mul_ordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(1)), 1) }
5801}
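
// Editorial sketch (not part of the upstream source): multiplying only the
// selected lanes. Inactive lanes are replaced with 1, the multiplicative
// identity, and the product wraps in the element type.
// `product_of_active` is a hypothetical helper name.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
fn product_of_active(k: __mmask8, a: __m128i) -> i16 {
    _mm_mask_reduce_mul_epi16(k, a)
}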
5802
5803/// Reduce the packed 16-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
5804///
5805/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_or_epi16)
5806#[inline]
5807#[target_feature(enable = "avx512bw,avx512vl")]
5808#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5809#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5810pub const fn _mm256_reduce_or_epi16(a: __m256i) -> i16 {
5811    unsafe { simd_reduce_or(a.as_i16x16()) }
5812}
5813
5814/// Reduce the packed 16-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
5815///
5816/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_or_epi16)
5817#[inline]
5818#[target_feature(enable = "avx512bw,avx512vl")]
5819#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5820#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5821pub const fn _mm256_mask_reduce_or_epi16(k: __mmask16, a: __m256i) -> i16 {
5822    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i16x16(), i16x16::ZERO)) }
5823}
5824
5825/// Reduce the packed 16-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
5826///
5827/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_or_epi16)
5828#[inline]
5829#[target_feature(enable = "avx512bw,avx512vl")]
5830#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5831#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5832pub const fn _mm_reduce_or_epi16(a: __m128i) -> i16 {
5833    unsafe { simd_reduce_or(a.as_i16x8()) }
5834}
5835
5836/// Reduce the packed 16-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
5837///
5838/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_or_epi16)
5839#[inline]
5840#[target_feature(enable = "avx512bw,avx512vl")]
5841#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5842#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5843pub const fn _mm_mask_reduce_or_epi16(k: __mmask8, a: __m128i) -> i16 {
5844    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i16x8(), i16x8::ZERO)) }
5845}
5846
5847/// Reduce the packed 8-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
5848///
5849/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_or_epi8)
5850#[inline]
5851#[target_feature(enable = "avx512bw,avx512vl")]
5852#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5853#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5854pub const fn _mm256_reduce_or_epi8(a: __m256i) -> i8 {
5855    unsafe { simd_reduce_or(a.as_i8x32()) }
5856}
5857
5858/// Reduce the packed 8-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
5859///
5860/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_or_epi8)
5861#[inline]
5862#[target_feature(enable = "avx512bw,avx512vl")]
5863#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5864#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5865pub const fn _mm256_mask_reduce_or_epi8(k: __mmask32, a: __m256i) -> i8 {
5866    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i8x32(), i8x32::ZERO)) }
5867}
5868
5869/// Reduce the packed 8-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
5870///
5871/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_or_epi8)
5872#[inline]
5873#[target_feature(enable = "avx512bw,avx512vl")]
5874#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5875#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5876pub const fn _mm_reduce_or_epi8(a: __m128i) -> i8 {
5877    unsafe { simd_reduce_or(a.as_i8x16()) }
5878}
5879
5880/// Reduce the packed 8-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
5881///
5882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_or_epi8)
5883#[inline]
5884#[target_feature(enable = "avx512bw,avx512vl")]
5885#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5886#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5887pub const fn _mm_mask_reduce_or_epi8(k: __mmask16, a: __m128i) -> i8 {
5888    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i8x16(), i8x16::ZERO)) }
5889}
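
// Editorial sketch (not part of the upstream source): OR-reducing the
// selected lanes. Inactive lanes become 0, the identity for OR, so the result
// collects every bit set in any active lane. `any_bits_set` is a hypothetical
// helper name.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
fn any_bits_set(k: __mmask16, a: __m128i) -> bool {
    _mm_mask_reduce_or_epi8(k, a) != 0
}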
5890
5891/// Load 512-bits (composed of 32 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
5892///
5893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi16&expand=3368)
5894#[inline]
5895#[target_feature(enable = "avx512bw")]
5896#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5897#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu16
5898#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5899pub const unsafe fn _mm512_loadu_epi16(mem_addr: *const i16) -> __m512i {
5900    ptr::read_unaligned(mem_addr as *const __m512i)
5901}
5902
5903/// Load 256-bits (composed of 16 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
5904///
5905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi16&expand=3365)
5906#[inline]
5907#[target_feature(enable = "avx512bw,avx512vl")]
5908#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5909#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu16
5910#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5911pub const unsafe fn _mm256_loadu_epi16(mem_addr: *const i16) -> __m256i {
5912    ptr::read_unaligned(mem_addr as *const __m256i)
5913}
5914
5915/// Load 128-bits (composed of 8 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
5916///
5917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi16&expand=3362)
5918#[inline]
5919#[target_feature(enable = "avx512bw,avx512vl")]
5920#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5921#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu16
5922#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5923pub const unsafe fn _mm_loadu_epi16(mem_addr: *const i16) -> __m128i {
5924    ptr::read_unaligned(mem_addr as *const __m128i)
5925}
5926
5927/// Load 512-bits (composed of 64 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
5928///
5929/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi8&expand=3395)
5930#[inline]
5931#[target_feature(enable = "avx512bw")]
5932#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5933#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu8
5934#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5935pub const unsafe fn _mm512_loadu_epi8(mem_addr: *const i8) -> __m512i {
5936    ptr::read_unaligned(mem_addr as *const __m512i)
5937}
5938
5939/// Load 256-bits (composed of 32 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
5940///
5941/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi8&expand=3392)
5942#[inline]
5943#[target_feature(enable = "avx512bw,avx512vl")]
5944#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5945#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu8
5946#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5947pub const unsafe fn _mm256_loadu_epi8(mem_addr: *const i8) -> __m256i {
5948    ptr::read_unaligned(mem_addr as *const __m256i)
5949}
5950
5951/// Load 128-bits (composed of 16 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
5952///
5953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi8&expand=3389)
5954#[inline]
5955#[target_feature(enable = "avx512bw,avx512vl")]
5956#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5957#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu8
5958#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5959pub const unsafe fn _mm_loadu_epi8(mem_addr: *const i8) -> __m128i {
5960    ptr::read_unaligned(mem_addr as *const __m128i)
5961}
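
// Editorial sketch (not part of the upstream source): an unaligned load from
// a plain array. Safety relies only on the pointer covering 32 readable i16
// values; no alignment is required. `load_words` is a hypothetical name.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn load_words(words: &[i16; 32]) -> __m512i {
    // SAFETY: the reference guarantees 32 readable, initialized i16 values;
    // the load itself tolerates any alignment.
    unsafe { _mm512_loadu_epi16(words.as_ptr()) }
}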
5962
5963/// Store 512-bits (composed of 32 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
5964///
5965/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi16&expand=5622)
5966#[inline]
5967#[target_feature(enable = "avx512bw")]
5968#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5969#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu16
5970#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5971pub const unsafe fn _mm512_storeu_epi16(mem_addr: *mut i16, a: __m512i) {
5972    ptr::write_unaligned(mem_addr as *mut __m512i, a);
5973}
5974
5975/// Store 256-bits (composed of 16 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
5976///
5977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi16&expand=5620)
5978#[inline]
5979#[target_feature(enable = "avx512bw,avx512vl")]
5980#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5981#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu16
5982#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5983pub const unsafe fn _mm256_storeu_epi16(mem_addr: *mut i16, a: __m256i) {
5984    ptr::write_unaligned(mem_addr as *mut __m256i, a);
5985}
5986
5987/// Store 128-bits (composed of 8 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
5988///
5989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi16&expand=5618)
5990#[inline]
5991#[target_feature(enable = "avx512bw,avx512vl")]
5992#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5993#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu16
5994#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5995pub const unsafe fn _mm_storeu_epi16(mem_addr: *mut i16, a: __m128i) {
5996    ptr::write_unaligned(mem_addr as *mut __m128i, a);
5997}
5998
5999/// Store 512-bits (composed of 64 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
6000///
6001/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi8&expand=5640)
6002#[inline]
6003#[target_feature(enable = "avx512bw")]
6004#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6005#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu8
6006#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6007pub const unsafe fn _mm512_storeu_epi8(mem_addr: *mut i8, a: __m512i) {
6008    ptr::write_unaligned(mem_addr as *mut __m512i, a);
6009}
6010
6011/// Store 256-bits (composed of 32 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
6012///
6013/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi8&expand=5638)
6014#[inline]
6015#[target_feature(enable = "avx512bw,avx512vl")]
6016#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6017#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu8
6018#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6019pub const unsafe fn _mm256_storeu_epi8(mem_addr: *mut i8, a: __m256i) {
6020    ptr::write_unaligned(mem_addr as *mut __m256i, a);
6021}
6022
6023/// Store 128-bits (composed of 16 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
6024///
6025/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi8&expand=5636)
6026#[inline]
6027#[target_feature(enable = "avx512bw,avx512vl")]
6028#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6029#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu8
6030#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6031pub const unsafe fn _mm_storeu_epi8(mem_addr: *mut i8, a: __m128i) {
6032    ptr::write_unaligned(mem_addr as *mut __m128i, a);
6033}
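
// Editorial sketch (not part of the upstream source): an unaligned store into
// a caller-provided buffer. `store_bytes` is a hypothetical helper name.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn store_bytes(out: &mut [i8; 64], a: __m512i) {
    // SAFETY: the mutable reference guarantees 64 writable bytes; the store
    // places no alignment requirement on the destination.
    unsafe { _mm512_storeu_epi8(out.as_mut_ptr(), a) }
}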
6034
6035/// Load packed 16-bit integers from memory into dst using writemask k
6036/// (elements are copied from src when the corresponding mask bit is not set).
6037/// mem_addr does not need to be aligned on any particular boundary.
6038///
6039/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi16)
6040#[inline]
6041#[target_feature(enable = "avx512bw")]
6042#[cfg_attr(test, assert_instr(vmovdqu16))]
6043#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6044#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6045pub const unsafe fn _mm512_mask_loadu_epi16(
6046    src: __m512i,
6047    k: __mmask32,
6048    mem_addr: *const i16,
6049) -> __m512i {
6050    let mask = simd_select_bitmask(k, i16x32::splat(!0), i16x32::ZERO);
6051    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i16x32()).as_m512i()
6052}
6053
6054/// Load packed 16-bit integers from memory into dst using zeromask k
6055/// (elements are zeroed out when the corresponding mask bit is not set).
6056/// mem_addr does not need to be aligned on any particular boundary.
6057///
6058/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi16)
6059#[inline]
6060#[target_feature(enable = "avx512bw")]
6061#[cfg_attr(test, assert_instr(vmovdqu16))]
6062#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6063#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6064pub const unsafe fn _mm512_maskz_loadu_epi16(k: __mmask32, mem_addr: *const i16) -> __m512i {
6065    _mm512_mask_loadu_epi16(_mm512_setzero_si512(), k, mem_addr)
6066}
6067
6068/// Load packed 8-bit integers from memory into dst using writemask k
6069/// (elements are copied from src when the corresponding mask bit is not set).
6070/// mem_addr does not need to be aligned on any particular boundary.
6071///
6072/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi8)
6073#[inline]
6074#[target_feature(enable = "avx512bw")]
6075#[cfg_attr(test, assert_instr(vmovdqu8))]
6076#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6077#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6078pub const unsafe fn _mm512_mask_loadu_epi8(
6079    src: __m512i,
6080    k: __mmask64,
6081    mem_addr: *const i8,
6082) -> __m512i {
6083    let mask = simd_select_bitmask(k, i8x64::splat(!0), i8x64::ZERO);
6084    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i8x64()).as_m512i()
6085}
6086
6087/// Load packed 8-bit integers from memory into dst using zeromask k
6088/// (elements are zeroed out when the corresponding mask bit is not set).
6089/// mem_addr does not need to be aligned on any particular boundary.
6090///
6091/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi8)
6092#[inline]
6093#[target_feature(enable = "avx512bw")]
6094#[cfg_attr(test, assert_instr(vmovdqu8))]
6095#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6096#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6097pub const unsafe fn _mm512_maskz_loadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m512i {
6098    _mm512_mask_loadu_epi8(_mm512_setzero_si512(), k, mem_addr)
6099}
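
// Editorial sketch (not part of the upstream source): loading only the first
// few bytes of a buffer and zeroing the remaining lanes. Lanes whose mask bit
// is clear are not read from memory, which is what makes a partial load near
// the end of a buffer safe. `load_prefix` is a hypothetical helper name and
// the length handling is deliberately minimal.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn load_prefix(bytes: &[i8]) -> __m512i {
    assert!(bytes.len() <= 64);
    // Set one mask bit per available byte; the remaining lanes become 0.
    let k: __mmask64 = if bytes.len() == 64 {
        !0
    } else {
        (1u64 << bytes.len()) - 1
    };
    // SAFETY: every lane enabled in `k` lies inside `bytes`.
    unsafe { _mm512_maskz_loadu_epi8(k, bytes.as_ptr()) }
}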
6100
6101/// Load packed 16-bit integers from memory into dst using writemask k
6102/// (elements are copied from src when the corresponding mask bit is not set).
6103/// mem_addr does not need to be aligned on any particular boundary.
6104///
6105/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi16)
6106#[inline]
6107#[target_feature(enable = "avx512bw,avx512vl")]
6108#[cfg_attr(test, assert_instr(vmovdqu16))]
6109#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6110#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6111pub const unsafe fn _mm256_mask_loadu_epi16(
6112    src: __m256i,
6113    k: __mmask16,
6114    mem_addr: *const i16,
6115) -> __m256i {
6116    let mask = simd_select_bitmask(k, i16x16::splat(!0), i16x16::ZERO);
6117    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i16x16()).as_m256i()
6118}
6119
6120/// Load packed 16-bit integers from memory into dst using zeromask k
6121/// (elements are zeroed out when the corresponding mask bit is not set).
6122/// mem_addr does not need to be aligned on any particular boundary.
6123///
6124/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi16)
6125#[inline]
6126#[target_feature(enable = "avx512bw,avx512vl")]
6127#[cfg_attr(test, assert_instr(vmovdqu16))]
6128#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6129#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6130pub const unsafe fn _mm256_maskz_loadu_epi16(k: __mmask16, mem_addr: *const i16) -> __m256i {
6131    _mm256_mask_loadu_epi16(_mm256_setzero_si256(), k, mem_addr)
6132}
6133
6134/// Load packed 8-bit integers from memory into dst using writemask k
6135/// (elements are copied from src when the corresponding mask bit is not set).
6136/// mem_addr does not need to be aligned on any particular boundary.
6137///
6138/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi8)
6139#[inline]
6140#[target_feature(enable = "avx512bw,avx512vl")]
6141#[cfg_attr(test, assert_instr(vmovdqu8))]
6142#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6143#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6144pub const unsafe fn _mm256_mask_loadu_epi8(
6145    src: __m256i,
6146    k: __mmask32,
6147    mem_addr: *const i8,
6148) -> __m256i {
6149    let mask = simd_select_bitmask(k, i8x32::splat(!0), i8x32::ZERO);
6150    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i8x32()).as_m256i()
6151}
6152
6153/// Load packed 8-bit integers from memory into dst using zeromask k
6154/// (elements are zeroed out when the corresponding mask bit is not set).
6155/// mem_addr does not need to be aligned on any particular boundary.
6156///
6157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi8)
6158#[inline]
6159#[target_feature(enable = "avx512bw,avx512vl")]
6160#[cfg_attr(test, assert_instr(vmovdqu8))]
6161#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6162#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6163pub const unsafe fn _mm256_maskz_loadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m256i {
6164    _mm256_mask_loadu_epi8(_mm256_setzero_si256(), k, mem_addr)
6165}
6166
6167/// Load packed 16-bit integers from memory into dst using writemask k
6168/// (elements are copied from src when the corresponding mask bit is not set).
6169/// mem_addr does not need to be aligned on any particular boundary.
6170///
6171/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi16)
6172#[inline]
6173#[target_feature(enable = "avx512bw,avx512vl")]
6174#[cfg_attr(test, assert_instr(vmovdqu16))]
6175#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6176#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6177pub const unsafe fn _mm_mask_loadu_epi16(
6178    src: __m128i,
6179    k: __mmask8,
6180    mem_addr: *const i16,
6181) -> __m128i {
6182    let mask = simd_select_bitmask(k, i16x8::splat(!0), i16x8::ZERO);
6183    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i16x8()).as_m128i()
6184}
6185
6186/// Load packed 16-bit integers from memory into dst using zeromask k
6187/// (elements are zeroed out when the corresponding mask bit is not set).
6188/// mem_addr does not need to be aligned on any particular boundary.
6189///
6190/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi16)
6191#[inline]
6192#[target_feature(enable = "avx512bw,avx512vl")]
6193#[cfg_attr(test, assert_instr(vmovdqu16))]
6194#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6195#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6196pub const unsafe fn _mm_maskz_loadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128i {
6197    _mm_mask_loadu_epi16(_mm_setzero_si128(), k, mem_addr)
6198}
6199
6200/// Load packed 8-bit integers from memory into dst using writemask k
6201/// (elements are copied from src when the corresponding mask bit is not set).
6202/// mem_addr does not need to be aligned on any particular boundary.
6203///
6204/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi8)
6205#[inline]
6206#[target_feature(enable = "avx512bw,avx512vl")]
6207#[cfg_attr(test, assert_instr(vmovdqu8))]
6208#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6209#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6210pub const unsafe fn _mm_mask_loadu_epi8(
6211    src: __m128i,
6212    k: __mmask16,
6213    mem_addr: *const i8,
6214) -> __m128i {
6215    let mask = simd_select_bitmask(k, i8x16::splat(!0), i8x16::ZERO);
6216    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i8x16()).as_m128i()
6217}
6218
6219/// Load packed 8-bit integers from memory into dst using zeromask k
6220/// (elements are zeroed out when the corresponding mask bit is not set).
6221/// mem_addr does not need to be aligned on any particular boundary.
6222///
6223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi8)
6224#[inline]
6225#[target_feature(enable = "avx512bw,avx512vl")]
6226#[cfg_attr(test, assert_instr(vmovdqu8))]
6227#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6228#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6229pub const unsafe fn _mm_maskz_loadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i {
6230    _mm_mask_loadu_epi8(_mm_setzero_si128(), k, mem_addr)
6231}
6232
6233/// Store packed 16-bit integers from a into memory using writemask k.
6234/// mem_addr does not need to be aligned on any particular boundary.
6235///
6236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi16)
6237#[inline]
6238#[target_feature(enable = "avx512bw")]
6239#[cfg_attr(test, assert_instr(vmovdqu16))]
6240#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6241#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6242pub const unsafe fn _mm512_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask32, a: __m512i) {
6243    let mask = simd_select_bitmask(mask, i16x32::splat(!0), i16x32::ZERO);
6244    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i16x32());
6245}
6246
6247/// Store packed 8-bit integers from a into memory using writemask k.
6248/// mem_addr does not need to be aligned on any particular boundary.
6249///
6250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi8)
6251#[inline]
6252#[target_feature(enable = "avx512bw")]
6253#[cfg_attr(test, assert_instr(vmovdqu8))]
6254#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6255#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6256pub const unsafe fn _mm512_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask64, a: __m512i) {
6257    let mask = simd_select_bitmask(mask, i8x64::splat(!0), i8x64::ZERO);
6258    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i8x64());
6259}
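
// Illustrative sketch (not part of the original source): the store-side
// counterpart of the masked tail load, writing only the first `len` bytes of a
// vector and leaving the rest of the destination untouched. The `store_tail`
// helper is hypothetical.
//
// ```rust
// #[target_feature(enable = "avx512bw")]
// unsafe fn store_tail(ptr: *mut i8, len: usize, v: __m512i) {
//     debug_assert!(len < 64);
//     let k: __mmask64 = (1u64 << len) - 1; // write only the first `len` bytes
//     _mm512_mask_storeu_epi8(ptr, k, v);
// }
// ```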
6260
6261/// Store packed 16-bit integers from a into memory using writemask k.
6262/// mem_addr does not need to be aligned on any particular boundary.
6263///
6264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi16)
6265#[inline]
6266#[target_feature(enable = "avx512bw,avx512vl")]
6267#[cfg_attr(test, assert_instr(vmovdqu16))]
6268#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6269#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6270pub const unsafe fn _mm256_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask16, a: __m256i) {
6271    let mask = simd_select_bitmask(mask, i16x16::splat(!0), i16x16::ZERO);
6272    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i16x16());
6273}
6274
6275/// Store packed 8-bit integers from a into memory using writemask k.
6276/// mem_addr does not need to be aligned on any particular boundary.
6277///
6278/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi8)
6279#[inline]
6280#[target_feature(enable = "avx512bw,avx512vl")]
6281#[cfg_attr(test, assert_instr(vmovdqu8))]
6282#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6283#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6284pub const unsafe fn _mm256_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask32, a: __m256i) {
6285    let mask = simd_select_bitmask(mask, i8x32::splat(!0), i8x32::ZERO);
6286    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i8x32());
6287}
6288
6289/// Store packed 16-bit integers from a into memory using writemask k.
6290/// mem_addr does not need to be aligned on any particular boundary.
6291///
6292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi16)
6293#[inline]
6294#[target_feature(enable = "avx512bw,avx512vl")]
6295#[cfg_attr(test, assert_instr(vmovdqu16))]
6296#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6297#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6298pub const unsafe fn _mm_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask8, a: __m128i) {
6299    let mask = simd_select_bitmask(mask, i16x8::splat(!0), i16x8::ZERO);
6300    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i16x8());
6301}
6302
6303/// Store packed 8-bit integers from a into memory using writemask k.
6304/// mem_addr does not need to be aligned on any particular boundary.
6305///
6306/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi8)
6307#[inline]
6308#[target_feature(enable = "avx512bw,avx512vl")]
6309#[cfg_attr(test, assert_instr(vmovdqu8))]
6310#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6311#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6312pub const unsafe fn _mm_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask16, a: __m128i) {
6313    let mask = simd_select_bitmask(mask, i8x16::splat(!0), i8x16::ZERO);
6314    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i8x16());
6315}
6316
6317/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst.
6318///
6319/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_madd_epi16&expand=3511)
6320#[inline]
6321#[target_feature(enable = "avx512bw")]
6322#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6323#[cfg_attr(test, assert_instr(vpmaddwd))]
6324pub fn _mm512_madd_epi16(a: __m512i, b: __m512i) -> __m512i {
6325    // Multiplying by all ones and adding adjacent products gives a widening pairwise add, a trick used in the Adler-32 algorithm:
6326    //
6327    // ```rust
6328    // #[target_feature(enable = "avx512bw")]
6329    // unsafe fn widening_add(mad: __m512i) -> __m512i {
6330    //     _mm512_madd_epi16(mad, _mm512_set1_epi16(1))
6331    // }
6332    // ```
6333    //
6334    // If we implement this using generic vector intrinsics, the optimizer
6335    // will eliminate this pattern, and `vpmaddwd` will no longer be emitted.
6336    // For this reason, we use x86 intrinsics.
6337    unsafe { transmute(vpmaddwd(a.as_i16x32(), b.as_i16x32())) }
6338}
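
// Worked example (illustrative, not part of the original source): with all
// lanes of `a` set to 3 and all lanes of `b` set to 4, every pair of adjacent
// products is 3*4 + 3*4, so each of the sixteen 32-bit result lanes holds 24.
// With `b` set to all ones this reduces to the widening pairwise add used by
// the Adler-32 trick above. The `madd_demo` helper is hypothetical.
//
// ```rust
// #[target_feature(enable = "avx512bw")]
// unsafe fn madd_demo() -> __m512i {
//     let a = _mm512_set1_epi16(3);
//     let b = _mm512_set1_epi16(4);
//     _mm512_madd_epi16(a, b) // every i32 lane == 24
// }
// ```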
6339
6340/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6341///
6342/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_madd_epi16&expand=3512)
6343#[inline]
6344#[target_feature(enable = "avx512bw")]
6345#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6346#[cfg_attr(test, assert_instr(vpmaddwd))]
6347pub fn _mm512_mask_madd_epi16(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
6348    unsafe {
6349        let madd = _mm512_madd_epi16(a, b).as_i32x16();
6350        transmute(simd_select_bitmask(k, madd, src.as_i32x16()))
6351    }
6352}
6353
6354/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6355///
6356/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_madd_epi16&expand=3513)
6357#[inline]
6358#[target_feature(enable = "avx512bw")]
6359#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6360#[cfg_attr(test, assert_instr(vpmaddwd))]
6361pub fn _mm512_maskz_madd_epi16(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
6362    unsafe {
6363        let madd = _mm512_madd_epi16(a, b).as_i32x16();
6364        transmute(simd_select_bitmask(k, madd, i32x16::ZERO))
6365    }
6366}
6367
6368/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6369///
6370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_madd_epi16&expand=3509)
6371#[inline]
6372#[target_feature(enable = "avx512bw,avx512vl")]
6373#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6374#[cfg_attr(test, assert_instr(vpmaddwd))]
6375pub fn _mm256_mask_madd_epi16(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
6376    unsafe {
6377        let madd = _mm256_madd_epi16(a, b).as_i32x8();
6378        transmute(simd_select_bitmask(k, madd, src.as_i32x8()))
6379    }
6380}
6381
6382/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6383///
6384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_madd_epi16&expand=3510)
6385#[inline]
6386#[target_feature(enable = "avx512bw,avx512vl")]
6387#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6388#[cfg_attr(test, assert_instr(vpmaddwd))]
6389pub fn _mm256_maskz_madd_epi16(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
6390    unsafe {
6391        let madd = _mm256_madd_epi16(a, b).as_i32x8();
6392        transmute(simd_select_bitmask(k, madd, i32x8::ZERO))
6393    }
6394}
6395
6396/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6397///
6398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_madd_epi16&expand=3506)
6399#[inline]
6400#[target_feature(enable = "avx512bw,avx512vl")]
6401#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6402#[cfg_attr(test, assert_instr(vpmaddwd))]
6403pub fn _mm_mask_madd_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6404    unsafe {
6405        let madd = _mm_madd_epi16(a, b).as_i32x4();
6406        transmute(simd_select_bitmask(k, madd, src.as_i32x4()))
6407    }
6408}
6409
6410/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6411///
6412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_madd_epi16&expand=3507)
6413#[inline]
6414#[target_feature(enable = "avx512bw,avx512vl")]
6415#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6416#[cfg_attr(test, assert_instr(vpmaddwd))]
6417pub fn _mm_maskz_madd_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6418    unsafe {
6419        let madd = _mm_madd_epi16(a, b).as_i32x4();
6420        transmute(simd_select_bitmask(k, madd, i32x4::ZERO))
6421    }
6422}
6423
6424/// Vertically multiply each unsigned 8-bit integer from a with the corresponding signed 8-bit integer from b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst.
6425///
6426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maddubs_epi16&expand=3539)
6427#[inline]
6428#[target_feature(enable = "avx512bw")]
6429#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6430#[cfg_attr(test, assert_instr(vpmaddubsw))]
6431pub fn _mm512_maddubs_epi16(a: __m512i, b: __m512i) -> __m512i {
6432    unsafe { transmute(vpmaddubsw(a.as_u8x64(), b.as_i8x64())) }
6433}
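
// Worked example (illustrative, not part of the original source): `a` is read
// as unsigned bytes and `b` as signed bytes, so with every lane of `a` set to
// 10 and every lane of `b` set to -3, each adjacent pair contributes
// 10*(-3) + 10*(-3) = -60 and every 16-bit result lane holds -60; the
// saturating add only matters when the pair sum overflows `i16`. The
// `maddubs_demo` helper is hypothetical.
//
// ```rust
// #[target_feature(enable = "avx512bw")]
// unsafe fn maddubs_demo() -> __m512i {
//     let a = _mm512_set1_epi8(10); // interpreted as unsigned
//     let b = _mm512_set1_epi8(-3); // interpreted as signed
//     _mm512_maddubs_epi16(a, b)    // every i16 lane == -60
// }
// ```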
6434
6435/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6436///
6437/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_maddubs_epi16&expand=3540)
6438#[inline]
6439#[target_feature(enable = "avx512bw")]
6440#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6441#[cfg_attr(test, assert_instr(vpmaddubsw))]
6442pub fn _mm512_mask_maddubs_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
6443    unsafe {
6444        let madd = _mm512_maddubs_epi16(a, b).as_i16x32();
6445        transmute(simd_select_bitmask(k, madd, src.as_i16x32()))
6446    }
6447}
6448
6449/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6450///
6451/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_maddubs_epi16&expand=3541)
6452#[inline]
6453#[target_feature(enable = "avx512bw")]
6454#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6455#[cfg_attr(test, assert_instr(vpmaddubsw))]
6456pub fn _mm512_maskz_maddubs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
6457    unsafe {
6458        let madd = _mm512_maddubs_epi16(a, b).as_i16x32();
6459        transmute(simd_select_bitmask(k, madd, i16x32::ZERO))
6460    }
6461}
6462
6463/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6464///
6465/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_maddubs_epi16&expand=3537)
6466#[inline]
6467#[target_feature(enable = "avx512bw,avx512vl")]
6468#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6469#[cfg_attr(test, assert_instr(vpmaddubsw))]
6470pub fn _mm256_mask_maddubs_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
6471    unsafe {
6472        let madd = _mm256_maddubs_epi16(a, b).as_i16x16();
6473        transmute(simd_select_bitmask(k, madd, src.as_i16x16()))
6474    }
6475}
6476
6477/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6478///
6479/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_maddubs_epi16&expand=3538)
6480#[inline]
6481#[target_feature(enable = "avx512bw,avx512vl")]
6482#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6483#[cfg_attr(test, assert_instr(vpmaddubsw))]
6484pub fn _mm256_maskz_maddubs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
6485    unsafe {
6486        let madd = _mm256_maddubs_epi16(a, b).as_i16x16();
6487        transmute(simd_select_bitmask(k, madd, i16x16::ZERO))
6488    }
6489}
6490
6491/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6492///
6493/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_maddubs_epi16&expand=3534)
6494#[inline]
6495#[target_feature(enable = "avx512bw,avx512vl")]
6496#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6497#[cfg_attr(test, assert_instr(vpmaddubsw))]
6498pub fn _mm_mask_maddubs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6499    unsafe {
6500        let madd = _mm_maddubs_epi16(a, b).as_i16x8();
6501        transmute(simd_select_bitmask(k, madd, src.as_i16x8()))
6502    }
6503}
6504
6505/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6506///
6507/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_maddubs_epi16&expand=3535)
6508#[inline]
6509#[target_feature(enable = "avx512bw,avx512vl")]
6510#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6511#[cfg_attr(test, assert_instr(vpmaddubsw))]
6512pub fn _mm_maskz_maddubs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6513    unsafe {
6514        let madd = _mm_maddubs_epi16(a, b).as_i16x8();
6515        transmute(simd_select_bitmask(k, madd, i16x8::ZERO))
6516    }
6517}
6518
6519/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst.
6520///
6521/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packs_epi32&expand=4091)
6522#[inline]
6523#[target_feature(enable = "avx512bw")]
6524#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6525#[cfg_attr(test, assert_instr(vpackssdw))]
6526pub fn _mm512_packs_epi32(a: __m512i, b: __m512i) -> __m512i {
6527    unsafe { transmute(vpackssdw(a.as_i32x16(), b.as_i32x16())) }
6528}
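
// Note (illustrative, not part of the original source): like the narrower pack
// instructions, the 512-bit form interleaves `a` and `b` within each 128-bit
// lane (four words from `a`, then four from `b`) rather than concatenating the
// full vectors, and each 32-bit value is clamped to the `i16` range. The
// `packs_epi32_demo` helper is hypothetical.
//
// ```rust
// #[target_feature(enable = "avx512bw")]
// unsafe fn packs_epi32_demo() -> __m512i {
//     let a = _mm512_set1_epi32(70000);  // > i16::MAX, saturates to 32767
//     let b = _mm512_set1_epi32(-70000); // < i16::MIN, saturates to -32768
//     _mm512_packs_epi32(a, b)
// }
// ```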
6529
6530/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6531///
6532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packs_epi32&expand=4089)
6533#[inline]
6534#[target_feature(enable = "avx512bw")]
6535#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6536#[cfg_attr(test, assert_instr(vpackssdw))]
6537pub fn _mm512_mask_packs_epi32(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
6538    unsafe {
6539        let pack = _mm512_packs_epi32(a, b).as_i16x32();
6540        transmute(simd_select_bitmask(k, pack, src.as_i16x32()))
6541    }
6542}
6543
6544/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6545///
6546/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packs_epi32&expand=4090)
6547#[inline]
6548#[target_feature(enable = "avx512bw")]
6549#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6550#[cfg_attr(test, assert_instr(vpackssdw))]
6551pub fn _mm512_maskz_packs_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
6552    unsafe {
6553        let pack = _mm512_packs_epi32(a, b).as_i16x32();
6554        transmute(simd_select_bitmask(k, pack, i16x32::ZERO))
6555    }
6556}
6557
6558/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6559///
6560/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packs_epi32&expand=4086)
6561#[inline]
6562#[target_feature(enable = "avx512bw,avx512vl")]
6563#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6564#[cfg_attr(test, assert_instr(vpackssdw))]
6565pub fn _mm256_mask_packs_epi32(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
6566    unsafe {
6567        let pack = _mm256_packs_epi32(a, b).as_i16x16();
6568        transmute(simd_select_bitmask(k, pack, src.as_i16x16()))
6569    }
6570}
6571
6572/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6573///
6574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packs_epi32&expand=4087)
6575#[inline]
6576#[target_feature(enable = "avx512bw,avx512vl")]
6577#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6578#[cfg_attr(test, assert_instr(vpackssdw))]
6579pub fn _mm256_maskz_packs_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
6580    unsafe {
6581        let pack = _mm256_packs_epi32(a, b).as_i16x16();
6582        transmute(simd_select_bitmask(k, pack, i16x16::ZERO))
6583    }
6584}
6585
6586/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6587///
6588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packs_epi32&expand=4083)
6589#[inline]
6590#[target_feature(enable = "avx512bw,avx512vl")]
6591#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6592#[cfg_attr(test, assert_instr(vpackssdw))]
6593pub fn _mm_mask_packs_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6594    unsafe {
6595        let pack = _mm_packs_epi32(a, b).as_i16x8();
6596        transmute(simd_select_bitmask(k, pack, src.as_i16x8()))
6597    }
6598}
6599
6600/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6601///
6602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packs_epi32&expand=4084)
6603#[inline]
6604#[target_feature(enable = "avx512bw,avx512vl")]
6605#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6606#[cfg_attr(test, assert_instr(vpackssdw))]
6607pub fn _mm_maskz_packs_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6608    unsafe {
6609        let pack = _mm_packs_epi32(a, b).as_i16x8();
6610        transmute(simd_select_bitmask(k, pack, i16x8::ZERO))
6611    }
6612}
6613
6614/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst.
6615///
6616/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packs_epi16&expand=4082)
6617#[inline]
6618#[target_feature(enable = "avx512bw")]
6619#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6620#[cfg_attr(test, assert_instr(vpacksswb))]
6621pub fn _mm512_packs_epi16(a: __m512i, b: __m512i) -> __m512i {
6622    unsafe { transmute(vpacksswb(a.as_i16x32(), b.as_i16x32())) }
6623}
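
// Worked example (illustrative, not part of the original source): each 16-bit
// value is clamped to the `i8` range, again interleaving `a` and `b` within
// every 128-bit lane (eight bytes from `a`, then eight from `b`). The
// `packs_epi16_demo` helper is hypothetical.
//
// ```rust
// #[target_feature(enable = "avx512bw")]
// unsafe fn packs_epi16_demo() -> __m512i {
//     let a = _mm512_set1_epi16(300);  // saturates to 127
//     let b = _mm512_set1_epi16(-300); // saturates to -128
//     _mm512_packs_epi16(a, b)
// }
// ```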
6624
6625/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6626///
6627/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packs_epi16&expand=4080)
6628#[inline]
6629#[target_feature(enable = "avx512bw")]
6630#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6631#[cfg_attr(test, assert_instr(vpacksswb))]
6632pub fn _mm512_mask_packs_epi16(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
6633    unsafe {
6634        let pack = _mm512_packs_epi16(a, b).as_i8x64();
6635        transmute(simd_select_bitmask(k, pack, src.as_i8x64()))
6636    }
6637}
6638
6639/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6640///
6641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packs_epi16&expand=4081)
6642#[inline]
6643#[target_feature(enable = "avx512bw")]
6644#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6645#[cfg_attr(test, assert_instr(vpacksswb))]
6646pub fn _mm512_maskz_packs_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
6647    unsafe {
6648        let pack = _mm512_packs_epi16(a, b).as_i8x64();
6649        transmute(simd_select_bitmask(k, pack, i8x64::ZERO))
6650    }
6651}
6652
6653/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6654///
6655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packs_epi16&expand=4077)
6656#[inline]
6657#[target_feature(enable = "avx512bw,avx512vl")]
6658#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6659#[cfg_attr(test, assert_instr(vpacksswb))]
6660pub fn _mm256_mask_packs_epi16(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
6661    unsafe {
6662        let pack = _mm256_packs_epi16(a, b).as_i8x32();
6663        transmute(simd_select_bitmask(k, pack, src.as_i8x32()))
6664    }
6665}
6666
6667/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6668///
6669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packs_epi16&expand=4078)
6670#[inline]
6671#[target_feature(enable = "avx512bw,avx512vl")]
6672#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6673#[cfg_attr(test, assert_instr(vpacksswb))]
6674pub fn _mm256_maskz_packs_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
6675    unsafe {
6676        let pack = _mm256_packs_epi16(a, b).as_i8x32();
6677        transmute(simd_select_bitmask(k, pack, i8x32::ZERO))
6678    }
6679}
6680
6681/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6682///
6683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packs_epi16&expand=4074)
6684#[inline]
6685#[target_feature(enable = "avx512bw,avx512vl")]
6686#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6687#[cfg_attr(test, assert_instr(vpacksswb))]
6688pub fn _mm_mask_packs_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
6689    unsafe {
6690        let pack = _mm_packs_epi16(a, b).as_i8x16();
6691        transmute(simd_select_bitmask(k, pack, src.as_i8x16()))
6692    }
6693}
6694
6695/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6696///
6697/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packs_epi16&expand=4075)
6698#[inline]
6699#[target_feature(enable = "avx512bw,avx512vl")]
6700#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6701#[cfg_attr(test, assert_instr(vpacksswb))]
6702pub fn _mm_maskz_packs_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
6703    unsafe {
6704        let pack = _mm_packs_epi16(a, b).as_i8x16();
6705        transmute(simd_select_bitmask(k, pack, i8x16::ZERO))
6706    }
6707}
6708
6709/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst.
6710///
6711/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packus_epi32&expand=4130)
6712#[inline]
6713#[target_feature(enable = "avx512bw")]
6714#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6715#[cfg_attr(test, assert_instr(vpackusdw))]
6716pub fn _mm512_packus_epi32(a: __m512i, b: __m512i) -> __m512i {
6717    unsafe { transmute(vpackusdw(a.as_i32x16(), b.as_i32x16())) }
6718}
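
// Worked example (illustrative, not part of the original source): unsigned
// saturation clamps each signed 32-bit input to the `u16` range, so negative
// values become 0 and values above 65535 become 65535. The `packus_epi32_demo`
// helper is hypothetical.
//
// ```rust
// #[target_feature(enable = "avx512bw")]
// unsafe fn packus_epi32_demo() -> __m512i {
//     let a = _mm512_set1_epi32(-5);      // negative, saturates to 0
//     let b = _mm512_set1_epi32(100_000); // > u16::MAX, saturates to 65535
//     _mm512_packus_epi32(a, b)
// }
// ```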
6719
6720/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6721///
6722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packus_epi32&expand=4128)
6723#[inline]
6724#[target_feature(enable = "avx512bw")]
6725#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6726#[cfg_attr(test, assert_instr(vpackusdw))]
6727pub fn _mm512_mask_packus_epi32(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
6728    unsafe {
6729        let pack = _mm512_packus_epi32(a, b).as_i16x32();
6730        transmute(simd_select_bitmask(k, pack, src.as_i16x32()))
6731    }
6732}
6733
6734/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6735///
6736/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packus_epi32&expand=4129)
6737#[inline]
6738#[target_feature(enable = "avx512bw")]
6739#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6740#[cfg_attr(test, assert_instr(vpackusdw))]
6741pub fn _mm512_maskz_packus_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
6742    unsafe {
6743        let pack = _mm512_packus_epi32(a, b).as_i16x32();
6744        transmute(simd_select_bitmask(k, pack, i16x32::ZERO))
6745    }
6746}
6747
6748/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6749///
6750/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packus_epi32&expand=4125)
6751#[inline]
6752#[target_feature(enable = "avx512bw,avx512vl")]
6753#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6754#[cfg_attr(test, assert_instr(vpackusdw))]
6755pub fn _mm256_mask_packus_epi32(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
6756    unsafe {
6757        let pack = _mm256_packus_epi32(a, b).as_i16x16();
6758        transmute(simd_select_bitmask(k, pack, src.as_i16x16()))
6759    }
6760}
6761
6762/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6763///
6764/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packus_epi32&expand=4126)
6765#[inline]
6766#[target_feature(enable = "avx512bw,avx512vl")]
6767#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6768#[cfg_attr(test, assert_instr(vpackusdw))]
6769pub fn _mm256_maskz_packus_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
6770    unsafe {
6771        let pack = _mm256_packus_epi32(a, b).as_i16x16();
6772        transmute(simd_select_bitmask(k, pack, i16x16::ZERO))
6773    }
6774}
6775
6776/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6777///
6778/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packus_epi32&expand=4122)
6779#[inline]
6780#[target_feature(enable = "avx512bw,avx512vl")]
6781#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6782#[cfg_attr(test, assert_instr(vpackusdw))]
6783pub fn _mm_mask_packus_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6784    unsafe {
6785        let pack = _mm_packus_epi32(a, b).as_i16x8();
6786        transmute(simd_select_bitmask(k, pack, src.as_i16x8()))
6787    }
6788}
6789
6790/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6791///
6792/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packus_epi32&expand=4123)
6793#[inline]
6794#[target_feature(enable = "avx512bw,avx512vl")]
6795#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6796#[cfg_attr(test, assert_instr(vpackusdw))]
6797pub fn _mm_maskz_packus_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6798    unsafe {
6799        let pack = _mm_packus_epi32(a, b).as_i16x8();
6800        transmute(simd_select_bitmask(k, pack, i16x8::ZERO))
6801    }
6802}
6803
6804/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst.
6805///
6806/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packus_epi16&expand=4121)
6807#[inline]
6808#[target_feature(enable = "avx512bw")]
6809#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6810#[cfg_attr(test, assert_instr(vpackuswb))]
6811pub fn _mm512_packus_epi16(a: __m512i, b: __m512i) -> __m512i {
6812    unsafe { transmute(vpackuswb(a.as_i16x32(), b.as_i16x32())) }
6813}
6814
6815/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6816///
6817/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packus_epi16&expand=4119)
6818#[inline]
6819#[target_feature(enable = "avx512bw")]
6820#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6821#[cfg_attr(test, assert_instr(vpackuswb))]
6822pub fn _mm512_mask_packus_epi16(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
6823    unsafe {
6824        let pack = _mm512_packus_epi16(a, b).as_i8x64();
6825        transmute(simd_select_bitmask(k, pack, src.as_i8x64()))
6826    }
6827}
6828
6829/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6830///
6831/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packus_epi16&expand=4120)
6832#[inline]
6833#[target_feature(enable = "avx512bw")]
6834#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6835#[cfg_attr(test, assert_instr(vpackuswb))]
6836pub fn _mm512_maskz_packus_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
6837    unsafe {
6838        let pack = _mm512_packus_epi16(a, b).as_i8x64();
6839        transmute(simd_select_bitmask(k, pack, i8x64::ZERO))
6840    }
6841}
6842
6843/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6844///
6845/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packus_epi16&expand=4116)
6846#[inline]
6847#[target_feature(enable = "avx512bw,avx512vl")]
6848#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6849#[cfg_attr(test, assert_instr(vpackuswb))]
6850pub fn _mm256_mask_packus_epi16(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
6851    unsafe {
6852        let pack = _mm256_packus_epi16(a, b).as_i8x32();
6853        transmute(simd_select_bitmask(k, pack, src.as_i8x32()))
6854    }
6855}
6856
6857/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6858///
6859/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packus_epi16&expand=4117)
6860#[inline]
6861#[target_feature(enable = "avx512bw,avx512vl")]
6862#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6863#[cfg_attr(test, assert_instr(vpackuswb))]
6864pub fn _mm256_maskz_packus_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
6865    unsafe {
6866        let pack = _mm256_packus_epi16(a, b).as_i8x32();
6867        transmute(simd_select_bitmask(k, pack, i8x32::ZERO))
6868    }
6869}
6870
6871/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6872///
6873/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packus_epi16&expand=4113)
6874#[inline]
6875#[target_feature(enable = "avx512bw,avx512vl")]
6876#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6877#[cfg_attr(test, assert_instr(vpackuswb))]
6878pub fn _mm_mask_packus_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
6879    unsafe {
6880        let pack = _mm_packus_epi16(a, b).as_i8x16();
6881        transmute(simd_select_bitmask(k, pack, src.as_i8x16()))
6882    }
6883}
6884
6885/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6886///
6887/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packus_epi16&expand=4114)
6888#[inline]
6889#[target_feature(enable = "avx512bw,avx512vl")]
6890#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6891#[cfg_attr(test, assert_instr(vpackuswb))]
6892pub fn _mm_maskz_packus_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
6893    unsafe {
6894        let pack = _mm_packus_epi16(a, b).as_i8x16();
6895        transmute(simd_select_bitmask(k, pack, i8x16::ZERO))
6896    }
6897}
6898
6899/// Average packed unsigned 16-bit integers in a and b, and store the results in dst.
6900///
6901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_avg_epu16&expand=388)
6902#[inline]
6903#[target_feature(enable = "avx512bw")]
6904#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6905#[cfg_attr(test, assert_instr(vpavgw))]
6906#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6907pub const fn _mm512_avg_epu16(a: __m512i, b: __m512i) -> __m512i {
6908    unsafe {
6909        let a = simd_cast::<_, u32x32>(a.as_u16x32());
6910        let b = simd_cast::<_, u32x32>(b.as_u16x32());
6911        let r = simd_shr(simd_add(simd_add(a, b), u32x32::splat(1)), u32x32::splat(1));
6912        transmute(simd_cast::<_, u16x32>(r))
6913    }
6914}
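
// Worked example (illustrative, not part of the original source): as the body
// above shows, the average is computed as (a + b + 1) >> 1 in a wider type, so
// halves round up and the addition cannot overflow: avg(1, 2) == 2 and
// avg(65535, 65535) == 65535. The `avg_demo` helper is hypothetical.
//
// ```rust
// #[target_feature(enable = "avx512bw")]
// unsafe fn avg_demo() -> __m512i {
//     let a = _mm512_set1_epi16(1);
//     let b = _mm512_set1_epi16(2);
//     _mm512_avg_epu16(a, b) // every u16 lane == 2
// }
// ```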
6915
6916/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6917///
6918/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_avg_epu16&expand=389)
6919#[inline]
6920#[target_feature(enable = "avx512bw")]
6921#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6922#[cfg_attr(test, assert_instr(vpavgw))]
6923#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6924pub const fn _mm512_mask_avg_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
6925    unsafe {
6926        let avg = _mm512_avg_epu16(a, b).as_u16x32();
6927        transmute(simd_select_bitmask(k, avg, src.as_u16x32()))
6928    }
6929}
6930
6931/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6932///
6933/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_avg_epu16&expand=390)
6934#[inline]
6935#[target_feature(enable = "avx512bw")]
6936#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6937#[cfg_attr(test, assert_instr(vpavgw))]
6938#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6939pub const fn _mm512_maskz_avg_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
6940    unsafe {
6941        let avg = _mm512_avg_epu16(a, b).as_u16x32();
6942        transmute(simd_select_bitmask(k, avg, u16x32::ZERO))
6943    }
6944}
6945
6946/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6947///
6948/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_avg_epu16&expand=386)
6949#[inline]
6950#[target_feature(enable = "avx512bw,avx512vl")]
6951#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6952#[cfg_attr(test, assert_instr(vpavgw))]
6953#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6954pub const fn _mm256_mask_avg_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
6955    unsafe {
6956        let avg = _mm256_avg_epu16(a, b).as_u16x16();
6957        transmute(simd_select_bitmask(k, avg, src.as_u16x16()))
6958    }
6959}
6960
6961/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6962///
6963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_avg_epu16&expand=387)
6964#[inline]
6965#[target_feature(enable = "avx512bw,avx512vl")]
6966#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6967#[cfg_attr(test, assert_instr(vpavgw))]
6968#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6969pub const fn _mm256_maskz_avg_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
6970    unsafe {
6971        let avg = _mm256_avg_epu16(a, b).as_u16x16();
6972        transmute(simd_select_bitmask(k, avg, u16x16::ZERO))
6973    }
6974}
6975
6976/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6977///
6978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_avg_epu16&expand=383)
6979#[inline]
6980#[target_feature(enable = "avx512bw,avx512vl")]
6981#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6982#[cfg_attr(test, assert_instr(vpavgw))]
6983#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6984pub const fn _mm_mask_avg_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6985    unsafe {
6986        let avg = _mm_avg_epu16(a, b).as_u16x8();
6987        transmute(simd_select_bitmask(k, avg, src.as_u16x8()))
6988    }
6989}
6990
6991/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6992///
6993/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_avg_epu16&expand=384)
6994#[inline]
6995#[target_feature(enable = "avx512bw,avx512vl")]
6996#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6997#[cfg_attr(test, assert_instr(vpavgw))]
6998#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6999pub const fn _mm_maskz_avg_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
7000    unsafe {
7001        let avg = _mm_avg_epu16(a, b).as_u16x8();
7002        transmute(simd_select_bitmask(k, avg, u16x8::ZERO))
7003    }
7004}
7005
7006/// Average packed unsigned 8-bit integers in a and b, and store the results in dst.
7007///
7008/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_avg_epu8&expand=397)
7009#[inline]
7010#[target_feature(enable = "avx512bw")]
7011#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7012#[cfg_attr(test, assert_instr(vpavgb))]
7013#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7014pub const fn _mm512_avg_epu8(a: __m512i, b: __m512i) -> __m512i {
7015    unsafe {
7016        let a = simd_cast::<_, u16x64>(a.as_u8x64());
7017        let b = simd_cast::<_, u16x64>(b.as_u8x64());
7018        let r = simd_shr(simd_add(simd_add(a, b), u16x64::splat(1)), u16x64::splat(1));
7019        transmute(simd_cast::<_, u8x64>(r))
7020    }
7021}
7022
7023/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7024///
7025/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_avg_epu8&expand=398)
7026#[inline]
7027#[target_feature(enable = "avx512bw")]
7028#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7029#[cfg_attr(test, assert_instr(vpavgb))]
7030#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7031pub const fn _mm512_mask_avg_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
7032    unsafe {
7033        let avg = _mm512_avg_epu8(a, b).as_u8x64();
7034        transmute(simd_select_bitmask(k, avg, src.as_u8x64()))
7035    }
7036}
7037
7038/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7039///
7040/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_avg_epu8&expand=399)
7041#[inline]
7042#[target_feature(enable = "avx512bw")]
7043#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7044#[cfg_attr(test, assert_instr(vpavgb))]
7045#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7046pub const fn _mm512_maskz_avg_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
7047    unsafe {
7048        let avg = _mm512_avg_epu8(a, b).as_u8x64();
7049        transmute(simd_select_bitmask(k, avg, u8x64::ZERO))
7050    }
7051}
7052
7053/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7054///
7055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_avg_epu8&expand=395)
7056#[inline]
7057#[target_feature(enable = "avx512bw,avx512vl")]
7058#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7059#[cfg_attr(test, assert_instr(vpavgb))]
7060#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7061pub const fn _mm256_mask_avg_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
7062    unsafe {
7063        let avg = _mm256_avg_epu8(a, b).as_u8x32();
7064        transmute(simd_select_bitmask(k, avg, src.as_u8x32()))
7065    }
7066}
7067
7068/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7069///
7070/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_avg_epu8&expand=396)
7071#[inline]
7072#[target_feature(enable = "avx512bw,avx512vl")]
7073#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7074#[cfg_attr(test, assert_instr(vpavgb))]
7075#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7076pub const fn _mm256_maskz_avg_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
7077    unsafe {
7078        let avg = _mm256_avg_epu8(a, b).as_u8x32();
7079        transmute(simd_select_bitmask(k, avg, u8x32::ZERO))
7080    }
7081}
7082
7083/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7084///
7085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_avg_epu8&expand=392)
7086#[inline]
7087#[target_feature(enable = "avx512bw,avx512vl")]
7088#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7089#[cfg_attr(test, assert_instr(vpavgb))]
7090#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7091pub const fn _mm_mask_avg_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
7092    unsafe {
7093        let avg = _mm_avg_epu8(a, b).as_u8x16();
7094        transmute(simd_select_bitmask(k, avg, src.as_u8x16()))
7095    }
7096}
7097
7098/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7099///
7100/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_avg_epu8&expand=393)
7101#[inline]
7102#[target_feature(enable = "avx512bw,avx512vl")]
7103#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7104#[cfg_attr(test, assert_instr(vpavgb))]
7105#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7106pub const fn _mm_maskz_avg_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
7107    unsafe {
7108        let avg = _mm_avg_epu8(a, b).as_u8x16();
7109        transmute(simd_select_bitmask(k, avg, u8x16::ZERO))
7110    }
7111}
7112
7113/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst.
7114///
7115/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi16&expand=5271)
7116#[inline]
7117#[target_feature(enable = "avx512bw")]
7118#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7119#[cfg_attr(test, assert_instr(vpsllw))]
7120pub fn _mm512_sll_epi16(a: __m512i, count: __m128i) -> __m512i {
7121    unsafe { transmute(vpsllw(a.as_i16x32(), count.as_i16x8())) }
7122}
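
// --- Illustrative sketch, not part of the original source ---
// `_mm512_sll_epi16` shifts every 16-bit lane left by the same count, taken
// from the low 64 bits of `count`; a count of 16 or more clears every lane.
// The helper name `_sll_epi16_demo` is hypothetical.
#[cfg(test)]
#[target_feature(enable = "avx512bw")]
fn _sll_epi16_demo(a: __m512i) -> (__m512i, __m512i) {
    let by_three = _mm512_sll_epi16(a, _mm_set_epi32(0, 0, 0, 3)); // each lane == lane << 3
    let cleared = _mm512_sll_epi16(a, _mm_set_epi32(0, 0, 0, 16)); // each lane == 0
    (by_three, cleared)
}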
7123
7124/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7125///
7126/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi16&expand=5269)
7127#[inline]
7128#[target_feature(enable = "avx512bw")]
7129#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7130#[cfg_attr(test, assert_instr(vpsllw))]
7131pub fn _mm512_mask_sll_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
7132    unsafe {
7133        let shf = _mm512_sll_epi16(a, count).as_i16x32();
7134        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
7135    }
7136}
7137
7138/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7139///
7140/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi16&expand=5270)
7141#[inline]
7142#[target_feature(enable = "avx512bw")]
7143#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7144#[cfg_attr(test, assert_instr(vpsllw))]
7145pub fn _mm512_maskz_sll_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
7146    unsafe {
7147        let shf = _mm512_sll_epi16(a, count).as_i16x32();
7148        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
7149    }
7150}
7151
7152/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7153///
7154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi16&expand=5266)
7155#[inline]
7156#[target_feature(enable = "avx512bw,avx512vl")]
7157#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7158#[cfg_attr(test, assert_instr(vpsllw))]
7159pub fn _mm256_mask_sll_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
7160    unsafe {
7161        let shf = _mm256_sll_epi16(a, count).as_i16x16();
7162        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
7163    }
7164}
7165
7166/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7167///
7168/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi16&expand=5267)
7169#[inline]
7170#[target_feature(enable = "avx512bw,avx512vl")]
7171#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7172#[cfg_attr(test, assert_instr(vpsllw))]
7173pub fn _mm256_maskz_sll_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
7174    unsafe {
7175        let shf = _mm256_sll_epi16(a, count).as_i16x16();
7176        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
7177    }
7178}
7179
7180/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7181///
7182/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi16&expand=5263)
7183#[inline]
7184#[target_feature(enable = "avx512bw,avx512vl")]
7185#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7186#[cfg_attr(test, assert_instr(vpsllw))]
7187pub fn _mm_mask_sll_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7188    unsafe {
7189        let shf = _mm_sll_epi16(a, count).as_i16x8();
7190        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
7191    }
7192}
7193
7194/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7195///
7196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi16&expand=5264)
7197#[inline]
7198#[target_feature(enable = "avx512bw,avx512vl")]
7199#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7200#[cfg_attr(test, assert_instr(vpsllw))]
7201pub fn _mm_maskz_sll_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7202    unsafe {
7203        let shf = _mm_sll_epi16(a, count).as_i16x8();
7204        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
7205    }
7206}
7207
7208/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
7209///
7210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi16&expand=5301)
7211#[inline]
7212#[target_feature(enable = "avx512bw")]
7213#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7214#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
7215#[rustc_legacy_const_generics(1)]
7216#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7217pub const fn _mm512_slli_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
7218    unsafe {
7219        static_assert_uimm_bits!(IMM8, 8);
7220        if IMM8 >= 16 {
7221            _mm512_setzero_si512()
7222        } else {
7223            transmute(simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16)))
7224        }
7225    }
7226}
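
// --- Illustrative sketch, not part of the original source ---
// The immediate variant shifts every lane left by a compile-time constant; as
// the branch above shows, an immediate of 16 or more yields all zeros rather
// than an oversized SIMD shift. The helper name `_slli_epi16_demo` is
// hypothetical.
#[cfg(test)]
#[target_feature(enable = "avx512bw")]
fn _slli_epi16_demo() -> (__m512i, __m512i) {
    let a = _mm512_set1_epi16(1);
    let shifted = _mm512_slli_epi16::<8>(a); // every lane == 1 << 8 == 256
    let cleared = _mm512_slli_epi16::<16>(a); // every lane == 0
    (shifted, cleared)
}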
7227
7228/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7229///
7230/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi16&expand=5299)
7231#[inline]
7232#[target_feature(enable = "avx512bw")]
7233#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7234#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
7235#[rustc_legacy_const_generics(3)]
7236#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7237pub const fn _mm512_mask_slli_epi16<const IMM8: u32>(
7238    src: __m512i,
7239    k: __mmask32,
7240    a: __m512i,
7241) -> __m512i {
7242    unsafe {
7243        static_assert_uimm_bits!(IMM8, 8);
7244        let shf = if IMM8 >= 16 {
7245            u16x32::ZERO
7246        } else {
7247            simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16))
7248        };
7249        transmute(simd_select_bitmask(k, shf, src.as_u16x32()))
7250    }
7251}
7252
7253/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7254///
7255/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi16&expand=5300)
7256#[inline]
7257#[target_feature(enable = "avx512bw")]
7258#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7259#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
7260#[rustc_legacy_const_generics(2)]
7261#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7262pub const fn _mm512_maskz_slli_epi16<const IMM8: u32>(k: __mmask32, a: __m512i) -> __m512i {
7263    unsafe {
7264        static_assert_uimm_bits!(IMM8, 8);
7265        if IMM8 >= 16 {
7266            _mm512_setzero_si512()
7267        } else {
7268            let shf = simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16));
7269            transmute(simd_select_bitmask(k, shf, u16x32::ZERO))
7270        }
7271    }
7272}
7273
7274/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7275///
7276/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi16&expand=5296)
7277#[inline]
7278#[target_feature(enable = "avx512bw,avx512vl")]
7279#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7280#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
7281#[rustc_legacy_const_generics(3)]
7282#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7283pub const fn _mm256_mask_slli_epi16<const IMM8: u32>(
7284    src: __m256i,
7285    k: __mmask16,
7286    a: __m256i,
7287) -> __m256i {
7288    unsafe {
7289        static_assert_uimm_bits!(IMM8, 8);
7290        let shf = if IMM8 >= 16 {
7291            u16x16::ZERO
7292        } else {
7293            simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16))
7294        };
7295        transmute(simd_select_bitmask(k, shf, src.as_u16x16()))
7296    }
7297}
7298
7299/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7300///
7301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi16&expand=5297)
7302#[inline]
7303#[target_feature(enable = "avx512bw,avx512vl")]
7304#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7305#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
7306#[rustc_legacy_const_generics(2)]
7307#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7308pub const fn _mm256_maskz_slli_epi16<const IMM8: u32>(k: __mmask16, a: __m256i) -> __m256i {
7309    unsafe {
7310        static_assert_uimm_bits!(IMM8, 8);
7311        if IMM8 >= 16 {
7312            _mm256_setzero_si256()
7313        } else {
7314            let shf = simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16));
7315            transmute(simd_select_bitmask(k, shf, u16x16::ZERO))
7316        }
7317    }
7318}
7319
7320/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7321///
7322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi16&expand=5293)
7323#[inline]
7324#[target_feature(enable = "avx512bw,avx512vl")]
7325#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7326#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
7327#[rustc_legacy_const_generics(3)]
7328#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7329pub const fn _mm_mask_slli_epi16<const IMM8: u32>(
7330    src: __m128i,
7331    k: __mmask8,
7332    a: __m128i,
7333) -> __m128i {
7334    unsafe {
7335        static_assert_uimm_bits!(IMM8, 8);
7336        let shf = if IMM8 >= 16 {
7337            u16x8::ZERO
7338        } else {
7339            simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16))
7340        };
7341        transmute(simd_select_bitmask(k, shf, src.as_u16x8()))
7342    }
7343}
7344
7345/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7346///
7347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi16&expand=5294)
7348#[inline]
7349#[target_feature(enable = "avx512bw,avx512vl")]
7350#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7351#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
7352#[rustc_legacy_const_generics(2)]
7353#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7354pub const fn _mm_maskz_slli_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
7355    unsafe {
7356        static_assert_uimm_bits!(IMM8, 8);
7357        if IMM8 >= 16 {
7358            _mm_setzero_si128()
7359        } else {
7360            let shf = simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16));
7361            transmute(simd_select_bitmask(k, shf, u16x8::ZERO))
7362        }
7363    }
7364}
7365
7366/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
7367///
7368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi16&expand=5333)
7369#[inline]
7370#[target_feature(enable = "avx512bw")]
7371#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7372#[cfg_attr(test, assert_instr(vpsllvw))]
7373#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7374pub const fn _mm512_sllv_epi16(a: __m512i, count: __m512i) -> __m512i {
7375    unsafe {
7376        let count = count.as_u16x32();
7377        let no_overflow: u16x32 = simd_lt(count, u16x32::splat(u16::BITS as u16));
7378        let count = simd_select(no_overflow, count, u16x32::ZERO);
7379        simd_select(no_overflow, simd_shl(a.as_u16x32(), count), u16x32::ZERO).as_m512i()
7380    }
7381}
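
// --- Illustrative sketch, not part of the original source ---
// The variable variant reads an independent shift count from each 16-bit lane
// of `count`; lanes whose count is 16 or more become zero, mirroring the
// `no_overflow` select above. The helper name `_sllv_epi16_demo` is
// hypothetical.
#[cfg(test)]
#[target_feature(enable = "avx512bw")]
fn _sllv_epi16_demo() -> __m512i {
    let a = _mm512_set1_epi16(1);
    // Lane i gets shift count i, so lanes 0..=15 hold 1 << i and lanes 16..=31 are 0.
    let count = _mm512_set_epi16(
        31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16,
        15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
    );
    _mm512_sllv_epi16(a, count)
}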
7382
7383/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7384///
7385/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi16&expand=5331)
7386#[inline]
7387#[target_feature(enable = "avx512bw")]
7388#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7389#[cfg_attr(test, assert_instr(vpsllvw))]
7390#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7391pub const fn _mm512_mask_sllv_epi16(
7392    src: __m512i,
7393    k: __mmask32,
7394    a: __m512i,
7395    count: __m512i,
7396) -> __m512i {
7397    unsafe {
7398        let shf = _mm512_sllv_epi16(a, count).as_i16x32();
7399        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
7400    }
7401}
7402
7403/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7404///
7405/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi16&expand=5332)
7406#[inline]
7407#[target_feature(enable = "avx512bw")]
7408#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7409#[cfg_attr(test, assert_instr(vpsllvw))]
7410#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7411pub const fn _mm512_maskz_sllv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
7412    unsafe {
7413        let shf = _mm512_sllv_epi16(a, count).as_i16x32();
7414        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
7415    }
7416}
7417
7418/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
7419///
7420/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sllv_epi16&expand=5330)
7421#[inline]
7422#[target_feature(enable = "avx512bw,avx512vl")]
7423#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7424#[cfg_attr(test, assert_instr(vpsllvw))]
7425#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7426pub const fn _mm256_sllv_epi16(a: __m256i, count: __m256i) -> __m256i {
7427    unsafe {
7428        let count = count.as_u16x16();
7429        let no_overflow: u16x16 = simd_lt(count, u16x16::splat(u16::BITS as u16));
7430        let count = simd_select(no_overflow, count, u16x16::ZERO);
7431        simd_select(no_overflow, simd_shl(a.as_u16x16(), count), u16x16::ZERO).as_m256i()
7432    }
7433}
7434
7435/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7436///
7437/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi16&expand=5328)
7438#[inline]
7439#[target_feature(enable = "avx512bw,avx512vl")]
7440#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7441#[cfg_attr(test, assert_instr(vpsllvw))]
7442#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7443pub const fn _mm256_mask_sllv_epi16(
7444    src: __m256i,
7445    k: __mmask16,
7446    a: __m256i,
7447    count: __m256i,
7448) -> __m256i {
7449    unsafe {
7450        let shf = _mm256_sllv_epi16(a, count).as_i16x16();
7451        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
7452    }
7453}
7454
7455/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7456///
7457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi16&expand=5329)
7458#[inline]
7459#[target_feature(enable = "avx512bw,avx512vl")]
7460#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7461#[cfg_attr(test, assert_instr(vpsllvw))]
7462#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7463pub const fn _mm256_maskz_sllv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
7464    unsafe {
7465        let shf = _mm256_sllv_epi16(a, count).as_i16x16();
7466        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
7467    }
7468}
7469
7470/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
7471///
7472/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sllv_epi16&expand=5327)
7473#[inline]
7474#[target_feature(enable = "avx512bw,avx512vl")]
7475#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7476#[cfg_attr(test, assert_instr(vpsllvw))]
7477#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7478pub const fn _mm_sllv_epi16(a: __m128i, count: __m128i) -> __m128i {
7479    unsafe {
7480        let count = count.as_u16x8();
7481        let no_overflow: u16x8 = simd_lt(count, u16x8::splat(u16::BITS as u16));
7482        let count = simd_select(no_overflow, count, u16x8::ZERO);
7483        simd_select(no_overflow, simd_shl(a.as_u16x8(), count), u16x8::ZERO).as_m128i()
7484    }
7485}
7486
7487/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7488///
7489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi16&expand=5325)
7490#[inline]
7491#[target_feature(enable = "avx512bw,avx512vl")]
7492#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7493#[cfg_attr(test, assert_instr(vpsllvw))]
7494#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7495pub const fn _mm_mask_sllv_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7496    unsafe {
7497        let shf = _mm_sllv_epi16(a, count).as_i16x8();
7498        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
7499    }
7500}
7501
7502/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7503///
7504/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi16&expand=5326)
7505#[inline]
7506#[target_feature(enable = "avx512bw,avx512vl")]
7507#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7508#[cfg_attr(test, assert_instr(vpsllvw))]
7509#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7510pub const fn _mm_maskz_sllv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7511    unsafe {
7512        let shf = _mm_sllv_epi16(a, count).as_i16x8();
7513        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
7514    }
7515}
7516
7517/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst.
7518///
7519/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi16&expand=5483)
7520#[inline]
7521#[target_feature(enable = "avx512bw")]
7522#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7523#[cfg_attr(test, assert_instr(vpsrlw))]
7524pub fn _mm512_srl_epi16(a: __m512i, count: __m128i) -> __m512i {
7525    unsafe { transmute(vpsrlw(a.as_i16x32(), count.as_i16x8())) }
7526}
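
// --- Illustrative sketch, not part of the original source ---
// Logical right shift by a single count held in the low 64 bits of `count`:
// zeros are shifted in from the left, and a count of 16 or more clears every
// lane. The helper name `_srl_epi16_demo` is hypothetical.
#[cfg(test)]
#[target_feature(enable = "avx512bw")]
fn _srl_epi16_demo() -> __m512i {
    let a = _mm512_set1_epi16(-1); // every lane 0xFFFF
    // Every lane becomes 0xFFFF >> 4 == 0x0FFF; the sign bit is not replicated.
    _mm512_srl_epi16(a, _mm_set_epi32(0, 0, 0, 4))
}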
7527
7528/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7529///
7530/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi16&expand=5481)
7531#[inline]
7532#[target_feature(enable = "avx512bw")]
7533#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7534#[cfg_attr(test, assert_instr(vpsrlw))]
7535pub fn _mm512_mask_srl_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
7536    unsafe {
7537        let shf = _mm512_srl_epi16(a, count).as_i16x32();
7538        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
7539    }
7540}
7541
7542/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7543///
7544/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi16&expand=5482)
7545#[inline]
7546#[target_feature(enable = "avx512bw")]
7547#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7548#[cfg_attr(test, assert_instr(vpsrlw))]
7549pub fn _mm512_maskz_srl_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
7550    unsafe {
7551        let shf = _mm512_srl_epi16(a, count).as_i16x32();
7552        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
7553    }
7554}
7555
7556/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7557///
7558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi16&expand=5478)
7559#[inline]
7560#[target_feature(enable = "avx512bw,avx512vl")]
7561#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7562#[cfg_attr(test, assert_instr(vpsrlw))]
7563pub fn _mm256_mask_srl_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
7564    unsafe {
7565        let shf = _mm256_srl_epi16(a, count).as_i16x16();
7566        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
7567    }
7568}
7569
7570/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7571///
7572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi16&expand=5479)
7573#[inline]
7574#[target_feature(enable = "avx512bw,avx512vl")]
7575#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7576#[cfg_attr(test, assert_instr(vpsrlw))]
7577pub fn _mm256_maskz_srl_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
7578    unsafe {
7579        let shf = _mm256_srl_epi16(a, count).as_i16x16();
7580        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
7581    }
7582}
7583
7584/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7585///
7586/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi16&expand=5475)
7587#[inline]
7588#[target_feature(enable = "avx512bw,avx512vl")]
7589#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7590#[cfg_attr(test, assert_instr(vpsrlw))]
7591pub fn _mm_mask_srl_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7592    unsafe {
7593        let shf = _mm_srl_epi16(a, count).as_i16x8();
7594        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
7595    }
7596}
7597
7598/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7599///
7600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi16&expand=5476)
7601#[inline]
7602#[target_feature(enable = "avx512bw,avx512vl")]
7603#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7604#[cfg_attr(test, assert_instr(vpsrlw))]
7605pub fn _mm_maskz_srl_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7606    unsafe {
7607        let shf = _mm_srl_epi16(a, count).as_i16x8();
7608        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
7609    }
7610}
7611
7612/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
7613///
7614/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi16&expand=5513)
7615#[inline]
7616#[target_feature(enable = "avx512bw")]
7617#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7618#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
7619#[rustc_legacy_const_generics(1)]
7620#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7621pub const fn _mm512_srli_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
7622    unsafe {
7623        static_assert_uimm_bits!(IMM8, 8);
7624        if IMM8 >= 16 {
7625            _mm512_setzero_si512()
7626        } else {
7627            transmute(simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16)))
7628        }
7629    }
7630}
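
// --- Illustrative sketch, not part of the original source ---
// Immediate logical right shift; as in the branch above, an immediate of 16 or
// more produces all zeros. The helper name `_srli_epi16_demo` is hypothetical.
#[cfg(test)]
#[target_feature(enable = "avx512bw")]
fn _srli_epi16_demo() -> (__m512i, __m512i) {
    let a = _mm512_set1_epi16(0x0100);
    let halved = _mm512_srli_epi16::<1>(a); // every lane == 0x0080
    let cleared = _mm512_srli_epi16::<16>(a); // every lane == 0
    (halved, cleared)
}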
7631
7632/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7633///
7634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi16&expand=5511)
7635#[inline]
7636#[target_feature(enable = "avx512bw")]
7637#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7638#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
7639#[rustc_legacy_const_generics(3)]
7640#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7641pub const fn _mm512_mask_srli_epi16<const IMM8: u32>(
7642    src: __m512i,
7643    k: __mmask32,
7644    a: __m512i,
7645) -> __m512i {
7646    unsafe {
7647        static_assert_uimm_bits!(IMM8, 8);
7648        let shf = if IMM8 >= 16 {
7649            u16x32::ZERO
7650        } else {
7651            simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16))
7652        };
7653        transmute(simd_select_bitmask(k, shf, src.as_u16x32()))
7654    }
7655}
7656
7657/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7658///
7659/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi16&expand=5512)
7660#[inline]
7661#[target_feature(enable = "avx512bw")]
7662#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7663#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
7664#[rustc_legacy_const_generics(2)]
7665#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7666pub const fn _mm512_maskz_srli_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
7667    unsafe {
7668        static_assert_uimm_bits!(IMM8, 8);
7669        // IMM8 should be u32; the Intel documentation appears to be incorrect here.
7670        if IMM8 >= 16 {
7671            _mm512_setzero_si512()
7672        } else {
7673            let shf = simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16));
7674            transmute(simd_select_bitmask(k, shf, u16x32::ZERO))
7675        }
7676    }
7677}
7678
7679/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7680///
7681/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi16&expand=5508)
7682#[inline]
7683#[target_feature(enable = "avx512bw,avx512vl")]
7684#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7685#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
7686#[rustc_legacy_const_generics(3)]
7687#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7688pub const fn _mm256_mask_srli_epi16<const IMM8: i32>(
7689    src: __m256i,
7690    k: __mmask16,
7691    a: __m256i,
7692) -> __m256i {
7693    unsafe {
7694        static_assert_uimm_bits!(IMM8, 8);
7695        let shf = _mm256_srli_epi16::<IMM8>(a);
7696        transmute(simd_select_bitmask(k, shf.as_i16x16(), src.as_i16x16()))
7697    }
7698}
7699
7700/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7701///
7702/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi16&expand=5509)
7703#[inline]
7704#[target_feature(enable = "avx512bw,avx512vl")]
7705#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7706#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
7707#[rustc_legacy_const_generics(2)]
7708#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7709pub const fn _mm256_maskz_srli_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
7710    unsafe {
7711        static_assert_uimm_bits!(IMM8, 8);
7712        let shf = _mm256_srli_epi16::<IMM8>(a);
7713        transmute(simd_select_bitmask(k, shf.as_i16x16(), i16x16::ZERO))
7714    }
7715}
7716
7717/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7718///
7719/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi16&expand=5505)
7720#[inline]
7721#[target_feature(enable = "avx512bw,avx512vl")]
7722#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7723#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
7724#[rustc_legacy_const_generics(3)]
7725#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7726pub const fn _mm_mask_srli_epi16<const IMM8: i32>(
7727    src: __m128i,
7728    k: __mmask8,
7729    a: __m128i,
7730) -> __m128i {
7731    unsafe {
7732        static_assert_uimm_bits!(IMM8, 8);
7733        let shf = _mm_srli_epi16::<IMM8>(a);
7734        transmute(simd_select_bitmask(k, shf.as_i16x8(), src.as_i16x8()))
7735    }
7736}
7737
7738/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7739///
7740/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi16&expand=5506)
7741#[inline]
7742#[target_feature(enable = "avx512bw,avx512vl")]
7743#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7744#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
7745#[rustc_legacy_const_generics(2)]
7746#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7747pub const fn _mm_maskz_srli_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
7748    unsafe {
7749        static_assert_uimm_bits!(IMM8, 8);
7750        let shf = _mm_srli_epi16::<IMM8>(a);
7751        transmute(simd_select_bitmask(k, shf.as_i16x8(), i16x8::ZERO))
7752    }
7753}
7754
7755/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
7756///
7757/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi16&expand=5545)
7758#[inline]
7759#[target_feature(enable = "avx512bw")]
7760#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7761#[cfg_attr(test, assert_instr(vpsrlvw))]
7762#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7763pub const fn _mm512_srlv_epi16(a: __m512i, count: __m512i) -> __m512i {
7764    unsafe {
7765        let count = count.as_u16x32();
7766        let no_overflow: u16x32 = simd_lt(count, u16x32::splat(u16::BITS as u16));
7767        let count = simd_select(no_overflow, count, u16x32::ZERO);
7768        simd_select(no_overflow, simd_shr(a.as_u16x32(), count), u16x32::ZERO).as_m512i()
7769    }
7770}
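
// --- Illustrative sketch, not part of the original source ---
// Per-lane logical right shift: each 16-bit lane of `count` supplies the shift
// amount for the matching lane of `a`, and counts of 16 or more zero that
// lane, per the `no_overflow` select above. The helper name `_srlv_epi16_demo`
// is hypothetical.
#[cfg(test)]
#[target_feature(enable = "avx512bw")]
fn _srlv_epi16_demo(a: __m512i) -> (__m512i, __m512i) {
    let in_range = _mm512_srlv_epi16(a, _mm512_set1_epi16(2)); // each lane logically shifted by 2
    let too_big = _mm512_srlv_epi16(a, _mm512_set1_epi16(16)); // every lane == 0
    (in_range, too_big)
}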
7771
7772/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7773///
7774/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi16&expand=5543)
7775#[inline]
7776#[target_feature(enable = "avx512bw")]
7777#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7778#[cfg_attr(test, assert_instr(vpsrlvw))]
7779#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7780pub const fn _mm512_mask_srlv_epi16(
7781    src: __m512i,
7782    k: __mmask32,
7783    a: __m512i,
7784    count: __m512i,
7785) -> __m512i {
7786    unsafe {
7787        let shf = _mm512_srlv_epi16(a, count).as_i16x32();
7788        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
7789    }
7790}
7791
7792/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7793///
7794/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi16&expand=5544)
7795#[inline]
7796#[target_feature(enable = "avx512bw")]
7797#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7798#[cfg_attr(test, assert_instr(vpsrlvw))]
7799#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7800pub const fn _mm512_maskz_srlv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
7801    unsafe {
7802        let shf = _mm512_srlv_epi16(a, count).as_i16x32();
7803        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
7804    }
7805}
7806
7807/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
7808///
7809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srlv_epi16&expand=5542)
7810#[inline]
7811#[target_feature(enable = "avx512bw,avx512vl")]
7812#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7813#[cfg_attr(test, assert_instr(vpsrlvw))]
7814#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7815pub const fn _mm256_srlv_epi16(a: __m256i, count: __m256i) -> __m256i {
7816    unsafe {
7817        let count = count.as_u16x16();
7818        let no_overflow: u16x16 = simd_lt(count, u16x16::splat(u16::BITS as u16));
7819        let count = simd_select(no_overflow, count, u16x16::ZERO);
7820        simd_select(no_overflow, simd_shr(a.as_u16x16(), count), u16x16::ZERO).as_m256i()
7821    }
7822}
7823
7824/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7825///
7826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi16&expand=5540)
7827#[inline]
7828#[target_feature(enable = "avx512bw,avx512vl")]
7829#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7830#[cfg_attr(test, assert_instr(vpsrlvw))]
7831#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7832pub const fn _mm256_mask_srlv_epi16(
7833    src: __m256i,
7834    k: __mmask16,
7835    a: __m256i,
7836    count: __m256i,
7837) -> __m256i {
7838    unsafe {
7839        let shf = _mm256_srlv_epi16(a, count).as_i16x16();
7840        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
7841    }
7842}
7843
7844/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7845///
7846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi16&expand=5541)
7847#[inline]
7848#[target_feature(enable = "avx512bw,avx512vl")]
7849#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7850#[cfg_attr(test, assert_instr(vpsrlvw))]
7851#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7852pub const fn _mm256_maskz_srlv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
7853    unsafe {
7854        let shf = _mm256_srlv_epi16(a, count).as_i16x16();
7855        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
7856    }
7857}
7858
7859/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
7860///
7861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srlv_epi16&expand=5539)
7862#[inline]
7863#[target_feature(enable = "avx512bw,avx512vl")]
7864#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7865#[cfg_attr(test, assert_instr(vpsrlvw))]
7866#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7867pub const fn _mm_srlv_epi16(a: __m128i, count: __m128i) -> __m128i {
7868    unsafe {
7869        let count = count.as_u16x8();
7870        let no_overflow: u16x8 = simd_lt(count, u16x8::splat(u16::BITS as u16));
7871        let count = simd_select(no_overflow, count, u16x8::ZERO);
7872        simd_select(no_overflow, simd_shr(a.as_u16x8(), count), u16x8::ZERO).as_m128i()
7873    }
7874}
7875
7876/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7877///
7878/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi16&expand=5537)
7879#[inline]
7880#[target_feature(enable = "avx512bw,avx512vl")]
7881#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7882#[cfg_attr(test, assert_instr(vpsrlvw))]
7883#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7884pub const fn _mm_mask_srlv_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7885    unsafe {
7886        let shf = _mm_srlv_epi16(a, count).as_i16x8();
7887        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
7888    }
7889}
7890
7891/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7892///
7893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi16&expand=5538)
7894#[inline]
7895#[target_feature(enable = "avx512bw,avx512vl")]
7896#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7897#[cfg_attr(test, assert_instr(vpsrlvw))]
7898#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7899pub const fn _mm_maskz_srlv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7900    unsafe {
7901        let shf = _mm_srlv_epi16(a, count).as_i16x8();
7902        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
7903    }
7904}
7905
7906/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst.
7907///
7908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi16&expand=5398)
7909#[inline]
7910#[target_feature(enable = "avx512bw")]
7911#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7912#[cfg_attr(test, assert_instr(vpsraw))]
7913pub fn _mm512_sra_epi16(a: __m512i, count: __m128i) -> __m512i {
7914    unsafe { transmute(vpsraw(a.as_i16x32(), count.as_i16x8())) }
7915}
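
// --- Illustrative sketch, not part of the original source ---
// Arithmetic right shift by a single count from the low 64 bits of `count`:
// the sign bit is replicated into the vacated positions, so negative lanes
// stay negative, and a count of 16 or more fills each lane with its sign bit.
// The helper name `_sra_epi16_demo` is hypothetical.
#[cfg(test)]
#[target_feature(enable = "avx512bw")]
fn _sra_epi16_demo() -> __m512i {
    let a = _mm512_set1_epi16(-32);
    // Every lane becomes -32 >> 2 == -8 (sign-extending), not 0x3FF8.
    _mm512_sra_epi16(a, _mm_set_epi32(0, 0, 0, 2))
}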
7916
7917/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7918///
7919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi16&expand=5396)
7920#[inline]
7921#[target_feature(enable = "avx512bw")]
7922#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7923#[cfg_attr(test, assert_instr(vpsraw))]
7924pub fn _mm512_mask_sra_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
7925    unsafe {
7926        let shf = _mm512_sra_epi16(a, count).as_i16x32();
7927        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
7928    }
7929}
7930
7931/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7932///
7933/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi16&expand=5397)
7934#[inline]
7935#[target_feature(enable = "avx512bw")]
7936#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7937#[cfg_attr(test, assert_instr(vpsraw))]
7938pub fn _mm512_maskz_sra_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
7939    unsafe {
7940        let shf = _mm512_sra_epi16(a, count).as_i16x32();
7941        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
7942    }
7943}
7944
7945/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7946///
7947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi16&expand=5393)
7948#[inline]
7949#[target_feature(enable = "avx512bw,avx512vl")]
7950#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7951#[cfg_attr(test, assert_instr(vpsraw))]
7952pub fn _mm256_mask_sra_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
7953    unsafe {
7954        let shf = _mm256_sra_epi16(a, count).as_i16x16();
7955        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
7956    }
7957}
7958
7959/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7960///
7961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi16&expand=5394)
7962#[inline]
7963#[target_feature(enable = "avx512bw,avx512vl")]
7964#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7965#[cfg_attr(test, assert_instr(vpsraw))]
7966pub fn _mm256_maskz_sra_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
7967    unsafe {
7968        let shf = _mm256_sra_epi16(a, count).as_i16x16();
7969        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
7970    }
7971}
7972
7973/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7974///
7975/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi16&expand=5390)
7976#[inline]
7977#[target_feature(enable = "avx512bw,avx512vl")]
7978#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7979#[cfg_attr(test, assert_instr(vpsraw))]
7980pub fn _mm_mask_sra_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7981    unsafe {
7982        let shf = _mm_sra_epi16(a, count).as_i16x8();
7983        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
7984    }
7985}
7986
7987/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7988///
7989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi16&expand=5391)
7990#[inline]
7991#[target_feature(enable = "avx512bw,avx512vl")]
7992#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7993#[cfg_attr(test, assert_instr(vpsraw))]
7994pub fn _mm_maskz_sra_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7995    unsafe {
7996        let shf = _mm_sra_epi16(a, count).as_i16x8();
7997        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
7998    }
7999}
8000
8001/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
8002///
8003/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi16&expand=5427)
8004#[inline]
8005#[target_feature(enable = "avx512bw")]
8006#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8007#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
8008#[rustc_legacy_const_generics(1)]
8009#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8010pub const fn _mm512_srai_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
8011    unsafe {
8012        static_assert_uimm_bits!(IMM8, 8);
8013        transmute(simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16)))
8014    }
8015}
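
// --- Illustrative sketch, not part of the original source ---
// The `IMM8.min(15)` above clamps the immediate, so oversized arithmetic
// shifts fill each lane with copies of its sign bit (0 or -1) instead of
// zeroing it, matching the hardware behavior. The helper name
// `_srai_epi16_demo` is hypothetical.
#[cfg(test)]
#[target_feature(enable = "avx512bw")]
fn _srai_epi16_demo() -> (__m512i, __m512i) {
    let a = _mm512_set1_epi16(-32);
    let shifted = _mm512_srai_epi16::<2>(a); // every lane == -8
    let saturated = _mm512_srai_epi16::<20>(a); // every lane == -1 (all sign bits)
    (shifted, saturated)
}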
8016
8017/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8018///
8019/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi16&expand=5425)
8020#[inline]
8021#[target_feature(enable = "avx512bw")]
8022#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8023#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
8024#[rustc_legacy_const_generics(3)]
8025#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8026pub const fn _mm512_mask_srai_epi16<const IMM8: u32>(
8027    src: __m512i,
8028    k: __mmask32,
8029    a: __m512i,
8030) -> __m512i {
8031    unsafe {
8032        static_assert_uimm_bits!(IMM8, 8);
8033        let shf = simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16));
8034        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
8035    }
8036}
8037
8038/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8039///
8040/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi16&expand=5426)
8041#[inline]
8042#[target_feature(enable = "avx512bw")]
8043#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8044#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
8045#[rustc_legacy_const_generics(2)]
8046#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8047pub const fn _mm512_maskz_srai_epi16<const IMM8: u32>(k: __mmask32, a: __m512i) -> __m512i {
8048    unsafe {
8049        static_assert_uimm_bits!(IMM8, 8);
8050        let shf = simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16));
8051        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
8052    }
8053}
8054
8055/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8056///
8057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi16&expand=5422)
8058#[inline]
8059#[target_feature(enable = "avx512bw,avx512vl")]
8060#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8061#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
8062#[rustc_legacy_const_generics(3)]
8063#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8064pub const fn _mm256_mask_srai_epi16<const IMM8: u32>(
8065    src: __m256i,
8066    k: __mmask16,
8067    a: __m256i,
8068) -> __m256i {
8069    unsafe {
8070        static_assert_uimm_bits!(IMM8, 8);
8071        let r = simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16));
8072        transmute(simd_select_bitmask(k, r, src.as_i16x16()))
8073    }
8074}
8075
8076/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8077///
8078/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi16&expand=5423)
8079#[inline]
8080#[target_feature(enable = "avx512bw,avx512vl")]
8081#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8082#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
8083#[rustc_legacy_const_generics(2)]
8084#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8085pub const fn _mm256_maskz_srai_epi16<const IMM8: u32>(k: __mmask16, a: __m256i) -> __m256i {
8086    unsafe {
8087        static_assert_uimm_bits!(IMM8, 8);
8088        let r = simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16));
8089        transmute(simd_select_bitmask(k, r, i16x16::ZERO))
8090    }
8091}
8092
8093/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8094///
8095/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi16&expand=5419)
8096#[inline]
8097#[target_feature(enable = "avx512bw,avx512vl")]
8098#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8099#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
8100#[rustc_legacy_const_generics(3)]
8101#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8102pub const fn _mm_mask_srai_epi16<const IMM8: u32>(
8103    src: __m128i,
8104    k: __mmask8,
8105    a: __m128i,
8106) -> __m128i {
8107    unsafe {
8108        static_assert_uimm_bits!(IMM8, 8);
8109        let r = simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16));
8110        transmute(simd_select_bitmask(k, r, src.as_i16x8()))
8111    }
8112}
8113
8114/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8115///
8116/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi16&expand=5420)
8117#[inline]
8118#[target_feature(enable = "avx512bw,avx512vl")]
8119#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8120#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
8121#[rustc_legacy_const_generics(2)]
8122#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8123pub const fn _mm_maskz_srai_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
8124    unsafe {
8125        static_assert_uimm_bits!(IMM8, 8);
8126        let r = simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16));
8127        transmute(simd_select_bitmask(k, r, i16x8::ZERO))
8128    }
8129}
8130
8131/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
8132///
8133/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi16&expand=5456)
8134#[inline]
8135#[target_feature(enable = "avx512bw")]
8136#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8137#[cfg_attr(test, assert_instr(vpsravw))]
8138#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8139pub const fn _mm512_srav_epi16(a: __m512i, count: __m512i) -> __m512i {
8140    unsafe {
8141        let count = count.as_u16x32();
8142        let no_overflow: u16x32 = simd_lt(count, u16x32::splat(u16::BITS as u16));
8143        let count = simd_select(no_overflow, transmute(count), i16x32::splat(15));
8144        simd_shr(a.as_i16x32(), count).as_m512i()
8145    }
8146}
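
// --- Illustrative sketch, not part of the original source ---
// Per-lane arithmetic right shift: each lane of `count` shifts the matching
// lane of `a`, and out-of-range counts are clamped to 15 by the select above,
// so such lanes collapse to 0 or -1 depending on their sign. The helper name
// `_srav_epi16_demo` is hypothetical.
#[cfg(test)]
#[target_feature(enable = "avx512bw")]
fn _srav_epi16_demo() -> __m512i {
    let a = _mm512_set1_epi16(-100);
    // A count of 40 is out of range, so every lane becomes its sign bit: -1.
    _mm512_srav_epi16(a, _mm512_set1_epi16(40))
}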
8147
8148/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8149///
8150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi16&expand=5454)
8151#[inline]
8152#[target_feature(enable = "avx512bw")]
8153#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8154#[cfg_attr(test, assert_instr(vpsravw))]
8155#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8156pub const fn _mm512_mask_srav_epi16(
8157    src: __m512i,
8158    k: __mmask32,
8159    a: __m512i,
8160    count: __m512i,
8161) -> __m512i {
8162    unsafe {
8163        let shf = _mm512_srav_epi16(a, count).as_i16x32();
8164        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
8165    }
8166}
8167
8168/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8169///
8170/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi16&expand=5455)
8171#[inline]
8172#[target_feature(enable = "avx512bw")]
8173#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8174#[cfg_attr(test, assert_instr(vpsravw))]
8175#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8176pub const fn _mm512_maskz_srav_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
8177    unsafe {
8178        let shf = _mm512_srav_epi16(a, count).as_i16x32();
8179        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
8180    }
8181}
8182
8183/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
8184///
8185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srav_epi16&expand=5453)
8186#[inline]
8187#[target_feature(enable = "avx512bw,avx512vl")]
8188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8189#[cfg_attr(test, assert_instr(vpsravw))]
8190#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8191pub const fn _mm256_srav_epi16(a: __m256i, count: __m256i) -> __m256i {
8192    unsafe {
8193        let count = count.as_u16x16();
8194        let no_overflow: u16x16 = simd_lt(count, u16x16::splat(u16::BITS as u16));
8195        let count = simd_select(no_overflow, transmute(count), i16x16::splat(15));
8196        simd_shr(a.as_i16x16(), count).as_m256i()
8197    }
8198}
8199
8200/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8201///
8202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi16&expand=5451)
8203#[inline]
8204#[target_feature(enable = "avx512bw,avx512vl")]
8205#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8206#[cfg_attr(test, assert_instr(vpsravw))]
8207#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8208pub const fn _mm256_mask_srav_epi16(
8209    src: __m256i,
8210    k: __mmask16,
8211    a: __m256i,
8212    count: __m256i,
8213) -> __m256i {
8214    unsafe {
8215        let shf = _mm256_srav_epi16(a, count).as_i16x16();
8216        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
8217    }
8218}
8219
8220/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8221///
8222/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi16&expand=5452)
8223#[inline]
8224#[target_feature(enable = "avx512bw,avx512vl")]
8225#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8226#[cfg_attr(test, assert_instr(vpsravw))]
8227#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8228pub const fn _mm256_maskz_srav_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
8229    unsafe {
8230        let shf = _mm256_srav_epi16(a, count).as_i16x16();
8231        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
8232    }
8233}
8234
8235/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
8236///
8237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srav_epi16&expand=5450)
8238#[inline]
8239#[target_feature(enable = "avx512bw,avx512vl")]
8240#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8241#[cfg_attr(test, assert_instr(vpsravw))]
8242#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8243pub const fn _mm_srav_epi16(a: __m128i, count: __m128i) -> __m128i {
8244    unsafe {
8245        let count = count.as_u16x8();
8246        let no_overflow: u16x8 = simd_lt(count, u16x8::splat(u16::BITS as u16));
8247        let count = simd_select(no_overflow, transmute(count), i16x8::splat(15));
8248        simd_shr(a.as_i16x8(), count).as_m128i()
8249    }
8250}
8251
8252/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8253///
8254/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi16&expand=5448)
8255#[inline]
8256#[target_feature(enable = "avx512bw,avx512vl")]
8257#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8258#[cfg_attr(test, assert_instr(vpsravw))]
8259#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8260pub const fn _mm_mask_srav_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
8261    unsafe {
8262        let shf = _mm_srav_epi16(a, count).as_i16x8();
8263        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
8264    }
8265}
8266
8267/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8268///
8269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi16&expand=5449)
8270#[inline]
8271#[target_feature(enable = "avx512bw,avx512vl")]
8272#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8273#[cfg_attr(test, assert_instr(vpsravw))]
8274#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8275pub const fn _mm_maskz_srav_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
8276    unsafe {
8277        let shf = _mm_srav_epi16(a, count).as_i16x8();
8278        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
8279    }
8280}
8281
8282/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
8283///
8284/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi16&expand=4226)
8285#[inline]
8286#[target_feature(enable = "avx512bw")]
8287#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8288#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
8289pub fn _mm512_permutex2var_epi16(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
8290    unsafe { transmute(vpermi2w(a.as_i16x32(), idx.as_i16x32(), b.as_i16x32())) }
8291}
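
// Illustrative sketch (not part of the upstream source, and not wired into the crate's
// test harness): the selectors in `idx` index into the 64-lane concatenation of `b:a`,
// so an index of 0 picks lane 0 of `a` and an index of 32 picks lane 0 of `b`.
// The helper name `_permutex2var_epi16_demo` is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn _permutex2var_epi16_demo() {
    let a = _mm512_set1_epi16(1);
    let b = _mm512_set1_epi16(2);
    // Even destination lanes use selector 0 (lane 0 of `a`), odd lanes use selector 32
    // (lane 0 of `b`).
    let idx = _mm512_mask_set1_epi16(_mm512_setzero_si512(), 0xAAAA_AAAA, 32);
    let r = _mm512_permutex2var_epi16(a, idx, b);
    let expected = _mm512_mask_set1_epi16(a, 0xAAAA_AAAA, 2);
    assert_eq!(_mm512_cmpeq_epi16_mask(r, expected), u32::MAX);
}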
8292
8293/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
8294///
8295/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi16&expand=4223)
8296#[inline]
8297#[target_feature(enable = "avx512bw")]
8298#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8299#[cfg_attr(test, assert_instr(vpermt2w))]
8300pub fn _mm512_mask_permutex2var_epi16(
8301    a: __m512i,
8302    k: __mmask32,
8303    idx: __m512i,
8304    b: __m512i,
8305) -> __m512i {
8306    unsafe {
8307        let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32();
8308        transmute(simd_select_bitmask(k, permute, a.as_i16x32()))
8309    }
8310}
8311
8312/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8313///
8314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi16&expand=4225)
8315#[inline]
8316#[target_feature(enable = "avx512bw")]
8317#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8318#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
8319pub fn _mm512_maskz_permutex2var_epi16(
8320    k: __mmask32,
8321    a: __m512i,
8322    idx: __m512i,
8323    b: __m512i,
8324) -> __m512i {
8325    unsafe {
8326        let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32();
8327        transmute(simd_select_bitmask(k, permute, i16x32::ZERO))
8328    }
8329}
8330
8331/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
8332///
8333/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi16&expand=4224)
8334#[inline]
8335#[target_feature(enable = "avx512bw")]
8336#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8337#[cfg_attr(test, assert_instr(vpermi2w))]
8338pub fn _mm512_mask2_permutex2var_epi16(
8339    a: __m512i,
8340    idx: __m512i,
8341    k: __mmask32,
8342    b: __m512i,
8343) -> __m512i {
8344    unsafe {
8345        let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32();
8346        transmute(simd_select_bitmask(k, permute, idx.as_i16x32()))
8347    }
8348}
8349
8350/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
8351///
8352/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi16&expand=4222)
8353#[inline]
8354#[target_feature(enable = "avx512bw,avx512vl")]
8355#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8356#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
8357pub fn _mm256_permutex2var_epi16(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
8358    unsafe { transmute(vpermi2w256(a.as_i16x16(), idx.as_i16x16(), b.as_i16x16())) }
8359}
8360
8361/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
8362///
8363/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi16&expand=4219)
8364#[inline]
8365#[target_feature(enable = "avx512bw,avx512vl")]
8366#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8367#[cfg_attr(test, assert_instr(vpermt2w))]
8368pub fn _mm256_mask_permutex2var_epi16(
8369    a: __m256i,
8370    k: __mmask16,
8371    idx: __m256i,
8372    b: __m256i,
8373) -> __m256i {
8374    unsafe {
8375        let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16();
8376        transmute(simd_select_bitmask(k, permute, a.as_i16x16()))
8377    }
8378}
8379
8380/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8381///
8382/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi16&expand=4221)
8383#[inline]
8384#[target_feature(enable = "avx512bw,avx512vl")]
8385#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8386#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
8387pub fn _mm256_maskz_permutex2var_epi16(
8388    k: __mmask16,
8389    a: __m256i,
8390    idx: __m256i,
8391    b: __m256i,
8392) -> __m256i {
8393    unsafe {
8394        let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16();
8395        transmute(simd_select_bitmask(k, permute, i16x16::ZERO))
8396    }
8397}
8398
8399/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
8400///
8401/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi16&expand=4220)
8402#[inline]
8403#[target_feature(enable = "avx512bw,avx512vl")]
8404#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8405#[cfg_attr(test, assert_instr(vpermi2w))]
8406pub fn _mm256_mask2_permutex2var_epi16(
8407    a: __m256i,
8408    idx: __m256i,
8409    k: __mmask16,
8410    b: __m256i,
8411) -> __m256i {
8412    unsafe {
8413        let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16();
8414        transmute(simd_select_bitmask(k, permute, idx.as_i16x16()))
8415    }
8416}
8417
8418/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
8419///
8420/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi16&expand=4218)
8421#[inline]
8422#[target_feature(enable = "avx512bw,avx512vl")]
8423#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8424#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
8425pub fn _mm_permutex2var_epi16(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
8426    unsafe { transmute(vpermi2w128(a.as_i16x8(), idx.as_i16x8(), b.as_i16x8())) }
8427}
8428
8429/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
8430///
8431/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi16&expand=4215)
8432#[inline]
8433#[target_feature(enable = "avx512bw,avx512vl")]
8434#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8435#[cfg_attr(test, assert_instr(vpermt2w))]
8436pub fn _mm_mask_permutex2var_epi16(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i {
8437    unsafe {
8438        let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8();
8439        transmute(simd_select_bitmask(k, permute, a.as_i16x8()))
8440    }
8441}
8442
8443/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8444///
8445/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi16&expand=4217)
8446#[inline]
8447#[target_feature(enable = "avx512bw,avx512vl")]
8448#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8449#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
8450pub fn _mm_maskz_permutex2var_epi16(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
8451    unsafe {
8452        let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8();
8453        transmute(simd_select_bitmask(k, permute, i16x8::ZERO))
8454    }
8455}
8456
8457/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
8458///
8459/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi16&expand=4216)
8460#[inline]
8461#[target_feature(enable = "avx512bw,avx512vl")]
8462#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8463#[cfg_attr(test, assert_instr(vpermi2w))]
8464pub fn _mm_mask2_permutex2var_epi16(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i {
8465    unsafe {
8466        let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8();
8467        transmute(simd_select_bitmask(k, permute, idx.as_i16x8()))
8468    }
8469}
8470
8471/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
8472///
8473/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi16&expand=4295)
8474#[inline]
8475#[target_feature(enable = "avx512bw")]
8476#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8477#[cfg_attr(test, assert_instr(vpermw))]
8478pub fn _mm512_permutexvar_epi16(idx: __m512i, a: __m512i) -> __m512i {
8479    unsafe { transmute(vpermw(a.as_i16x32(), idx.as_i16x32())) }
8480}
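
// Illustrative sketch (not part of the upstream source, and not wired into the crate's
// test harness): each destination lane reads the source lane named by the low five bits
// of the corresponding selector, so selectors are effectively taken modulo 32.
// The helper name `_permutexvar_epi16_demo` is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn _permutexvar_epi16_demo() {
    // `a` holds the values 0..=31 in its 32 lanes (element 0 holds 0).
    #[rustfmt::skip]
    let a = _mm512_set_epi16(
        31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16,
        15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
    );
    // Every selector is 5, so every destination lane receives a's lane 5.
    let r = _mm512_permutexvar_epi16(_mm512_set1_epi16(5), a);
    assert_eq!(_mm512_cmpeq_epi16_mask(r, _mm512_set1_epi16(5)), u32::MAX);
    // A selector of 37 (37 & 31 == 5) reads the same lane.
    let r = _mm512_permutexvar_epi16(_mm512_set1_epi16(37), a);
    assert_eq!(_mm512_cmpeq_epi16_mask(r, _mm512_set1_epi16(5)), u32::MAX);
}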
8481
8482/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8483///
8484/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi16&expand=4293)
8485#[inline]
8486#[target_feature(enable = "avx512bw")]
8487#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8488#[cfg_attr(test, assert_instr(vpermw))]
8489pub fn _mm512_mask_permutexvar_epi16(
8490    src: __m512i,
8491    k: __mmask32,
8492    idx: __m512i,
8493    a: __m512i,
8494) -> __m512i {
8495    unsafe {
8496        let permute = _mm512_permutexvar_epi16(idx, a).as_i16x32();
8497        transmute(simd_select_bitmask(k, permute, src.as_i16x32()))
8498    }
8499}
8500
8501/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8502///
8503/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi16&expand=4294)
8504#[inline]
8505#[target_feature(enable = "avx512bw")]
8506#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8507#[cfg_attr(test, assert_instr(vpermw))]
8508pub fn _mm512_maskz_permutexvar_epi16(k: __mmask32, idx: __m512i, a: __m512i) -> __m512i {
8509    unsafe {
8510        let permute = _mm512_permutexvar_epi16(idx, a).as_i16x32();
8511        transmute(simd_select_bitmask(k, permute, i16x32::ZERO))
8512    }
8513}
8514
8515/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
8516///
8517/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi16&expand=4292)
8518#[inline]
8519#[target_feature(enable = "avx512bw,avx512vl")]
8520#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8521#[cfg_attr(test, assert_instr(vpermw))]
8522pub fn _mm256_permutexvar_epi16(idx: __m256i, a: __m256i) -> __m256i {
8523    unsafe { transmute(vpermw256(a.as_i16x16(), idx.as_i16x16())) }
8524}
8525
8526/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8527///
8528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi16&expand=4290)
8529#[inline]
8530#[target_feature(enable = "avx512bw,avx512vl")]
8531#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8532#[cfg_attr(test, assert_instr(vpermw))]
8533pub fn _mm256_mask_permutexvar_epi16(
8534    src: __m256i,
8535    k: __mmask16,
8536    idx: __m256i,
8537    a: __m256i,
8538) -> __m256i {
8539    unsafe {
8540        let permute = _mm256_permutexvar_epi16(idx, a).as_i16x16();
8541        transmute(simd_select_bitmask(k, permute, src.as_i16x16()))
8542    }
8543}
8544
8545/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8546///
8547/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi16&expand=4291)
8548#[inline]
8549#[target_feature(enable = "avx512bw,avx512vl")]
8550#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8551#[cfg_attr(test, assert_instr(vpermw))]
8552pub fn _mm256_maskz_permutexvar_epi16(k: __mmask16, idx: __m256i, a: __m256i) -> __m256i {
8553    unsafe {
8554        let permute = _mm256_permutexvar_epi16(idx, a).as_i16x16();
8555        transmute(simd_select_bitmask(k, permute, i16x16::ZERO))
8556    }
8557}
8558
8559/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
8560///
8561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutexvar_epi16&expand=4289)
8562#[inline]
8563#[target_feature(enable = "avx512bw,avx512vl")]
8564#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8565#[cfg_attr(test, assert_instr(vpermw))]
8566pub fn _mm_permutexvar_epi16(idx: __m128i, a: __m128i) -> __m128i {
8567    unsafe { transmute(vpermw128(a.as_i16x8(), idx.as_i16x8())) }
8568}
8569
8570/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8571///
8572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutexvar_epi16&expand=4287)
8573#[inline]
8574#[target_feature(enable = "avx512bw,avx512vl")]
8575#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8576#[cfg_attr(test, assert_instr(vpermw))]
8577pub fn _mm_mask_permutexvar_epi16(src: __m128i, k: __mmask8, idx: __m128i, a: __m128i) -> __m128i {
8578    unsafe {
8579        let permute = _mm_permutexvar_epi16(idx, a).as_i16x8();
8580        transmute(simd_select_bitmask(k, permute, src.as_i16x8()))
8581    }
8582}
8583
8584/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8585///
8586/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutexvar_epi16&expand=4288)
8587#[inline]
8588#[target_feature(enable = "avx512bw,avx512vl")]
8589#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8590#[cfg_attr(test, assert_instr(vpermw))]
8591pub fn _mm_maskz_permutexvar_epi16(k: __mmask8, idx: __m128i, a: __m128i) -> __m128i {
8592    unsafe {
8593        let permute = _mm_permutexvar_epi16(idx, a).as_i16x8();
8594        transmute(simd_select_bitmask(k, permute, i16x8::ZERO))
8595    }
8596}
8597
8598/// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst.
8599///
8600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi16&expand=430)
8601#[inline]
8602#[target_feature(enable = "avx512bw")]
8603#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8604#[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw
8605#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8606pub const fn _mm512_mask_blend_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
8607    unsafe { transmute(simd_select_bitmask(k, b.as_i16x32(), a.as_i16x32())) }
8608}
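
// Illustrative sketch (not part of the upstream source, and not wired into the crate's
// test harness): a set mask bit selects the lane from `b`, a clear bit keeps the lane
// from `a`. The helper name `_blend_epi16_demo` is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn _blend_epi16_demo() {
    let a = _mm512_set1_epi16(10);
    let b = _mm512_set1_epi16(20);
    // The low 16 mask bits select `b`, the high 16 keep `a`.
    let r = _mm512_mask_blend_epi16(0x0000_FFFF, a, b);
    let expected = _mm512_mask_set1_epi16(a, 0x0000_FFFF, 20);
    assert_eq!(_mm512_cmpeq_epi16_mask(r, expected), u32::MAX);
}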
8609
8610/// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst.
8611///
8612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi16&expand=429)
8613#[inline]
8614#[target_feature(enable = "avx512bw,avx512vl")]
8615#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8616#[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw
8617#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8618pub const fn _mm256_mask_blend_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
8619    unsafe { transmute(simd_select_bitmask(k, b.as_i16x16(), a.as_i16x16())) }
8620}
8621
8622/// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst.
8623///
8624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi16&expand=427)
8625#[inline]
8626#[target_feature(enable = "avx512bw,avx512vl")]
8627#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8628#[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw
8629#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8630pub const fn _mm_mask_blend_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
8631    unsafe { transmute(simd_select_bitmask(k, b.as_i16x8(), a.as_i16x8())) }
8632}
8633
8634/// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst.
8635///
8636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi8&expand=441)
8637#[inline]
8638#[target_feature(enable = "avx512bw")]
8639#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8640#[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb
8641#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8642pub const fn _mm512_mask_blend_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
8643    unsafe { transmute(simd_select_bitmask(k, b.as_i8x64(), a.as_i8x64())) }
8644}
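
// Illustrative sketch (not part of the upstream source, and not wired into the crate's
// test harness): the byte-wide blend takes a 64-bit mask, one bit per byte lane.
// The helper name `_blend_epi8_demo` is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn _blend_epi8_demo() {
    let a = _mm512_set1_epi8(1);
    let b = _mm512_set1_epi8(2);
    // Alternate byte lanes: odd lanes come from `b`, even lanes from `a`.
    let k: __mmask64 = 0xAAAA_AAAA_AAAA_AAAA;
    let r = _mm512_mask_blend_epi8(k, a, b);
    let expected = _mm512_mask_set1_epi8(a, k, 2);
    assert_eq!(_mm512_cmpeq_epi8_mask(r, expected), u64::MAX);
}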
8645
8646/// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst.
8647///
8648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi8&expand=440)
8649#[inline]
8650#[target_feature(enable = "avx512bw,avx512vl")]
8651#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8652#[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb
8653#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8654pub const fn _mm256_mask_blend_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
8655    unsafe { transmute(simd_select_bitmask(k, b.as_i8x32(), a.as_i8x32())) }
8656}
8657
8658/// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst.
8659///
8660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi8&expand=439)
8661#[inline]
8662#[target_feature(enable = "avx512bw,avx512vl")]
8663#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8664#[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb
8665#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8666pub const fn _mm_mask_blend_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
8667    unsafe { transmute(simd_select_bitmask(k, b.as_i8x16(), a.as_i8x16())) }
8668}
8669
8670/// Broadcast the low packed 16-bit integer from a to all elements of dst.
8671///
8672/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastw_epi16&expand=587)
8673#[inline]
8674#[target_feature(enable = "avx512bw")]
8675#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8676#[cfg_attr(test, assert_instr(vpbroadcastw))]
8677#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8678pub const fn _mm512_broadcastw_epi16(a: __m128i) -> __m512i {
8679    unsafe {
8680        let a = _mm512_castsi128_si512(a).as_i16x32();
8681        let ret: i16x32 = simd_shuffle!(
8682            a,
8683            a,
8684            [
8685                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8686                0, 0, 0, 0,
8687            ],
8688        );
8689        transmute(ret)
8690    }
8691}
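
// Illustrative sketch (not part of the upstream source, and not wired into the crate's
// test harness): only the lowest word of the 128-bit source is used; all 32 destination
// lanes receive it. The helper name `_broadcastw_epi16_demo` is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn _broadcastw_epi16_demo() {
    // Lane 0 of `a` is 9; the remaining lanes are ignored by the broadcast.
    let a = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 9);
    let r = _mm512_broadcastw_epi16(a);
    assert_eq!(_mm512_cmpeq_epi16_mask(r, _mm512_set1_epi16(9)), u32::MAX);
}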
8692
8693/// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8694///
8695/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastw_epi16&expand=588)
8696#[inline]
8697#[target_feature(enable = "avx512bw")]
8698#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8699#[cfg_attr(test, assert_instr(vpbroadcastw))]
8700#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8701pub const fn _mm512_mask_broadcastw_epi16(src: __m512i, k: __mmask32, a: __m128i) -> __m512i {
8702    unsafe {
8703        let broadcast = _mm512_broadcastw_epi16(a).as_i16x32();
8704        transmute(simd_select_bitmask(k, broadcast, src.as_i16x32()))
8705    }
8706}
8707
8708/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8709///
8710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastw_epi16&expand=589)
8711#[inline]
8712#[target_feature(enable = "avx512bw")]
8713#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8714#[cfg_attr(test, assert_instr(vpbroadcastw))]
8715#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8716pub const fn _mm512_maskz_broadcastw_epi16(k: __mmask32, a: __m128i) -> __m512i {
8717    unsafe {
8718        let broadcast = _mm512_broadcastw_epi16(a).as_i16x32();
8719        transmute(simd_select_bitmask(k, broadcast, i16x32::ZERO))
8720    }
8721}
8722
8723/// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8724///
8725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastw_epi16&expand=585)
8726#[inline]
8727#[target_feature(enable = "avx512bw,avx512vl")]
8728#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8729#[cfg_attr(test, assert_instr(vpbroadcastw))]
8730#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8731pub const fn _mm256_mask_broadcastw_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
8732    unsafe {
8733        let broadcast = _mm256_broadcastw_epi16(a).as_i16x16();
8734        transmute(simd_select_bitmask(k, broadcast, src.as_i16x16()))
8735    }
8736}
8737
8738/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8739///
8740/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastw_epi16&expand=586)
8741#[inline]
8742#[target_feature(enable = "avx512bw,avx512vl")]
8743#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8744#[cfg_attr(test, assert_instr(vpbroadcastw))]
8745#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8746pub const fn _mm256_maskz_broadcastw_epi16(k: __mmask16, a: __m128i) -> __m256i {
8747    unsafe {
8748        let broadcast = _mm256_broadcastw_epi16(a).as_i16x16();
8749        transmute(simd_select_bitmask(k, broadcast, i16x16::ZERO))
8750    }
8751}
8752
8753/// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8754///
8755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastw_epi16&expand=582)
8756#[inline]
8757#[target_feature(enable = "avx512bw,avx512vl")]
8758#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8759#[cfg_attr(test, assert_instr(vpbroadcastw))]
8760#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8761pub const fn _mm_mask_broadcastw_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
8762    unsafe {
8763        let broadcast = _mm_broadcastw_epi16(a).as_i16x8();
8764        transmute(simd_select_bitmask(k, broadcast, src.as_i16x8()))
8765    }
8766}
8767
8768/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8769///
8770/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastw_epi16&expand=583)
8771#[inline]
8772#[target_feature(enable = "avx512bw,avx512vl")]
8773#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8774#[cfg_attr(test, assert_instr(vpbroadcastw))]
8775#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8776pub const fn _mm_maskz_broadcastw_epi16(k: __mmask8, a: __m128i) -> __m128i {
8777    unsafe {
8778        let broadcast = _mm_broadcastw_epi16(a).as_i16x8();
8779        transmute(simd_select_bitmask(k, broadcast, i16x8::ZERO))
8780    }
8781}
8782
8783/// Broadcast the low packed 8-bit integer from a to all elements of dst.
8784///
8785/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastb_epi8&expand=536)
8786#[inline]
8787#[target_feature(enable = "avx512bw")]
8788#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8789#[cfg_attr(test, assert_instr(vpbroadcastb))]
8790#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8791pub const fn _mm512_broadcastb_epi8(a: __m128i) -> __m512i {
8792    unsafe {
8793        let a = _mm512_castsi128_si512(a).as_i8x64();
8794        let ret: i8x64 = simd_shuffle!(
8795            a,
8796            a,
8797            [
8798                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8799                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8800                0, 0, 0, 0, 0, 0, 0, 0,
8801            ],
8802        );
8803        transmute(ret)
8804    }
8805}
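
// Illustrative sketch (not part of the upstream source, and not wired into the crate's
// test harness): same idea as the word broadcast above, combined with a writemask so
// that unselected byte lanes keep `src`. The helper name `_broadcastb_epi8_demo` is
// hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn _broadcastb_epi8_demo() {
    let src = _mm512_set1_epi8(-1);
    let a = _mm_set1_epi8(5);
    // Only the low 8 byte lanes are written; the remaining 56 keep -1 from `src`.
    let r = _mm512_mask_broadcastb_epi8(src, 0xFF, a);
    let expected = _mm512_mask_set1_epi8(src, 0xFF, 5);
    assert_eq!(_mm512_cmpeq_epi8_mask(r, expected), u64::MAX);
}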
8806
8807/// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8808///
8809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastb_epi8&expand=537)
8810#[inline]
8811#[target_feature(enable = "avx512bw")]
8812#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8813#[cfg_attr(test, assert_instr(vpbroadcastb))]
8814#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8815pub const fn _mm512_mask_broadcastb_epi8(src: __m512i, k: __mmask64, a: __m128i) -> __m512i {
8816    unsafe {
8817        let broadcast = _mm512_broadcastb_epi8(a).as_i8x64();
8818        transmute(simd_select_bitmask(k, broadcast, src.as_i8x64()))
8819    }
8820}
8821
8822/// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8823///
8824/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastb_epi8&expand=538)
8825#[inline]
8826#[target_feature(enable = "avx512bw")]
8827#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8828#[cfg_attr(test, assert_instr(vpbroadcastb))]
8829#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8830pub const fn _mm512_maskz_broadcastb_epi8(k: __mmask64, a: __m128i) -> __m512i {
8831    unsafe {
8832        let broadcast = _mm512_broadcastb_epi8(a).as_i8x64();
8833        transmute(simd_select_bitmask(k, broadcast, i8x64::ZERO))
8834    }
8835}
8836
8837/// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8838///
8839/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastb_epi8&expand=534)
8840#[inline]
8841#[target_feature(enable = "avx512bw,avx512vl")]
8842#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8843#[cfg_attr(test, assert_instr(vpbroadcastb))]
8844#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8845pub const fn _mm256_mask_broadcastb_epi8(src: __m256i, k: __mmask32, a: __m128i) -> __m256i {
8846    unsafe {
8847        let broadcast = _mm256_broadcastb_epi8(a).as_i8x32();
8848        transmute(simd_select_bitmask(k, broadcast, src.as_i8x32()))
8849    }
8850}
8851
8852/// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8853///
8854/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastb_epi8&expand=535)
8855#[inline]
8856#[target_feature(enable = "avx512bw,avx512vl")]
8857#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8858#[cfg_attr(test, assert_instr(vpbroadcastb))]
8859#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8860pub const fn _mm256_maskz_broadcastb_epi8(k: __mmask32, a: __m128i) -> __m256i {
8861    unsafe {
8862        let broadcast = _mm256_broadcastb_epi8(a).as_i8x32();
8863        transmute(simd_select_bitmask(k, broadcast, i8x32::ZERO))
8864    }
8865}
8866
8867/// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8868///
8869/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastb_epi8&expand=531)
8870#[inline]
8871#[target_feature(enable = "avx512bw,avx512vl")]
8872#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8873#[cfg_attr(test, assert_instr(vpbroadcastb))]
8874#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8875pub const fn _mm_mask_broadcastb_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
8876    unsafe {
8877        let broadcast = _mm_broadcastb_epi8(a).as_i8x16();
8878        transmute(simd_select_bitmask(k, broadcast, src.as_i8x16()))
8879    }
8880}
8881
8882/// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8883///
8884/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastb_epi8&expand=532)
8885#[inline]
8886#[target_feature(enable = "avx512bw,avx512vl")]
8887#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8888#[cfg_attr(test, assert_instr(vpbroadcastb))]
8889#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8890pub const fn _mm_maskz_broadcastb_epi8(k: __mmask16, a: __m128i) -> __m128i {
8891    unsafe {
8892        let broadcast = _mm_broadcastb_epi8(a).as_i8x16();
8893        transmute(simd_select_bitmask(k, broadcast, i8x16::ZERO))
8894    }
8895}
8896
8897/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
8898///
8899/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi16&expand=6012)
8900#[inline]
8901#[target_feature(enable = "avx512bw")]
8902#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8903#[cfg_attr(test, assert_instr(vpunpckhwd))]
8904#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8905pub const fn _mm512_unpackhi_epi16(a: __m512i, b: __m512i) -> __m512i {
8906    unsafe {
8907        let a = a.as_i16x32();
8908        let b = b.as_i16x32();
8909        #[rustfmt::skip]
8910        let r: i16x32 = simd_shuffle!(
8911            a,
8912            b,
8913            [
8914                4, 32 + 4, 5, 32 + 5,
8915                6, 32 + 6, 7, 32 + 7,
8916                12, 32 + 12, 13, 32 + 13,
8917                14, 32 + 14, 15, 32 + 15,
8918                20, 32 + 20, 21, 32 + 21,
8919                22, 32 + 22, 23, 32 + 23,
8920                28, 32 + 28, 29, 32 + 29,
8921                30, 32 + 30, 31, 32 + 31,
8922            ],
8923        );
8924        transmute(r)
8925    }
8926}
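
// Illustrative sketch (not part of the upstream source, and not wired into the crate's
// test harness): the interleave works on each 128-bit lane independently, taking the
// upper four words of `a` and `b` within that lane. The helper name
// `_unpackhi_epi16_demo` is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn _unpackhi_epi16_demo() {
    // Within every 128-bit lane, `a` holds the words 0..=7 and `b` holds 8..=15.
    #[rustfmt::skip]
    let a = _mm512_set_epi16(
        7, 6, 5, 4, 3, 2, 1, 0,  7, 6, 5, 4, 3, 2, 1, 0,
        7, 6, 5, 4, 3, 2, 1, 0,  7, 6, 5, 4, 3, 2, 1, 0,
    );
    #[rustfmt::skip]
    let b = _mm512_set_epi16(
        15, 14, 13, 12, 11, 10, 9, 8,  15, 14, 13, 12, 11, 10, 9, 8,
        15, 14, 13, 12, 11, 10, 9, 8,  15, 14, 13, 12, 11, 10, 9, 8,
    );
    let r = _mm512_unpackhi_epi16(a, b);
    // Each 128-bit lane becomes [4, 12, 5, 13, 6, 14, 7, 15] (element 0 listed first).
    #[rustfmt::skip]
    let expected = _mm512_set_epi16(
        15, 7, 14, 6, 13, 5, 12, 4,  15, 7, 14, 6, 13, 5, 12, 4,
        15, 7, 14, 6, 13, 5, 12, 4,  15, 7, 14, 6, 13, 5, 12, 4,
    );
    assert_eq!(_mm512_cmpeq_epi16_mask(r, expected), u32::MAX);
}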
8927
8928/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8929///
8930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi16&expand=6010)
8931#[inline]
8932#[target_feature(enable = "avx512bw")]
8933#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8934#[cfg_attr(test, assert_instr(vpunpckhwd))]
8935#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8936pub const fn _mm512_mask_unpackhi_epi16(
8937    src: __m512i,
8938    k: __mmask32,
8939    a: __m512i,
8940    b: __m512i,
8941) -> __m512i {
8942    unsafe {
8943        let unpackhi = _mm512_unpackhi_epi16(a, b).as_i16x32();
8944        transmute(simd_select_bitmask(k, unpackhi, src.as_i16x32()))
8945    }
8946}
8947
8948/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8949///
8950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi16&expand=6011)
8951#[inline]
8952#[target_feature(enable = "avx512bw")]
8953#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8954#[cfg_attr(test, assert_instr(vpunpckhwd))]
8955#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8956pub const fn _mm512_maskz_unpackhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
8957    unsafe {
8958        let unpackhi = _mm512_unpackhi_epi16(a, b).as_i16x32();
8959        transmute(simd_select_bitmask(k, unpackhi, i16x32::ZERO))
8960    }
8961}
8962
8963/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8964///
8965/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi16&expand=6007)
8966#[inline]
8967#[target_feature(enable = "avx512bw,avx512vl")]
8968#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8969#[cfg_attr(test, assert_instr(vpunpckhwd))]
8970#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8971pub const fn _mm256_mask_unpackhi_epi16(
8972    src: __m256i,
8973    k: __mmask16,
8974    a: __m256i,
8975    b: __m256i,
8976) -> __m256i {
8977    unsafe {
8978        let unpackhi = _mm256_unpackhi_epi16(a, b).as_i16x16();
8979        transmute(simd_select_bitmask(k, unpackhi, src.as_i16x16()))
8980    }
8981}
8982
8983/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8984///
8985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi16&expand=6008)
8986#[inline]
8987#[target_feature(enable = "avx512bw,avx512vl")]
8988#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8989#[cfg_attr(test, assert_instr(vpunpckhwd))]
8990#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8991pub const fn _mm256_maskz_unpackhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
8992    unsafe {
8993        let unpackhi = _mm256_unpackhi_epi16(a, b).as_i16x16();
8994        transmute(simd_select_bitmask(k, unpackhi, i16x16::ZERO))
8995    }
8996}
8997
8998/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8999///
9000/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi16&expand=6004)
9001#[inline]
9002#[target_feature(enable = "avx512bw,avx512vl")]
9003#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9004#[cfg_attr(test, assert_instr(vpunpckhwd))]
9005#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9006pub const fn _mm_mask_unpackhi_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
9007    unsafe {
9008        let unpackhi = _mm_unpackhi_epi16(a, b).as_i16x8();
9009        transmute(simd_select_bitmask(k, unpackhi, src.as_i16x8()))
9010    }
9011}
9012
9013/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9014///
9015/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi16&expand=6005)
9016#[inline]
9017#[target_feature(enable = "avx512bw,avx512vl")]
9018#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9019#[cfg_attr(test, assert_instr(vpunpckhwd))]
9020#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9021pub const fn _mm_maskz_unpackhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
9022    unsafe {
9023        let unpackhi = _mm_unpackhi_epi16(a, b).as_i16x8();
9024        transmute(simd_select_bitmask(k, unpackhi, i16x8::ZERO))
9025    }
9026}
9027
9028/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
9029///
9030/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi8&expand=6039)
9031#[inline]
9032#[target_feature(enable = "avx512bw")]
9033#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9034#[cfg_attr(test, assert_instr(vpunpckhbw))]
9035#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9036pub const fn _mm512_unpackhi_epi8(a: __m512i, b: __m512i) -> __m512i {
9037    unsafe {
9038        let a = a.as_i8x64();
9039        let b = b.as_i8x64();
9040        #[rustfmt::skip]
9041        let r: i8x64 = simd_shuffle!(
9042            a,
9043            b,
9044            [
9045                8, 64 + 8, 9, 64 + 9,
9046                10, 64 + 10, 11, 64 + 11,
9047                12, 64 + 12, 13, 64 + 13,
9048                14, 64 + 14, 15, 64 + 15,
9049                24, 64 + 24, 25, 64 + 25,
9050                26, 64 + 26, 27, 64 + 27,
9051                28, 64 + 28, 29, 64 + 29,
9052                30, 64 + 30, 31, 64 + 31,
9053                40, 64 + 40, 41, 64 + 41,
9054                42, 64 + 42, 43, 64 + 43,
9055                44, 64 + 44, 45, 64 + 45,
9056                46, 64 + 46, 47, 64 + 47,
9057                56, 64 + 56, 57, 64 + 57,
9058                58, 64 + 58, 59, 64 + 59,
9059                60, 64 + 60, 61, 64 + 61,
9060                62, 64 + 62, 63, 64 + 63,
9061            ],
9062        );
9063        transmute(r)
9064    }
9065}
9066
9067/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9068///
9069/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi8&expand=6037)
9070#[inline]
9071#[target_feature(enable = "avx512bw")]
9072#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9073#[cfg_attr(test, assert_instr(vpunpckhbw))]
9074#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9075pub const fn _mm512_mask_unpackhi_epi8(
9076    src: __m512i,
9077    k: __mmask64,
9078    a: __m512i,
9079    b: __m512i,
9080) -> __m512i {
9081    unsafe {
9082        let unpackhi = _mm512_unpackhi_epi8(a, b).as_i8x64();
9083        transmute(simd_select_bitmask(k, unpackhi, src.as_i8x64()))
9084    }
9085}
9086
9087/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9088///
9089/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi8&expand=6038)
9090#[inline]
9091#[target_feature(enable = "avx512bw")]
9092#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9093#[cfg_attr(test, assert_instr(vpunpckhbw))]
9094#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9095pub const fn _mm512_maskz_unpackhi_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
9096    unsafe {
9097        let unpackhi = _mm512_unpackhi_epi8(a, b).as_i8x64();
9098        transmute(simd_select_bitmask(k, unpackhi, i8x64::ZERO))
9099    }
9100}
9101
9102/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9103///
9104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi8&expand=6034)
9105#[inline]
9106#[target_feature(enable = "avx512bw,avx512vl")]
9107#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9108#[cfg_attr(test, assert_instr(vpunpckhbw))]
9109#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9110pub const fn _mm256_mask_unpackhi_epi8(
9111    src: __m256i,
9112    k: __mmask32,
9113    a: __m256i,
9114    b: __m256i,
9115) -> __m256i {
9116    unsafe {
9117        let unpackhi = _mm256_unpackhi_epi8(a, b).as_i8x32();
9118        transmute(simd_select_bitmask(k, unpackhi, src.as_i8x32()))
9119    }
9120}
9121
9122/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9123///
9124/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi8&expand=6035)
9125#[inline]
9126#[target_feature(enable = "avx512bw,avx512vl")]
9127#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9128#[cfg_attr(test, assert_instr(vpunpckhbw))]
9129#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9130pub const fn _mm256_maskz_unpackhi_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
9131    unsafe {
9132        let unpackhi = _mm256_unpackhi_epi8(a, b).as_i8x32();
9133        transmute(simd_select_bitmask(k, unpackhi, i8x32::ZERO))
9134    }
9135}
9136
9137/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9138///
9139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi8&expand=6031)
9140#[inline]
9141#[target_feature(enable = "avx512bw,avx512vl")]
9142#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9143#[cfg_attr(test, assert_instr(vpunpckhbw))]
9144#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9145pub const fn _mm_mask_unpackhi_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
9146    unsafe {
9147        let unpackhi = _mm_unpackhi_epi8(a, b).as_i8x16();
9148        transmute(simd_select_bitmask(k, unpackhi, src.as_i8x16()))
9149    }
9150}
9151
9152/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9153///
9154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi8&expand=6032)
9155#[inline]
9156#[target_feature(enable = "avx512bw,avx512vl")]
9157#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9158#[cfg_attr(test, assert_instr(vpunpckhbw))]
9159#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9160pub const fn _mm_maskz_unpackhi_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
9161    unsafe {
9162        let unpackhi = _mm_unpackhi_epi8(a, b).as_i8x16();
9163        transmute(simd_select_bitmask(k, unpackhi, i8x16::ZERO))
9164    }
9165}
9166
9167/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
9168///
9169/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi16&expand=6069)
9170#[inline]
9171#[target_feature(enable = "avx512bw")]
9172#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9173#[cfg_attr(test, assert_instr(vpunpcklwd))]
9174#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9175pub const fn _mm512_unpacklo_epi16(a: __m512i, b: __m512i) -> __m512i {
9176    unsafe {
9177        let a = a.as_i16x32();
9178        let b = b.as_i16x32();
9179        #[rustfmt::skip]
9180        let r: i16x32 = simd_shuffle!(
9181            a,
9182            b,
9183            [
9184               0,  32+0,   1, 32+1,
9185               2,  32+2,   3, 32+3,
9186               8,  32+8,   9, 32+9,
9187               10, 32+10, 11, 32+11,
9188               16, 32+16, 17, 32+17,
9189               18, 32+18, 19, 32+19,
9190               24, 32+24, 25, 32+25,
9191               26, 32+26, 27, 32+27
9192            ],
9193        );
9194        transmute(r)
9195    }
9196}
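
// Illustrative sketch (not part of the upstream source, and not wired into the crate's
// test harness): the low-half counterpart of the unpackhi example above; each 128-bit
// lane interleaves words 0..=3 of `a` and `b`. The helper name `_unpacklo_epi16_demo`
// is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn _unpacklo_epi16_demo() {
    let a = _mm512_set1_epi16(1);
    let b = _mm512_set1_epi16(2);
    let r = _mm512_unpacklo_epi16(a, b);
    // The result alternates a-word, b-word: 1, 2, 1, 2, ... across every lane.
    #[rustfmt::skip]
    let expected = _mm512_set_epi16(
        2, 1, 2, 1, 2, 1, 2, 1,  2, 1, 2, 1, 2, 1, 2, 1,
        2, 1, 2, 1, 2, 1, 2, 1,  2, 1, 2, 1, 2, 1, 2, 1,
    );
    assert_eq!(_mm512_cmpeq_epi16_mask(r, expected), u32::MAX);
}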
9197
9198/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9199///
9200/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi16&expand=6067)
9201#[inline]
9202#[target_feature(enable = "avx512bw")]
9203#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9204#[cfg_attr(test, assert_instr(vpunpcklwd))]
9205#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9206pub const fn _mm512_mask_unpacklo_epi16(
9207    src: __m512i,
9208    k: __mmask32,
9209    a: __m512i,
9210    b: __m512i,
9211) -> __m512i {
9212    unsafe {
9213        let unpacklo = _mm512_unpacklo_epi16(a, b).as_i16x32();
9214        transmute(simd_select_bitmask(k, unpacklo, src.as_i16x32()))
9215    }
9216}
9217
9218/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9219///
9220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi16&expand=6068)
9221#[inline]
9222#[target_feature(enable = "avx512bw")]
9223#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9224#[cfg_attr(test, assert_instr(vpunpcklwd))]
9225#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9226pub const fn _mm512_maskz_unpacklo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
9227    unsafe {
9228        let unpacklo = _mm512_unpacklo_epi16(a, b).as_i16x32();
9229        transmute(simd_select_bitmask(k, unpacklo, i16x32::ZERO))
9230    }
9231}
9232
9233/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9234///
9235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi16&expand=6064)
9236#[inline]
9237#[target_feature(enable = "avx512bw,avx512vl")]
9238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9239#[cfg_attr(test, assert_instr(vpunpcklwd))]
9240#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9241pub const fn _mm256_mask_unpacklo_epi16(
9242    src: __m256i,
9243    k: __mmask16,
9244    a: __m256i,
9245    b: __m256i,
9246) -> __m256i {
9247    unsafe {
9248        let unpacklo = _mm256_unpacklo_epi16(a, b).as_i16x16();
9249        transmute(simd_select_bitmask(k, unpacklo, src.as_i16x16()))
9250    }
9251}
9252
9253/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9254///
9255/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi16&expand=6065)
9256#[inline]
9257#[target_feature(enable = "avx512bw,avx512vl")]
9258#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9259#[cfg_attr(test, assert_instr(vpunpcklwd))]
9260#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9261pub const fn _mm256_maskz_unpacklo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
9262    unsafe {
9263        let unpacklo = _mm256_unpacklo_epi16(a, b).as_i16x16();
9264        transmute(simd_select_bitmask(k, unpacklo, i16x16::ZERO))
9265    }
9266}
9267
9268/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9269///
9270/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi16&expand=6061)
9271#[inline]
9272#[target_feature(enable = "avx512bw,avx512vl")]
9273#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9274#[cfg_attr(test, assert_instr(vpunpcklwd))]
9275#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9276pub const fn _mm_mask_unpacklo_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
9277    unsafe {
9278        let unpacklo = _mm_unpacklo_epi16(a, b).as_i16x8();
9279        transmute(simd_select_bitmask(k, unpacklo, src.as_i16x8()))
9280    }
9281}
9282
9283/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9284///
9285/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi16&expand=6062)
9286#[inline]
9287#[target_feature(enable = "avx512bw,avx512vl")]
9288#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9289#[cfg_attr(test, assert_instr(vpunpcklwd))]
9290#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9291pub const fn _mm_maskz_unpacklo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
9292    unsafe {
9293        let unpacklo = _mm_unpacklo_epi16(a, b).as_i16x8();
9294        transmute(simd_select_bitmask(k, unpacklo, i16x8::ZERO))
9295    }
9296}
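
// Illustrative sketch (not part of the original source): interleaving the low
// words of each 128-bit lane of two vectors while zeroing the result lanes
// taken from `b` via the zero-mask variant. Assumes an AVX-512BW-capable target.
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn example_maskz_unpacklo_epi16() -> __m512i {
    let a = _mm512_set1_epi16(1);
    let b = _mm512_set1_epi16(2);
    // Mask 0b0101...: result lanes that come from `a` are kept, those from `b` are zeroed.
    _mm512_maskz_unpacklo_epi16(0x5555_5555, a, b)
}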
9297
9298/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
9299///
9300/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi8&expand=6096)
9301#[inline]
9302#[target_feature(enable = "avx512bw")]
9303#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9304#[cfg_attr(test, assert_instr(vpunpcklbw))]
9305#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9306pub const fn _mm512_unpacklo_epi8(a: __m512i, b: __m512i) -> __m512i {
9307    unsafe {
9308        let a = a.as_i8x64();
9309        let b = b.as_i8x64();
9310        #[rustfmt::skip]
9311        let r: i8x64 = simd_shuffle!(
9312            a,
9313            b,
9314            [
9315                0,  64+0,   1, 64+1,
9316                2,  64+2,   3, 64+3,
9317                4,  64+4,   5, 64+5,
9318                6,  64+6,   7, 64+7,
9319                16, 64+16, 17, 64+17,
9320                18, 64+18, 19, 64+19,
9321                20, 64+20, 21, 64+21,
9322                22, 64+22, 23, 64+23,
9323                32, 64+32, 33, 64+33,
9324                34, 64+34, 35, 64+35,
9325                36, 64+36, 37, 64+37,
9326                38, 64+38, 39, 64+39,
9327                48, 64+48, 49, 64+49,
9328                50, 64+50, 51, 64+51,
9329                52, 64+52, 53, 64+53,
9330                54, 64+54, 55, 64+55,
9331            ],
9332        );
9333        transmute(r)
9334    }
9335}
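
// Illustrative sketch (not part of the original source): within every 128-bit
// lane the low eight bytes of `a` and `b` come out interleaved as
// a0, b0, a1, b1, ..., a7, b7, which is what the lane-relative indices above
// (bases 0, 16, 32, 48 for `a` and 64, 80, 96, 112 for `b`) spell out.
// Assumes an AVX-512BW-capable target.
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn example_unpacklo_epi8() -> __m512i {
    let a = _mm512_set1_epi8(0x0A);
    let b = _mm512_set1_epi8(0x0B);
    // Every 128-bit lane of the result alternates 0x0A, 0x0B, 0x0A, 0x0B, ...
    _mm512_unpacklo_epi8(a, b)
}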
9336
9337/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9338///
9339/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi8&expand=6094)
9340#[inline]
9341#[target_feature(enable = "avx512bw")]
9342#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9343#[cfg_attr(test, assert_instr(vpunpcklbw))]
9344#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9345pub const fn _mm512_mask_unpacklo_epi8(
9346    src: __m512i,
9347    k: __mmask64,
9348    a: __m512i,
9349    b: __m512i,
9350) -> __m512i {
9351    unsafe {
9352        let unpacklo = _mm512_unpacklo_epi8(a, b).as_i8x64();
9353        transmute(simd_select_bitmask(k, unpacklo, src.as_i8x64()))
9354    }
9355}
9356
9357/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9358///
9359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi8&expand=6095)
9360#[inline]
9361#[target_feature(enable = "avx512bw")]
9362#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9363#[cfg_attr(test, assert_instr(vpunpcklbw))]
9364#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9365pub const fn _mm512_maskz_unpacklo_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
9366    unsafe {
9367        let unpacklo = _mm512_unpacklo_epi8(a, b).as_i8x64();
9368        transmute(simd_select_bitmask(k, unpacklo, i8x64::ZERO))
9369    }
9370}
9371
9372/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9373///
9374/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi8&expand=6091)
9375#[inline]
9376#[target_feature(enable = "avx512bw,avx512vl")]
9377#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9378#[cfg_attr(test, assert_instr(vpunpcklbw))]
9379#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9380pub const fn _mm256_mask_unpacklo_epi8(
9381    src: __m256i,
9382    k: __mmask32,
9383    a: __m256i,
9384    b: __m256i,
9385) -> __m256i {
9386    unsafe {
9387        let unpacklo = _mm256_unpacklo_epi8(a, b).as_i8x32();
9388        transmute(simd_select_bitmask(k, unpacklo, src.as_i8x32()))
9389    }
9390}
9391
9392/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9393///
9394/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi8&expand=6092)
9395#[inline]
9396#[target_feature(enable = "avx512bw,avx512vl")]
9397#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9398#[cfg_attr(test, assert_instr(vpunpcklbw))]
9399#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9400pub const fn _mm256_maskz_unpacklo_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
9401    unsafe {
9402        let unpacklo = _mm256_unpacklo_epi8(a, b).as_i8x32();
9403        transmute(simd_select_bitmask(k, unpacklo, i8x32::ZERO))
9404    }
9405}
9406
9407/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9408///
9409/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi8&expand=6088)
9410#[inline]
9411#[target_feature(enable = "avx512bw,avx512vl")]
9412#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9413#[cfg_attr(test, assert_instr(vpunpcklbw))]
9414#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9415pub const fn _mm_mask_unpacklo_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
9416    unsafe {
9417        let unpacklo = _mm_unpacklo_epi8(a, b).as_i8x16();
9418        transmute(simd_select_bitmask(k, unpacklo, src.as_i8x16()))
9419    }
9420}
9421
9422/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9423///
9424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi8&expand=6089)
9425#[inline]
9426#[target_feature(enable = "avx512bw,avx512vl")]
9427#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9428#[cfg_attr(test, assert_instr(vpunpcklbw))]
9429#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9430pub const fn _mm_maskz_unpacklo_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
9431    unsafe {
9432        let unpacklo = _mm_unpacklo_epi8(a, b).as_i8x16();
9433        transmute(simd_select_bitmask(k, unpacklo, i8x16::ZERO))
9434    }
9435}
9436
9437/// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9438///
9439/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi16&expand=3795)
9440#[inline]
9441#[target_feature(enable = "avx512bw")]
9442#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9443#[cfg_attr(test, assert_instr(vmovdqu16))]
9444#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9445pub const fn _mm512_mask_mov_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
9446    unsafe {
9447        let mov = a.as_i16x32();
9448        transmute(simd_select_bitmask(k, mov, src.as_i16x32()))
9449    }
9450}
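
// Illustrative sketch (not part of the original source): `mask_mov` acts as a
// per-lane blend, taking a lane from `a` where the mask bit is set and keeping
// the lane from `src` elsewhere. Assumes an AVX-512BW-capable target.
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn example_mask_mov_epi16(src: __m512i, a: __m512i) -> __m512i {
    // Lower 16 lanes come from `a`, upper 16 lanes are kept from `src`.
    _mm512_mask_mov_epi16(src, 0x0000_FFFF, a)
}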
9451
9452/// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9453///
9454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi16&expand=3796)
9455#[inline]
9456#[target_feature(enable = "avx512bw")]
9457#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9458#[cfg_attr(test, assert_instr(vmovdqu16))]
9459#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9460pub const fn _mm512_maskz_mov_epi16(k: __mmask32, a: __m512i) -> __m512i {
9461    unsafe {
9462        let mov = a.as_i16x32();
9463        transmute(simd_select_bitmask(k, mov, i16x32::ZERO))
9464    }
9465}
9466
9467/// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9468///
9469/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi16&expand=3793)
9470#[inline]
9471#[target_feature(enable = "avx512bw,avx512vl")]
9472#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9473#[cfg_attr(test, assert_instr(vmovdqu16))]
9474#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9475pub const fn _mm256_mask_mov_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
9476    unsafe {
9477        let mov = a.as_i16x16();
9478        transmute(simd_select_bitmask(k, mov, src.as_i16x16()))
9479    }
9480}
9481
9482/// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9483///
9484/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi16&expand=3794)
9485#[inline]
9486#[target_feature(enable = "avx512bw,avx512vl")]
9487#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9488#[cfg_attr(test, assert_instr(vmovdqu16))]
9489#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9490pub const fn _mm256_maskz_mov_epi16(k: __mmask16, a: __m256i) -> __m256i {
9491    unsafe {
9492        let mov = a.as_i16x16();
9493        transmute(simd_select_bitmask(k, mov, i16x16::ZERO))
9494    }
9495}
9496
9497/// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9498///
9499/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi16&expand=3791)
9500#[inline]
9501#[target_feature(enable = "avx512bw,avx512vl")]
9502#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9503#[cfg_attr(test, assert_instr(vmovdqu16))]
9504#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9505pub const fn _mm_mask_mov_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
9506    unsafe {
9507        let mov = a.as_i16x8();
9508        transmute(simd_select_bitmask(k, mov, src.as_i16x8()))
9509    }
9510}
9511
9512/// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9513///
9514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi16&expand=3792)
9515#[inline]
9516#[target_feature(enable = "avx512bw,avx512vl")]
9517#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9518#[cfg_attr(test, assert_instr(vmovdqu16))]
9519#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9520pub const fn _mm_maskz_mov_epi16(k: __mmask8, a: __m128i) -> __m128i {
9521    unsafe {
9522        let mov = a.as_i16x8();
9523        transmute(simd_select_bitmask(k, mov, i16x8::ZERO))
9524    }
9525}
9526
9527/// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9528///
9529/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi8&expand=3813)
9530#[inline]
9531#[target_feature(enable = "avx512bw")]
9532#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9533#[cfg_attr(test, assert_instr(vmovdqu8))]
9534#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9535pub const fn _mm512_mask_mov_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
9536    unsafe {
9537        let mov = a.as_i8x64();
9538        transmute(simd_select_bitmask(k, mov, src.as_i8x64()))
9539    }
9540}
9541
9542/// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9543///
9544/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi8&expand=3814)
9545#[inline]
9546#[target_feature(enable = "avx512bw")]
9547#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9548#[cfg_attr(test, assert_instr(vmovdqu8))]
9549#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9550pub const fn _mm512_maskz_mov_epi8(k: __mmask64, a: __m512i) -> __m512i {
9551    unsafe {
9552        let mov = a.as_i8x64();
9553        transmute(simd_select_bitmask(k, mov, i8x64::ZERO))
9554    }
9555}
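
// Illustrative sketch (not part of the original source): pairing a byte
// comparison with `maskz_mov` keeps only the bytes equal to `needle` and
// clears every other byte. `_mm512_cmpeq_epi8_mask` is the AVX-512BW byte
// compare from this module. Assumes an AVX-512BW-capable target.
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn example_keep_matching_bytes(haystack: __m512i, needle: i8) -> __m512i {
    let hits: __mmask64 = _mm512_cmpeq_epi8_mask(haystack, _mm512_set1_epi8(needle));
    _mm512_maskz_mov_epi8(hits, haystack)
}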
9556
9557/// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9558///
9559/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi8&expand=3811)
9560#[inline]
9561#[target_feature(enable = "avx512bw,avx512vl")]
9562#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9563#[cfg_attr(test, assert_instr(vmovdqu8))]
9564#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9565pub const fn _mm256_mask_mov_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
9566    unsafe {
9567        let mov = a.as_i8x32();
9568        transmute(simd_select_bitmask(k, mov, src.as_i8x32()))
9569    }
9570}
9571
9572/// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9573///
9574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi8&expand=3812)
9575#[inline]
9576#[target_feature(enable = "avx512bw,avx512vl")]
9577#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9578#[cfg_attr(test, assert_instr(vmovdqu8))]
9579#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9580pub const fn _mm256_maskz_mov_epi8(k: __mmask32, a: __m256i) -> __m256i {
9581    unsafe {
9582        let mov = a.as_i8x32();
9583        transmute(simd_select_bitmask(k, mov, i8x32::ZERO))
9584    }
9585}
9586
9587/// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9588///
9589/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi8&expand=3809)
9590#[inline]
9591#[target_feature(enable = "avx512bw,avx512vl")]
9592#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9593#[cfg_attr(test, assert_instr(vmovdqu8))]
9594#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9595pub const fn _mm_mask_mov_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
9596    unsafe {
9597        let mov = a.as_i8x16();
9598        transmute(simd_select_bitmask(k, mov, src.as_i8x16()))
9599    }
9600}
9601
9602/// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9603///
9604/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi8&expand=3810)
9605#[inline]
9606#[target_feature(enable = "avx512bw,avx512vl")]
9607#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9608#[cfg_attr(test, assert_instr(vmovdqu8))]
9609#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9610pub const fn _mm_maskz_mov_epi8(k: __mmask16, a: __m128i) -> __m128i {
9611    unsafe {
9612        let mov = a.as_i8x16();
9613        transmute(simd_select_bitmask(k, mov, i8x16::ZERO))
9614    }
9615}
9616
9617/// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9618///
9619/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi16&expand=4942)
9620#[inline]
9621#[target_feature(enable = "avx512bw")]
9622#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9623#[cfg_attr(test, assert_instr(vpbroadcastw))]
9624#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9625pub const fn _mm512_mask_set1_epi16(src: __m512i, k: __mmask32, a: i16) -> __m512i {
9626    unsafe {
9627        let r = _mm512_set1_epi16(a).as_i16x32();
9628        transmute(simd_select_bitmask(k, r, src.as_i16x32()))
9629    }
9630}
9631
9632/// Broadcast 16-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9633///
9634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi16&expand=4943)
9635#[inline]
9636#[target_feature(enable = "avx512bw")]
9637#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9638#[cfg_attr(test, assert_instr(vpbroadcastw))]
9639#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9640pub const fn _mm512_maskz_set1_epi16(k: __mmask32, a: i16) -> __m512i {
9641    unsafe {
9642        let r = _mm512_set1_epi16(a).as_i16x32();
9643        transmute(simd_select_bitmask(k, r, i16x32::ZERO))
9644    }
9645}
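
// Illustrative sketch (not part of the original source): combining the masked
// broadcast with an existing vector overwrites only the selected lanes with a
// constant, here setting the even 16-bit lanes of `v` to -1 and leaving the
// odd lanes untouched. Assumes an AVX-512BW-capable target.
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn example_mask_set1_epi16(v: __m512i) -> __m512i {
    _mm512_mask_set1_epi16(v, 0x5555_5555, -1)
}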
9646
9647/// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9648///
9649/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi16&expand=4939)
9650#[inline]
9651#[target_feature(enable = "avx512bw,avx512vl")]
9652#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9653#[cfg_attr(test, assert_instr(vpbroadcastw))]
9654#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9655pub const fn _mm256_mask_set1_epi16(src: __m256i, k: __mmask16, a: i16) -> __m256i {
9656    unsafe {
9657        let r = _mm256_set1_epi16(a).as_i16x16();
9658        transmute(simd_select_bitmask(k, r, src.as_i16x16()))
9659    }
9660}
9661
9662/// Broadcast 16-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9663///
9664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi16&expand=4940)
9665#[inline]
9666#[target_feature(enable = "avx512bw,avx512vl")]
9667#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9668#[cfg_attr(test, assert_instr(vpbroadcastw))]
9669#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9670pub const fn _mm256_maskz_set1_epi16(k: __mmask16, a: i16) -> __m256i {
9671    unsafe {
9672        let r = _mm256_set1_epi16(a).as_i16x16();
9673        transmute(simd_select_bitmask(k, r, i16x16::ZERO))
9674    }
9675}
9676
9677/// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9678///
9679/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi16&expand=4936)
9680#[inline]
9681#[target_feature(enable = "avx512bw,avx512vl")]
9682#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9683#[cfg_attr(test, assert_instr(vpbroadcastw))]
9684#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9685pub const fn _mm_mask_set1_epi16(src: __m128i, k: __mmask8, a: i16) -> __m128i {
9686    unsafe {
9687        let r = _mm_set1_epi16(a).as_i16x8();
9688        transmute(simd_select_bitmask(k, r, src.as_i16x8()))
9689    }
9690}
9691
9692/// Broadcast 16-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9693///
9694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi16&expand=4937)
9695#[inline]
9696#[target_feature(enable = "avx512bw,avx512vl")]
9697#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9698#[cfg_attr(test, assert_instr(vpbroadcastw))]
9699#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9700pub const fn _mm_maskz_set1_epi16(k: __mmask8, a: i16) -> __m128i {
9701    unsafe {
9702        let r = _mm_set1_epi16(a).as_i16x8();
9703        transmute(simd_select_bitmask(k, r, i16x8::ZERO))
9704    }
9705}
9706
9707/// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9708///
9709/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi8&expand=4970)
9710#[inline]
9711#[target_feature(enable = "avx512bw")]
9712#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9713#[cfg_attr(test, assert_instr(vpbroadcast))]
9714#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9715pub const fn _mm512_mask_set1_epi8(src: __m512i, k: __mmask64, a: i8) -> __m512i {
9716    unsafe {
9717        let r = _mm512_set1_epi8(a).as_i8x64();
9718        transmute(simd_select_bitmask(k, r, src.as_i8x64()))
9719    }
9720}
9721
9722/// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9723///
9724/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi8&expand=4971)
9725#[inline]
9726#[target_feature(enable = "avx512bw")]
9727#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9728#[cfg_attr(test, assert_instr(vpbroadcast))]
9729#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9730pub const fn _mm512_maskz_set1_epi8(k: __mmask64, a: i8) -> __m512i {
9731    unsafe {
9732        let r = _mm512_set1_epi8(a).as_i8x64();
9733        transmute(simd_select_bitmask(k, r, i8x64::ZERO))
9734    }
9735}
9736
9737/// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9738///
9739/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi8&expand=4967)
9740#[inline]
9741#[target_feature(enable = "avx512bw,avx512vl")]
9742#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9743#[cfg_attr(test, assert_instr(vpbroadcast))]
9744#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9745pub const fn _mm256_mask_set1_epi8(src: __m256i, k: __mmask32, a: i8) -> __m256i {
9746    unsafe {
9747        let r = _mm256_set1_epi8(a).as_i8x32();
9748        transmute(simd_select_bitmask(k, r, src.as_i8x32()))
9749    }
9750}
9751
9752/// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9753///
9754/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi8&expand=4968)
9755#[inline]
9756#[target_feature(enable = "avx512bw,avx512vl")]
9757#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9758#[cfg_attr(test, assert_instr(vpbroadcast))]
9759#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9760pub const fn _mm256_maskz_set1_epi8(k: __mmask32, a: i8) -> __m256i {
9761    unsafe {
9762        let r = _mm256_set1_epi8(a).as_i8x32();
9763        transmute(simd_select_bitmask(k, r, i8x32::ZERO))
9764    }
9765}
9766
9767/// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9768///
9769/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi8&expand=4964)
9770#[inline]
9771#[target_feature(enable = "avx512bw,avx512vl")]
9772#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9773#[cfg_attr(test, assert_instr(vpbroadcast))]
9774#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9775pub const fn _mm_mask_set1_epi8(src: __m128i, k: __mmask16, a: i8) -> __m128i {
9776    unsafe {
9777        let r = _mm_set1_epi8(a).as_i8x16();
9778        transmute(simd_select_bitmask(k, r, src.as_i8x16()))
9779    }
9780}
9781
9782/// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9783///
9784/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi8&expand=4965)
9785#[inline]
9786#[target_feature(enable = "avx512bw,avx512vl")]
9787#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9788#[cfg_attr(test, assert_instr(vpbroadcast))]
9789#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9790pub const fn _mm_maskz_set1_epi8(k: __mmask16, a: i8) -> __m128i {
9791    unsafe {
9792        let r = _mm_set1_epi8(a).as_i8x16();
9793        transmute(simd_select_bitmask(k, r, i8x16::ZERO))
9794    }
9795}
9796
9797/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst.
9798///
9799/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shufflelo_epi16&expand=5221)
9800#[inline]
9801#[target_feature(enable = "avx512bw")]
9802#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9803#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))]
9804#[rustc_legacy_const_generics(1)]
9805#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9806pub const fn _mm512_shufflelo_epi16<const IMM8: i32>(a: __m512i) -> __m512i {
9807    unsafe {
9808        static_assert_uimm_bits!(IMM8, 8);
9809        let a = a.as_i16x32();
9810        let r: i16x32 = simd_shuffle!(
9811            a,
9812            a,
9813            [
9814                IMM8 as u32 & 0b11,
9815                (IMM8 as u32 >> 2) & 0b11,
9816                (IMM8 as u32 >> 4) & 0b11,
9817                (IMM8 as u32 >> 6) & 0b11,
9818                4,
9819                5,
9820                6,
9821                7,
9822                (IMM8 as u32 & 0b11) + 8,
9823                ((IMM8 as u32 >> 2) & 0b11) + 8,
9824                ((IMM8 as u32 >> 4) & 0b11) + 8,
9825                ((IMM8 as u32 >> 6) & 0b11) + 8,
9826                12,
9827                13,
9828                14,
9829                15,
9830                (IMM8 as u32 & 0b11) + 16,
9831                ((IMM8 as u32 >> 2) & 0b11) + 16,
9832                ((IMM8 as u32 >> 4) & 0b11) + 16,
9833                ((IMM8 as u32 >> 6) & 0b11) + 16,
9834                20,
9835                21,
9836                22,
9837                23,
9838                (IMM8 as u32 & 0b11) + 24,
9839                ((IMM8 as u32 >> 2) & 0b11) + 24,
9840                ((IMM8 as u32 >> 4) & 0b11) + 24,
9841                ((IMM8 as u32 >> 6) & 0b11) + 24,
9842                28,
9843                29,
9844                30,
9845                31,
9846            ],
9847        );
9848        transmute(r)
9849    }
9850}
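
// Illustrative sketch (not part of the original source): each 2-bit field of
// IMM8 selects which of the low four words of a 128-bit lane lands in result
// positions 0..3 of that lane, so 0b00_01_10_11 reverses them while the high
// four words pass through. Assumes an AVX-512BW-capable target.
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn example_shufflelo_epi16(a: __m512i) -> __m512i {
    // Per lane: dst[0..4] = a[3], a[2], a[1], a[0]; dst[4..8] = a[4..8].
    _mm512_shufflelo_epi16::<0b00_01_10_11>(a)
}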
9851
9852/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
9853///
9854/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shufflelo_epi16&expand=5219)
9855#[inline]
9856#[target_feature(enable = "avx512bw")]
9857#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9858#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))]
9859#[rustc_legacy_const_generics(3)]
9860#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9861pub const fn _mm512_mask_shufflelo_epi16<const IMM8: i32>(
9862    src: __m512i,
9863    k: __mmask32,
9864    a: __m512i,
9865) -> __m512i {
9866    unsafe {
9867        static_assert_uimm_bits!(IMM8, 8);
9868        let r = _mm512_shufflelo_epi16::<IMM8>(a);
9869        transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32()))
9870    }
9871}
9872
9873/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9874///
9875/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shufflelo_epi16&expand=5220)
9876#[inline]
9877#[target_feature(enable = "avx512bw")]
9878#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9879#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))]
9880#[rustc_legacy_const_generics(2)]
9881#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9882pub const fn _mm512_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
9883    unsafe {
9884        static_assert_uimm_bits!(IMM8, 8);
9885        let r = _mm512_shufflelo_epi16::<IMM8>(a);
9886        transmute(simd_select_bitmask(k, r.as_i16x32(), i16x32::ZERO))
9887    }
9888}
9889
9890/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
9891///
9892/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shufflelo_epi16&expand=5216)
9893#[inline]
9894#[target_feature(enable = "avx512bw,avx512vl")]
9895#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9896#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
9897#[rustc_legacy_const_generics(3)]
9898#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9899pub const fn _mm256_mask_shufflelo_epi16<const IMM8: i32>(
9900    src: __m256i,
9901    k: __mmask16,
9902    a: __m256i,
9903) -> __m256i {
9904    unsafe {
9905        static_assert_uimm_bits!(IMM8, 8);
9906        let shuffle = _mm256_shufflelo_epi16::<IMM8>(a);
9907        transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16()))
9908    }
9909}
9910
9911/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9912///
9913/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shufflelo_epi16&expand=5217)
9914#[inline]
9915#[target_feature(enable = "avx512bw,avx512vl")]
9916#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9917#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
9918#[rustc_legacy_const_generics(2)]
9919#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9920pub const fn _mm256_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
9921    unsafe {
9922        static_assert_uimm_bits!(IMM8, 8);
9923        let shuffle = _mm256_shufflelo_epi16::<IMM8>(a);
9924        transmute(simd_select_bitmask(k, shuffle.as_i16x16(), i16x16::ZERO))
9925    }
9926}
9927
9928/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
9929///
9930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shufflelo_epi16&expand=5213)
9931#[inline]
9932#[target_feature(enable = "avx512bw,avx512vl")]
9933#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9934#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
9935#[rustc_legacy_const_generics(3)]
9936#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9937pub const fn _mm_mask_shufflelo_epi16<const IMM8: i32>(
9938    src: __m128i,
9939    k: __mmask8,
9940    a: __m128i,
9941) -> __m128i {
9942    unsafe {
9943        static_assert_uimm_bits!(IMM8, 8);
9944        let shuffle = _mm_shufflelo_epi16::<IMM8>(a);
9945        transmute(simd_select_bitmask(k, shuffle.as_i16x8(), src.as_i16x8()))
9946    }
9947}
9948
9949/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9950///
9951/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shufflelo_epi16&expand=5214)
9952#[inline]
9953#[target_feature(enable = "avx512bw,avx512vl")]
9954#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9955#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
9956#[rustc_legacy_const_generics(2)]
9957#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9958pub const fn _mm_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
9959    unsafe {
9960        static_assert_uimm_bits!(IMM8, 8);
9961        let shuffle = _mm_shufflelo_epi16::<IMM8>(a);
9962        transmute(simd_select_bitmask(k, shuffle.as_i16x8(), i16x8::ZERO))
9963    }
9964}
9965
9966/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst.
9967///
9968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shufflehi_epi16&expand=5212)
9969#[inline]
9970#[target_feature(enable = "avx512bw")]
9971#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9972#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))]
9973#[rustc_legacy_const_generics(1)]
9974#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9975pub const fn _mm512_shufflehi_epi16<const IMM8: i32>(a: __m512i) -> __m512i {
9976    unsafe {
9977        static_assert_uimm_bits!(IMM8, 8);
9978        let a = a.as_i16x32();
9979        let r: i16x32 = simd_shuffle!(
9980            a,
9981            a,
9982            [
9983                0,
9984                1,
9985                2,
9986                3,
9987                (IMM8 as u32 & 0b11) + 4,
9988                ((IMM8 as u32 >> 2) & 0b11) + 4,
9989                ((IMM8 as u32 >> 4) & 0b11) + 4,
9990                ((IMM8 as u32 >> 6) & 0b11) + 4,
9991                8,
9992                9,
9993                10,
9994                11,
9995                (IMM8 as u32 & 0b11) + 12,
9996                ((IMM8 as u32 >> 2) & 0b11) + 12,
9997                ((IMM8 as u32 >> 4) & 0b11) + 12,
9998                ((IMM8 as u32 >> 6) & 0b11) + 12,
9999                16,
10000                17,
10001                18,
10002                19,
10003                (IMM8 as u32 & 0b11) + 20,
10004                ((IMM8 as u32 >> 2) & 0b11) + 20,
10005                ((IMM8 as u32 >> 4) & 0b11) + 20,
10006                ((IMM8 as u32 >> 6) & 0b11) + 20,
10007                24,
10008                25,
10009                26,
10010                27,
10011                (IMM8 as u32 & 0b11) + 28,
10012                ((IMM8 as u32 >> 2) & 0b11) + 28,
10013                ((IMM8 as u32 >> 4) & 0b11) + 28,
10014                ((IMM8 as u32 >> 6) & 0b11) + 28,
10015            ],
10016        );
10017        transmute(r)
10018    }
10019}
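
// Illustrative sketch (not part of the original source): same 2-bit-field
// encoding as shufflelo, but applied to words 4..7 of each 128-bit lane while
// the low four words are copied through unchanged. Assumes an AVX-512BW-capable target.
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn example_shufflehi_epi16(a: __m512i) -> __m512i {
    // Per lane: dst[4..8] = a[7], a[6], a[5], a[4]; dst[0..4] = a[0..4].
    _mm512_shufflehi_epi16::<0b00_01_10_11>(a)
}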
10020
10021/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
10022///
10023/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shufflehi_epi16&expand=5210)
10024#[inline]
10025#[target_feature(enable = "avx512bw")]
10026#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10027#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))]
10028#[rustc_legacy_const_generics(3)]
10029#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10030pub const fn _mm512_mask_shufflehi_epi16<const IMM8: i32>(
10031    src: __m512i,
10032    k: __mmask32,
10033    a: __m512i,
10034) -> __m512i {
10035    unsafe {
10036        static_assert_uimm_bits!(IMM8, 8);
10037        let r = _mm512_shufflehi_epi16::<IMM8>(a);
10038        transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32()))
10039    }
10040}
10041
10042/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10043///
10044/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shufflehi_epi16&expand=5211)
10045#[inline]
10046#[target_feature(enable = "avx512bw")]
10047#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10048#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))]
10049#[rustc_legacy_const_generics(2)]
10050#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10051pub const fn _mm512_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
10052    unsafe {
10053        static_assert_uimm_bits!(IMM8, 8);
10054        let r = _mm512_shufflehi_epi16::<IMM8>(a);
10055        transmute(simd_select_bitmask(k, r.as_i16x32(), i16x32::ZERO))
10056    }
10057}
10058
10059/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
10060///
10061/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shufflehi_epi16&expand=5207)
10062#[inline]
10063#[target_feature(enable = "avx512bw,avx512vl")]
10064#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10065#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
10066#[rustc_legacy_const_generics(3)]
10067#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10068pub const fn _mm256_mask_shufflehi_epi16<const IMM8: i32>(
10069    src: __m256i,
10070    k: __mmask16,
10071    a: __m256i,
10072) -> __m256i {
10073    unsafe {
10074        static_assert_uimm_bits!(IMM8, 8);
10075        let shuffle = _mm256_shufflehi_epi16::<IMM8>(a);
10076        transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16()))
10077    }
10078}
10079
10080/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10081///
10082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shufflehi_epi16&expand=5208)
10083#[inline]
10084#[target_feature(enable = "avx512bw,avx512vl")]
10085#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10086#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
10087#[rustc_legacy_const_generics(2)]
10088#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10089pub const fn _mm256_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
10090    unsafe {
10091        static_assert_uimm_bits!(IMM8, 8);
10092        let shuffle = _mm256_shufflehi_epi16::<IMM8>(a);
10093        transmute(simd_select_bitmask(k, shuffle.as_i16x16(), i16x16::ZERO))
10094    }
10095}
10096
10097/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
10098///
10099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shufflehi_epi16&expand=5204)
10100#[inline]
10101#[target_feature(enable = "avx512bw,avx512vl")]
10102#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10103#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
10104#[rustc_legacy_const_generics(3)]
10105#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10106pub const fn _mm_mask_shufflehi_epi16<const IMM8: i32>(
10107    src: __m128i,
10108    k: __mmask8,
10109    a: __m128i,
10110) -> __m128i {
10111    unsafe {
10112        static_assert_uimm_bits!(IMM8, 8);
10113        let shuffle = _mm_shufflehi_epi16::<IMM8>(a);
10114        transmute(simd_select_bitmask(k, shuffle.as_i16x8(), src.as_i16x8()))
10115    }
10116}
10117
10118/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10119///
10120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shufflehi_epi16&expand=5205)
10121#[inline]
10122#[target_feature(enable = "avx512bw,avx512vl")]
10123#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10124#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
10125#[rustc_legacy_const_generics(2)]
10126#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10127pub const fn _mm_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
10128    unsafe {
10129        static_assert_uimm_bits!(IMM8, 8);
10130        let shuffle = _mm_shufflehi_epi16::<IMM8>(a);
10131        transmute(simd_select_bitmask(k, shuffle.as_i16x8(), i16x8::ZERO))
10132    }
10133}
10134
10135/// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst.
10136///
10137/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_epi8&expand=5159)
10138#[inline]
10139#[target_feature(enable = "avx512bw")]
10140#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10141#[cfg_attr(test, assert_instr(vpshufb))]
10142pub fn _mm512_shuffle_epi8(a: __m512i, b: __m512i) -> __m512i {
10143    unsafe { transmute(vpshufb(a.as_i8x64(), b.as_i8x64())) }
10144}
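
// Illustrative sketch (not part of the original source): vpshufb indexes
// within each 128-bit lane, so an all-zero control vector broadcasts byte 0 of
// every lane across that lane; a control byte with its high bit set would zero
// the corresponding result byte instead. Assumes an AVX-512BW-capable target.
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn example_shuffle_epi8(a: __m512i) -> __m512i {
    let idx = _mm512_setzero_si512();
    _mm512_shuffle_epi8(a, idx)
}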
10145
10146/// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10147///
10148/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_epi8&expand=5157)
10149#[inline]
10150#[target_feature(enable = "avx512bw")]
10151#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10152#[cfg_attr(test, assert_instr(vpshufb))]
10153pub fn _mm512_mask_shuffle_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
10154    unsafe {
10155        let shuffle = _mm512_shuffle_epi8(a, b).as_i8x64();
10156        transmute(simd_select_bitmask(k, shuffle, src.as_i8x64()))
10157    }
10158}
10159
10160/// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10161///
10162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_epi8&expand=5158)
10163#[inline]
10164#[target_feature(enable = "avx512bw")]
10165#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10166#[cfg_attr(test, assert_instr(vpshufb))]
10167pub fn _mm512_maskz_shuffle_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
10168    unsafe {
10169        let shuffle = _mm512_shuffle_epi8(a, b).as_i8x64();
10170        transmute(simd_select_bitmask(k, shuffle, i8x64::ZERO))
10171    }
10172}
10173
10174/// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10175///
10176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_epi8&expand=5154)
10177#[inline]
10178#[target_feature(enable = "avx512bw,avx512vl")]
10179#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10180#[cfg_attr(test, assert_instr(vpshufb))]
10181pub fn _mm256_mask_shuffle_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
10182    unsafe {
10183        let shuffle = _mm256_shuffle_epi8(a, b).as_i8x32();
10184        transmute(simd_select_bitmask(k, shuffle, src.as_i8x32()))
10185    }
10186}
10187
10188/// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10189///
10190/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_epi8&expand=5155)
10191#[inline]
10192#[target_feature(enable = "avx512bw,avx512vl")]
10193#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10194#[cfg_attr(test, assert_instr(vpshufb))]
10195pub fn _mm256_maskz_shuffle_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
10196    unsafe {
10197        let shuffle = _mm256_shuffle_epi8(a, b).as_i8x32();
10198        transmute(simd_select_bitmask(k, shuffle, i8x32::ZERO))
10199    }
10200}
10201
10202/// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10203///
10204/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_epi8&expand=5151)
10205#[inline]
10206#[target_feature(enable = "avx512bw,avx512vl")]
10207#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10208#[cfg_attr(test, assert_instr(vpshufb))]
10209pub fn _mm_mask_shuffle_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
10210    unsafe {
10211        let shuffle = _mm_shuffle_epi8(a, b).as_i8x16();
10212        transmute(simd_select_bitmask(k, shuffle, src.as_i8x16()))
10213    }
10214}
10215
10216/// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10217///
10218/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_epi8&expand=5152)
10219#[inline]
10220#[target_feature(enable = "avx512bw,avx512vl")]
10221#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10222#[cfg_attr(test, assert_instr(vpshufb))]
10223pub fn _mm_maskz_shuffle_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
10224    unsafe {
10225        let shuffle = _mm_shuffle_epi8(a, b).as_i8x16();
10226        transmute(simd_select_bitmask(k, shuffle, i8x16::ZERO))
10227    }
10228}
10229
10230/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
10231///
10232/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi16_mask&expand=5884)
10233#[inline]
10234#[target_feature(enable = "avx512bw")]
10235#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10236#[cfg_attr(test, assert_instr(vptestmw))]
10237#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10238pub const fn _mm512_test_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
10239    let and = _mm512_and_si512(a, b);
10240    let zero = _mm512_setzero_si512();
10241    _mm512_cmpneq_epi16_mask(and, zero)
10242}
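
// Illustrative sketch (not part of the original source): ANDing against a
// broadcast bit pattern turns `test_epi16_mask` into a per-lane "is this bit
// set" query, here checking the sign bit of every 16-bit lane. Assumes an
// AVX-512BW-capable target.
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn example_test_sign_bits(v: __m512i) -> __mmask32 {
    let sign_bit = _mm512_set1_epi16(i16::MIN); // 0x8000 in every lane
    _mm512_test_epi16_mask(v, sign_bit)
}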
10243
10244/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
10245///
10246/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi16_mask&expand=5883)
10247#[inline]
10248#[target_feature(enable = "avx512bw")]
10249#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10250#[cfg_attr(test, assert_instr(vptestmw))]
10251#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10252pub const fn _mm512_mask_test_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
10253    let and = _mm512_and_si512(a, b);
10254    let zero = _mm512_setzero_si512();
10255    _mm512_mask_cmpneq_epi16_mask(k, and, zero)
10256}
10257
10258/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
10259///
10260/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi16_mask&expand=5882)
10261#[inline]
10262#[target_feature(enable = "avx512bw,avx512vl")]
10263#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10264#[cfg_attr(test, assert_instr(vptestmw))]
10265#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10266pub const fn _mm256_test_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
10267    let and = _mm256_and_si256(a, b);
10268    let zero = _mm256_setzero_si256();
10269    _mm256_cmpneq_epi16_mask(and, zero)
10270}
10271
10272/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
10273///
10274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi16_mask&expand=5881)
10275#[inline]
10276#[target_feature(enable = "avx512bw,avx512vl")]
10277#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10278#[cfg_attr(test, assert_instr(vptestmw))]
10279#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10280pub const fn _mm256_mask_test_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
10281    let and = _mm256_and_si256(a, b);
10282    let zero = _mm256_setzero_si256();
10283    _mm256_mask_cmpneq_epi16_mask(k, and, zero)
10284}
10285
10286/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
10287///
10288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi16_mask&expand=5880)
10289#[inline]
10290#[target_feature(enable = "avx512bw,avx512vl")]
10291#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10292#[cfg_attr(test, assert_instr(vptestmw))]
10293#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10294pub const fn _mm_test_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
10295    let and = _mm_and_si128(a, b);
10296    let zero = _mm_setzero_si128();
10297    _mm_cmpneq_epi16_mask(and, zero)
10298}
10299
10300/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
10301///
10302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi16_mask&expand=5879)
10303#[inline]
10304#[target_feature(enable = "avx512bw,avx512vl")]
10305#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10306#[cfg_attr(test, assert_instr(vptestmw))]
10307#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10308pub const fn _mm_mask_test_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
10309    let and = _mm_and_si128(a, b);
10310    let zero = _mm_setzero_si128();
10311    _mm_mask_cmpneq_epi16_mask(k, and, zero)
10312}
10313
10314/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
10315///
10316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi8_mask&expand=5902)
10317#[inline]
10318#[target_feature(enable = "avx512bw")]
10319#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10320#[cfg_attr(test, assert_instr(vptestmb))]
10321#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10322pub const fn _mm512_test_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
10323    let and = _mm512_and_si512(a, b);
10324    let zero = _mm512_setzero_si512();
10325    _mm512_cmpneq_epi8_mask(and, zero)
10326}
10327
10328/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
10329///
10330/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi8_mask&expand=5901)
10331#[inline]
10332#[target_feature(enable = "avx512bw")]
10333#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10334#[cfg_attr(test, assert_instr(vptestmb))]
10335#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10336pub const fn _mm512_mask_test_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
10337    let and = _mm512_and_si512(a, b);
10338    let zero = _mm512_setzero_si512();
10339    _mm512_mask_cmpneq_epi8_mask(k, and, zero)
10340}
10341
10342/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
10343///
10344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi8_mask&expand=5900)
10345#[inline]
10346#[target_feature(enable = "avx512bw,avx512vl")]
10347#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10348#[cfg_attr(test, assert_instr(vptestmb))]
10349#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10350pub const fn _mm256_test_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
10351    let and = _mm256_and_si256(a, b);
10352    let zero = _mm256_setzero_si256();
10353    _mm256_cmpneq_epi8_mask(and, zero)
10354}
10355
10356/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
10357///
10358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi8_mask&expand=5899)
10359#[inline]
10360#[target_feature(enable = "avx512bw,avx512vl")]
10361#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10362#[cfg_attr(test, assert_instr(vptestmb))]
10363#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10364pub const fn _mm256_mask_test_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
10365    let and = _mm256_and_si256(a, b);
10366    let zero = _mm256_setzero_si256();
10367    _mm256_mask_cmpneq_epi8_mask(k, and, zero)
10368}
10369
10370/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
10371///
10372/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi8_mask&expand=5898)
10373#[inline]
10374#[target_feature(enable = "avx512bw,avx512vl")]
10375#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10376#[cfg_attr(test, assert_instr(vptestmb))]
10377#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10378pub const fn _mm_test_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
10379    let and = _mm_and_si128(a, b);
10380    let zero = _mm_setzero_si128();
10381    _mm_cmpneq_epi8_mask(and, zero)
10382}
10383
10384/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
10385///
10386/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi8_mask&expand=5897)
10387#[inline]
10388#[target_feature(enable = "avx512bw,avx512vl")]
10389#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10390#[cfg_attr(test, assert_instr(vptestmb))]
10391#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10392pub const fn _mm_mask_test_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
10393    let and = _mm_and_si128(a, b);
10394    let zero = _mm_setzero_si128();
10395    _mm_mask_cmpneq_epi8_mask(k, and, zero)
10396}
10397
10398/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
10399///
10400/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi16_mask&expand=5915)
10401#[inline]
10402#[target_feature(enable = "avx512bw")]
10403#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10404#[cfg_attr(test, assert_instr(vptestnmw))]
10405#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10406pub const fn _mm512_testn_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
10407    let and = _mm512_and_si512(a, b);
10408    let zero = _mm512_setzero_si512();
10409    _mm512_cmpeq_epi16_mask(and, zero)
10410}
10411
10412/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
10413///
10414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi16_mask&expand=5914)
10415#[inline]
10416#[target_feature(enable = "avx512bw")]
10417#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10418#[cfg_attr(test, assert_instr(vptestnmw))]
10419#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10420pub const fn _mm512_mask_testn_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
10421    let and = _mm512_and_si512(a, b);
10422    let zero = _mm512_setzero_si512();
10423    _mm512_mask_cmpeq_epi16_mask(k, and, zero)
10424}
10425
10426/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
10427///
10428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi16_mask&expand=5913)
10429#[inline]
10430#[target_feature(enable = "avx512bw,avx512vl")]
10431#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10432#[cfg_attr(test, assert_instr(vptestnmw))]
10433#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10434pub const fn _mm256_testn_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
10435    let and = _mm256_and_si256(a, b);
10436    let zero = _mm256_setzero_si256();
10437    _mm256_cmpeq_epi16_mask(and, zero)
10438}
10439
10440/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
10441///
10442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi16_mask&expand=5912)
10443#[inline]
10444#[target_feature(enable = "avx512bw,avx512vl")]
10445#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10446#[cfg_attr(test, assert_instr(vptestnmw))]
10447#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10448pub const fn _mm256_mask_testn_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
10449    let and = _mm256_and_si256(a, b);
10450    let zero = _mm256_setzero_si256();
10451    _mm256_mask_cmpeq_epi16_mask(k, and, zero)
10452}
10453
10454/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
10455///
10456/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi16_mask&expand=5911)
10457#[inline]
10458#[target_feature(enable = "avx512bw,avx512vl")]
10459#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10460#[cfg_attr(test, assert_instr(vptestnmw))]
10461#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10462pub const fn _mm_testn_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
10463    let and = _mm_and_si128(a, b);
10464    let zero = _mm_setzero_si128();
10465    _mm_cmpeq_epi16_mask(and, zero)
10466}
10467
10468/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
10469///
10470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi16_mask&expand=5910)
10471#[inline]
10472#[target_feature(enable = "avx512bw,avx512vl")]
10473#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10474#[cfg_attr(test, assert_instr(vptestnmw))]
10475#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10476pub const fn _mm_mask_testn_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
10477    let and = _mm_and_si128(a, b);
10478    let zero = _mm_setzero_si128();
10479    _mm_mask_cmpeq_epi16_mask(k, and, zero)
10480}
10481
10482/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
10483///
10484/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi8_mask&expand=5933)
10485#[inline]
10486#[target_feature(enable = "avx512bw")]
10487#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10488#[cfg_attr(test, assert_instr(vptestnmb))]
10489#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10490pub const fn _mm512_testn_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
10491    let and = _mm512_and_si512(a, b);
10492    let zero = _mm512_setzero_si512();
10493    _mm512_cmpeq_epi8_mask(and, zero)
10494}
10495
10496/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
10497///
10498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi8_mask&expand=5932)
10499#[inline]
10500#[target_feature(enable = "avx512bw")]
10501#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10502#[cfg_attr(test, assert_instr(vptestnmb))]
10503#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10504pub const fn _mm512_mask_testn_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
10505    let and = _mm512_and_si512(a, b);
10506    let zero = _mm512_setzero_si512();
10507    _mm512_mask_cmpeq_epi8_mask(k, and, zero)
10508}
10509
10510/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
10511///
10512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi8_mask&expand=5931)
10513#[inline]
10514#[target_feature(enable = "avx512bw,avx512vl")]
10515#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10516#[cfg_attr(test, assert_instr(vptestnmb))]
10517#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10518pub const fn _mm256_testn_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
10519    let and = _mm256_and_si256(a, b);
10520    let zero = _mm256_setzero_si256();
10521    _mm256_cmpeq_epi8_mask(and, zero)
10522}
10523
10524/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
10525///
10526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi8_mask&expand=5930)
10527#[inline]
10528#[target_feature(enable = "avx512bw,avx512vl")]
10529#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10530#[cfg_attr(test, assert_instr(vptestnmb))]
10531#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10532pub const fn _mm256_mask_testn_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
10533    let and = _mm256_and_si256(a, b);
10534    let zero = _mm256_setzero_si256();
10535    _mm256_mask_cmpeq_epi8_mask(k, and, zero)
10536}
10537
10538/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
10539///
10540/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi8_mask&expand=5929)
10541#[inline]
10542#[target_feature(enable = "avx512bw,avx512vl")]
10543#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10544#[cfg_attr(test, assert_instr(vptestnmb))]
10545#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10546pub const fn _mm_testn_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
10547    let and = _mm_and_si128(a, b);
10548    let zero = _mm_setzero_si128();
10549    _mm_cmpeq_epi8_mask(and, zero)
10550}
10551
10552/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
10553///
10554/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi8_mask&expand=5928)
10555#[inline]
10556#[target_feature(enable = "avx512bw,avx512vl")]
10557#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10558#[cfg_attr(test, assert_instr(vptestnmb))]
10559#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10560pub const fn _mm_mask_testn_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
10561    let and = _mm_and_si128(a, b);
10562    let zero = _mm_setzero_si128();
10563    _mm_mask_cmpeq_epi8_mask(k, and, zero)
10564}
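
// Illustrative sketch, not in the upstream source: `vptestmw` sets a mask bit where
// the 16-bit AND of the operands is non-zero, while `vptestnmw` sets it where that AND
// is zero, so the two masks are exact bitwise complements. The helper name is
// hypothetical and the function is only meant as a test-build illustration.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn example_test_vs_testn_epi16(a: __m512i, b: __m512i) -> bool {
    let test = _mm512_test_epi16_mask(a, b);
    let testn = _mm512_testn_epi16_mask(a, b);
    // All 32 lanes are covered by exactly one of the two masks.
    test == !testn
}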
10565
10566/// Store 64-bit mask from a into memory.
10567///
10568/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask64&expand=5578)
10569#[inline]
10570#[target_feature(enable = "avx512bw")]
10571#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10572#[cfg_attr(test, assert_instr(mov))] // should be kmovq
10573#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10574pub const unsafe fn _store_mask64(mem_addr: *mut __mmask64, a: __mmask64) {
10575    ptr::write(mem_addr, a);
10576}
10577
10578/// Store 32-bit mask from a into memory.
10579///
10580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask32&expand=5577)
10581#[inline]
10582#[target_feature(enable = "avx512bw")]
10583#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10584#[cfg_attr(test, assert_instr(mov))] // should be kmovd
10585#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10586pub const unsafe fn _store_mask32(mem_addr: *mut __mmask32, a: __mmask32) {
10587    ptr::write(mem_addr, a);
10588}
10589
10590/// Load 64-bit mask from memory into k.
10591///
10592/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask64&expand=3318)
10593#[inline]
10594#[target_feature(enable = "avx512bw")]
10595#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10596#[cfg_attr(test, assert_instr(mov))] // should be kmovq
10597#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10598pub const unsafe fn _load_mask64(mem_addr: *const __mmask64) -> __mmask64 {
10599    ptr::read(mem_addr)
10600}
10601
10602/// Load 32-bit mask from memory into k.
10603///
10604/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask32&expand=3317)
10605#[inline]
10606#[target_feature(enable = "avx512bw")]
10607#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10608#[cfg_attr(test, assert_instr(mov))] // should be kmovd
10609#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10610pub const unsafe fn _load_mask32(mem_addr: *const __mmask32) -> __mmask32 {
10611    ptr::read(mem_addr)
10612}
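
// Illustrative sketch, not in the upstream source: mask values round-trip through
// memory as plain integers, so a store followed by a load recovers the original mask.
// The helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn example_mask64_round_trip(k: __mmask64) -> bool {
    let mut slot: __mmask64 = 0;
    // SAFETY: `slot` is a valid, properly aligned location for a `__mmask64`.
    unsafe {
        _store_mask64(&mut slot, k);
        _load_mask64(&slot) == k
    }
}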
10613
10614/// Compute the absolute differences of packed unsigned 8-bit integers in a and b, then horizontally sum each consecutive 8 differences to produce eight unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low 16 bits of 64-bit elements in dst.
10615///
10616/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sad_epu8&expand=4855)
10617#[inline]
10618#[target_feature(enable = "avx512bw")]
10619#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10620#[cfg_attr(test, assert_instr(vpsadbw))]
10621pub fn _mm512_sad_epu8(a: __m512i, b: __m512i) -> __m512i {
10622    unsafe { transmute(vpsadbw(a.as_u8x64(), b.as_u8x64())) }
10623}
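
// Illustrative sketch, not in the upstream source: running `vpsadbw` against an
// all-zero vector reduces each group of eight unsigned bytes to its sum (|x - 0| = x),
// a common first step of a horizontal byte sum. The helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn example_sum_bytes_per_qword(a: __m512i) -> __m512i {
    // Each 64-bit lane of the result holds, in its low 16 bits, the sum of the
    // corresponding eight bytes of `a`.
    _mm512_sad_epu8(a, _mm512_setzero_si512())
}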
10624
10625/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
10626///
10627/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dbsad_epu8&expand=2114)
10628#[inline]
10629#[target_feature(enable = "avx512bw")]
10630#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10631#[rustc_legacy_const_generics(2)]
10632#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
10633pub fn _mm512_dbsad_epu8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
10634    unsafe {
10635        static_assert_uimm_bits!(IMM8, 8);
10636        let a = a.as_u8x64();
10637        let b = b.as_u8x64();
10638        let r = vdbpsadbw(a, b, IMM8);
10639        transmute(r)
10640    }
10641}
10642
10643/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
10644///
10645/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dbsad_epu8&expand=2115)
10646#[inline]
10647#[target_feature(enable = "avx512bw")]
10648#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10649#[rustc_legacy_const_generics(4)]
10650#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
10651pub fn _mm512_mask_dbsad_epu8<const IMM8: i32>(
10652    src: __m512i,
10653    k: __mmask32,
10654    a: __m512i,
10655    b: __m512i,
10656) -> __m512i {
10657    unsafe {
10658        static_assert_uimm_bits!(IMM8, 8);
10659        let a = a.as_u8x64();
10660        let b = b.as_u8x64();
10661        let r = vdbpsadbw(a, b, IMM8);
10662        transmute(simd_select_bitmask(k, r, src.as_u16x32()))
10663    }
10664}
10665
10666/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
10667///
10668/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dbsad_epu8&expand=2116)
10669#[inline]
10670#[target_feature(enable = "avx512bw")]
10671#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10672#[rustc_legacy_const_generics(3)]
10673#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
10674pub fn _mm512_maskz_dbsad_epu8<const IMM8: i32>(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
10675    unsafe {
10676        static_assert_uimm_bits!(IMM8, 8);
10677        let a = a.as_u8x64();
10678        let b = b.as_u8x64();
10679        let r = vdbpsadbw(a, b, IMM8);
10680        transmute(simd_select_bitmask(k, r, u16x32::ZERO))
10681    }
10682}
10683
10684/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
10685///
10686/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dbsad_epu8&expand=2111)
10687#[inline]
10688#[target_feature(enable = "avx512bw,avx512vl")]
10689#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10690#[rustc_legacy_const_generics(2)]
10691#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
10692pub fn _mm256_dbsad_epu8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
10693    unsafe {
10694        static_assert_uimm_bits!(IMM8, 8);
10695        let a = a.as_u8x32();
10696        let b = b.as_u8x32();
10697        let r = vdbpsadbw256(a, b, IMM8);
10698        transmute(r)
10699    }
10700}
10701
10702/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
10703///
10704/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dbsad_epu8&expand=2112)
10705#[inline]
10706#[target_feature(enable = "avx512bw,avx512vl")]
10707#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10708#[rustc_legacy_const_generics(4)]
10709#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
10710pub fn _mm256_mask_dbsad_epu8<const IMM8: i32>(
10711    src: __m256i,
10712    k: __mmask16,
10713    a: __m256i,
10714    b: __m256i,
10715) -> __m256i {
10716    unsafe {
10717        static_assert_uimm_bits!(IMM8, 8);
10718        let a = a.as_u8x32();
10719        let b = b.as_u8x32();
10720        let r = vdbpsadbw256(a, b, IMM8);
10721        transmute(simd_select_bitmask(k, r, src.as_u16x16()))
10722    }
10723}
10724
10725/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
10726///
10727/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dbsad_epu8&expand=2113)
10728#[inline]
10729#[target_feature(enable = "avx512bw,avx512vl")]
10730#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10731#[rustc_legacy_const_generics(3)]
10732#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
10733pub fn _mm256_maskz_dbsad_epu8<const IMM8: i32>(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
10734    unsafe {
10735        static_assert_uimm_bits!(IMM8, 8);
10736        let a = a.as_u8x32();
10737        let b = b.as_u8x32();
10738        let r = vdbpsadbw256(a, b, IMM8);
10739        transmute(simd_select_bitmask(k, r, u16x16::ZERO))
10740    }
10741}
10742
10743/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
10744///
10745/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dbsad_epu8&expand=2108)
10746#[inline]
10747#[target_feature(enable = "avx512bw,avx512vl")]
10748#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10749#[rustc_legacy_const_generics(2)]
10750#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
10751pub fn _mm_dbsad_epu8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
10752    unsafe {
10753        static_assert_uimm_bits!(IMM8, 8);
10754        let a = a.as_u8x16();
10755        let b = b.as_u8x16();
10756        let r = vdbpsadbw128(a, b, IMM8);
10757        transmute(r)
10758    }
10759}
10760
10761/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
10762///
10763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dbsad_epu8&expand=2109)
10764#[inline]
10765#[target_feature(enable = "avx512bw,avx512vl")]
10766#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10767#[rustc_legacy_const_generics(4)]
10768#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
10769pub fn _mm_mask_dbsad_epu8<const IMM8: i32>(
10770    src: __m128i,
10771    k: __mmask8,
10772    a: __m128i,
10773    b: __m128i,
10774) -> __m128i {
10775    unsafe {
10776        static_assert_uimm_bits!(IMM8, 8);
10777        let a = a.as_u8x16();
10778        let b = b.as_u8x16();
10779        let r = vdbpsadbw128(a, b, IMM8);
10780        transmute(simd_select_bitmask(k, r, src.as_u16x8()))
10781    }
10782}
10783
10784/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
10785///
10786/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dbsad_epu8&expand=2110)
10787#[inline]
10788#[target_feature(enable = "avx512bw,avx512vl")]
10789#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10790#[rustc_legacy_const_generics(3)]
10791#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
10792pub fn _mm_maskz_dbsad_epu8<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
10793    unsafe {
10794        static_assert_uimm_bits!(IMM8, 8);
10795        let a = a.as_u8x16();
10796        let b = b.as_u8x16();
10797        let r = vdbpsadbw128(a, b, IMM8);
10798        transmute(simd_select_bitmask(k, r, u16x8::ZERO))
10799    }
10800}
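
// Illustrative sketch, not in the upstream source: the zero-masked form is equivalent
// to the write-masked form with an all-zero source vector, which makes a convenient
// cross-check of the two variants. The helper name and the fixed IMM8 of 0 are
// illustrative choices only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn example_maskz_dbsad_matches_mask_with_zero_src(k: __mmask32, a: __m512i, b: __m512i) -> bool {
    let maskz = _mm512_maskz_dbsad_epu8::<0>(k, a, b);
    let masked = _mm512_mask_dbsad_epu8::<0>(_mm512_setzero_si512(), k, a, b);
    // Equal in every 16-bit lane iff all 32 mask bits of the comparison are set.
    _mm512_cmpeq_epi16_mask(maskz, masked) == !0
}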
10801
10802/// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a.
10803///
10804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi16_mask&expand=3873)
10805#[inline]
10806#[target_feature(enable = "avx512bw")]
10807#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10808#[cfg_attr(test, assert_instr(vpmovw2m))]
10809#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10810pub const fn _mm512_movepi16_mask(a: __m512i) -> __mmask32 {
10811    let filter = _mm512_set1_epi16(1 << 15);
10812    let a = _mm512_and_si512(a, filter);
10813    _mm512_cmpeq_epi16_mask(a, filter)
10814}
10815
10816/// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a.
10817///
10818/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi16_mask&expand=3872)
10819#[inline]
10820#[target_feature(enable = "avx512bw,avx512vl")]
10821#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10822#[cfg_attr(test, assert_instr(vpmovw2m))]
10823#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10824pub const fn _mm256_movepi16_mask(a: __m256i) -> __mmask16 {
10825    let filter = _mm256_set1_epi16(1 << 15);
10826    let a = _mm256_and_si256(a, filter);
10827    _mm256_cmpeq_epi16_mask(a, filter)
10828}
10829
10830/// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a.
10831///
10832/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi16_mask&expand=3871)
10833#[inline]
10834#[target_feature(enable = "avx512bw,avx512vl")]
10835#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10836#[cfg_attr(test, assert_instr(vpmovw2m))]
10837#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10838pub const fn _mm_movepi16_mask(a: __m128i) -> __mmask8 {
10839    let filter = _mm_set1_epi16(1 << 15);
10840    let a = _mm_and_si128(a, filter);
10841    _mm_cmpeq_epi16_mask(a, filter)
10842}
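
// Illustrative sketch, not in the upstream source: only the sign bit of each 16-bit
// lane feeds the mask, so a vector of -1s yields an all-ones mask while a vector of
// +1s yields an empty one. The helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn example_movepi16_mask_reads_sign_bits() -> bool {
    _mm512_movepi16_mask(_mm512_set1_epi16(-1)) == !0
        && _mm512_movepi16_mask(_mm512_set1_epi16(1)) == 0
}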
10843
10844/// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a.
10845///
10846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi8_mask&expand=3883)
10847#[inline]
10848#[target_feature(enable = "avx512bw")]
10849#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10850#[cfg_attr(test, assert_instr(vpmovb2m))]
10851#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10852pub const fn _mm512_movepi8_mask(a: __m512i) -> __mmask64 {
10853    let filter = _mm512_set1_epi8(1 << 7);
10854    let a = _mm512_and_si512(a, filter);
10855    _mm512_cmpeq_epi8_mask(a, filter)
10856}
10857
10858/// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a.
10859///
10860/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi8_mask&expand=3882)
10861#[inline]
10862#[target_feature(enable = "avx512bw,avx512vl")]
10863#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10864#[cfg_attr(test, assert_instr(vpmovmskb))]
10865// should be vpmovb2m but compiled to vpmovmskb in the test shim because that takes fewer cycles than
10866// using vpmovb2m plus converting the mask register to a standard register.
10867#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10868pub const fn _mm256_movepi8_mask(a: __m256i) -> __mmask32 {
10869    let filter = _mm256_set1_epi8(1 << 7);
10870    let a = _mm256_and_si256(a, filter);
10871    _mm256_cmpeq_epi8_mask(a, filter)
10872}
10873
10874/// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a.
10875///
10876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi8_mask&expand=3881)
10877#[inline]
10878#[target_feature(enable = "avx512bw,avx512vl")]
10879#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10880#[cfg_attr(test, assert_instr(vpmovmskb))]
10881// should be vpmovb2m but compiled to vpmovmskb in the test shim because that takes fewer cycles than
10882// using vpmovb2m plus converting the mask register to a standard register.
10883#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10884pub const fn _mm_movepi8_mask(a: __m128i) -> __mmask16 {
10885    let filter = _mm_set1_epi8(1 << 7);
10886    let a = _mm_and_si128(a, filter);
10887    _mm_cmpeq_epi8_mask(a, filter)
10888}
10889
10890/// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
10891///
10892/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi16&expand=3886)
10893#[inline]
10894#[target_feature(enable = "avx512bw")]
10895#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10896#[cfg_attr(test, assert_instr(vpmovm2w))]
10897#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10898pub const fn _mm512_movm_epi16(k: __mmask32) -> __m512i {
10899    unsafe {
10900        let one = _mm512_set1_epi16(
10901            1 << 15
10902                | 1 << 14
10903                | 1 << 13
10904                | 1 << 12
10905                | 1 << 11
10906                | 1 << 10
10907                | 1 << 9
10908                | 1 << 8
10909                | 1 << 7
10910                | 1 << 6
10911                | 1 << 5
10912                | 1 << 4
10913                | 1 << 3
10914                | 1 << 2
10915                | 1 << 1
10916                | 1 << 0,
10917        )
10918        .as_i16x32();
10919        transmute(simd_select_bitmask(k, one, i16x32::ZERO))
10920    }
10921}
10922
10923/// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
10924///
10925/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi16&expand=3885)
10926#[inline]
10927#[target_feature(enable = "avx512bw,avx512vl")]
10928#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10929#[cfg_attr(test, assert_instr(vpmovm2w))]
10930#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10931pub const fn _mm256_movm_epi16(k: __mmask16) -> __m256i {
10932    unsafe {
10933        let one = _mm256_set1_epi16(
10934            1 << 15
10935                | 1 << 14
10936                | 1 << 13
10937                | 1 << 12
10938                | 1 << 11
10939                | 1 << 10
10940                | 1 << 9
10941                | 1 << 8
10942                | 1 << 7
10943                | 1 << 6
10944                | 1 << 5
10945                | 1 << 4
10946                | 1 << 3
10947                | 1 << 2
10948                | 1 << 1
10949                | 1 << 0,
10950        )
10951        .as_i16x16();
10952        transmute(simd_select_bitmask(k, one, i16x16::ZERO))
10953    }
10954}
10955
10956/// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
10957///
10958/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi16&expand=3884)
10959#[inline]
10960#[target_feature(enable = "avx512bw,avx512vl")]
10961#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10962#[cfg_attr(test, assert_instr(vpmovm2w))]
10963#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10964pub const fn _mm_movm_epi16(k: __mmask8) -> __m128i {
10965    unsafe {
10966        let one = _mm_set1_epi16(
10967            1 << 15
10968                | 1 << 14
10969                | 1 << 13
10970                | 1 << 12
10971                | 1 << 11
10972                | 1 << 10
10973                | 1 << 9
10974                | 1 << 8
10975                | 1 << 7
10976                | 1 << 6
10977                | 1 << 5
10978                | 1 << 4
10979                | 1 << 3
10980                | 1 << 2
10981                | 1 << 1
10982                | 1 << 0,
10983        )
10984        .as_i16x8();
10985        transmute(simd_select_bitmask(k, one, i16x8::ZERO))
10986    }
10987}
10988
10989/// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
10990///
10991/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi8&expand=3895)
10992#[inline]
10993#[target_feature(enable = "avx512bw")]
10994#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10995#[cfg_attr(test, assert_instr(vpmovm2b))]
10996#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10997pub const fn _mm512_movm_epi8(k: __mmask64) -> __m512i {
10998    unsafe {
10999        let one =
11000            _mm512_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0)
11001                .as_i8x64();
11002        transmute(simd_select_bitmask(k, one, i8x64::ZERO))
11003    }
11004}
11005
11006/// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
11007///
11008/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi8&expand=3894)
11009#[inline]
11010#[target_feature(enable = "avx512bw,avx512vl")]
11011#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11012#[cfg_attr(test, assert_instr(vpmovm2b))]
11013#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11014pub const fn _mm256_movm_epi8(k: __mmask32) -> __m256i {
11015    unsafe {
11016        let one =
11017            _mm256_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0)
11018                .as_i8x32();
11019        transmute(simd_select_bitmask(k, one, i8x32::ZERO))
11020    }
11021}
11022
11023/// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
11024///
11025/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi8&expand=3893)
11026#[inline]
11027#[target_feature(enable = "avx512bw,avx512vl")]
11028#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11029#[cfg_attr(test, assert_instr(vpmovm2b))]
11030#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11031pub const fn _mm_movm_epi8(k: __mmask16) -> __m128i {
11032    unsafe {
11033        let one =
11034            _mm_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0)
11035                .as_i8x16();
11036        transmute(simd_select_bitmask(k, one, i8x16::ZERO))
11037    }
11038}
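
// Illustrative sketch, not in the upstream source: `vpmovm2b` expands a mask into
// all-ones/all-zero bytes and `vpmovb2m` reads it back from the sign bits, so the pair
// round-trips a 64-bit mask exactly. The helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn example_movm_movepi8_round_trip(k: __mmask64) -> bool {
    _mm512_movepi8_mask(_mm512_movm_epi8(k)) == k
}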
11039
11040/// Convert 32-bit mask a into an integer value, and store the result in dst.
11041///
11042/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#_cvtmask32_u32)
11043#[inline]
11044#[target_feature(enable = "avx512bw")]
11045#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11046#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11047pub const fn _cvtmask32_u32(a: __mmask32) -> u32 {
11048    a
11049}
11050
11051/// Convert integer value a into a 32-bit mask, and store the result in k.
11052///
11053/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu32_mask32)
11054#[inline]
11055#[target_feature(enable = "avx512bw")]
11056#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11057#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11058pub const fn _cvtu32_mask32(a: u32) -> __mmask32 {
11059    a
11060}
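
// Illustrative sketch, not in the upstream source: `__mmask32` is represented as a
// plain `u32`, so both conversions are identities and compose to a no-op. The helper
// name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn example_mask32_u32_round_trip(bits: u32) -> bool {
    _cvtmask32_u32(_cvtu32_mask32(bits)) == bits
}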
11061
11062/// Add 32-bit masks in a and b, and store the result in k.
11063///
11064/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask32&expand=3207)
11065#[inline]
11066#[target_feature(enable = "avx512bw")]
11067#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11068#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11069pub const fn _kadd_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
11070    a.wrapping_add(b)
11071}
11072
11073/// Add 64-bit masks in a and b, and store the result in k.
11074///
11075/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask64&expand=3208)
11076#[inline]
11077#[target_feature(enable = "avx512bw")]
11078#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11079#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11080pub const fn _kadd_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
11081    a.wrapping_add(b)
11082}
11083
11084/// Compute the bitwise AND of 32-bit masks a and b, and store the result in k.
11085///
11086/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kand_mask32&expand=3213)
11087#[inline]
11088#[target_feature(enable = "avx512bw")]
11089#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11090#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11091pub const fn _kand_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
11092    a & b
11093}
11094
11095/// Compute the bitwise AND of 64-bit masks a and b, and store the result in k.
11096///
11097/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kand_mask64&expand=3214)
11098#[inline]
11099#[target_feature(enable = "avx512bw")]
11100#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11101#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11102pub const fn _kand_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
11103    a & b
11104}
11105
11106/// Compute the bitwise NOT of 32-bit mask a, and store the result in k.
11107///
11108/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_knot_mask32&expand=3234)
11109#[inline]
11110#[target_feature(enable = "avx512bw")]
11111#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11112#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11113pub const fn _knot_mask32(a: __mmask32) -> __mmask32 {
11114    !a
11115}
11116
11117/// Compute the bitwise NOT of 64-bit mask a, and store the result in k.
11118///
11119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_knot_mask64&expand=3235)
11120#[inline]
11121#[target_feature(enable = "avx512bw")]
11122#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11123#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11124pub const fn _knot_mask64(a: __mmask64) -> __mmask64 {
11125    !a
11126}
11127
11128/// Compute the bitwise NOT of 32-bit mask a and then AND with b, and store the result in k.
11129///
11130/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kandn_mask32&expand=3219)
11131#[inline]
11132#[target_feature(enable = "avx512bw")]
11133#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11134#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11135pub const fn _kandn_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
11136    _knot_mask32(a) & b
11137}
11138
11139/// Compute the bitwise NOT of 64-bit mask a and then AND with b, and store the result in k.
11140///
11141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kandn_mask64&expand=3220)
11142#[inline]
11143#[target_feature(enable = "avx512bw")]
11144#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11145#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11146pub const fn _kandn_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
11147    _knot_mask64(a) & b
11148}
11149
11150/// Compute the bitwise OR of 32-bit masks a and b, and store the result in k.
11151///
11152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kor_mask32&expand=3240)
11153#[inline]
11154#[target_feature(enable = "avx512bw")]
11155#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11156#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11157pub const fn _kor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
11158    a | b
11159}
11160
11161/// Compute the bitwise OR of 64-bit masks a and b, and store the result in k.
11162///
11163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kor_mask64&expand=3241)
11164#[inline]
11165#[target_feature(enable = "avx512bw")]
11166#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11167#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11168pub const fn _kor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
11169    a | b
11170}
11171
11172/// Compute the bitwise XOR of 32-bit masks a and b, and store the result in k.
11173///
11174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxor_mask32&expand=3292)
11175#[inline]
11176#[target_feature(enable = "avx512bw")]
11177#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11178#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11179pub const fn _kxor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
11180    a ^ b
11181}
11182
11183/// Compute the bitwise XOR of 64-bit masks a and b, and store the result in k.
11184///
11185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxor_mask64&expand=3293)
11186#[inline]
11187#[target_feature(enable = "avx512bw")]
11188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11189#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11190pub const fn _kxor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
11191    a ^ b
11192}
11193
11194/// Compute the bitwise XNOR of 32-bit masks a and b, and store the result in k.
11195///
11196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxnor_mask32&expand=3286)
11197#[inline]
11198#[target_feature(enable = "avx512bw")]
11199#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11200#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11201pub const fn _kxnor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
11202    _knot_mask32(a ^ b)
11203}
11204
11205/// Compute the bitwise XNOR of 64-bit masks a and b, and store the result in k.
11206///
11207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxnor_mask64&expand=3287)
11208#[inline]
11209#[target_feature(enable = "avx512bw")]
11210#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11211#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11212pub const fn _kxnor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
11213    _knot_mask64(a ^ b)
11214}
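
// Illustrative sketch, not in the upstream source: the mask operations above are plain
// bitwise arithmetic on the integer representation; for instance, ANDN is literally
// NOT followed by AND. The helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn example_kandn_is_not_then_and(a: __mmask32, b: __mmask32) -> bool {
    _kandn_mask32(a, b) == _kand_mask32(_knot_mask32(a), b)
}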
11215
11216/// Compute the bitwise OR of 32-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
11217/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
11218///
11219/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask32_u8)
11220#[inline]
11221#[target_feature(enable = "avx512bw")]
11222#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11223#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11224pub const unsafe fn _kortest_mask32_u8(a: __mmask32, b: __mmask32, all_ones: *mut u8) -> u8 {
11225    let tmp = _kor_mask32(a, b);
11226    *all_ones = (tmp == 0xffffffff) as u8;
11227    (tmp == 0) as u8
11228}
11229
11230/// Compute the bitwise OR of 64-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
11231/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
11232///
11233/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask64_u8)
11234#[inline]
11235#[target_feature(enable = "avx512bw")]
11236#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11237#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11238pub const unsafe fn _kortest_mask64_u8(a: __mmask64, b: __mmask64, all_ones: *mut u8) -> u8 {
11239    let tmp = _kor_mask64(a, b);
11240    *all_ones = (tmp == 0xffffffff_ffffffff) as u8;
11241    (tmp == 0) as u8
11242}
11243
11244/// Compute the bitwise OR of 32-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
11245/// store 0 in dst.
11246///
11247/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask32_u8)
11248#[inline]
11249#[target_feature(enable = "avx512bw")]
11250#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11251#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11252pub const fn _kortestc_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
11253    (_kor_mask32(a, b) == 0xffffffff) as u8
11254}
11255
11256/// Compute the bitwise OR of 64-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
11257/// store 0 in dst.
11258///
11259/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask64_u8)
11260#[inline]
11261#[target_feature(enable = "avx512bw")]
11262#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11263#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11264pub const fn _kortestc_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
11265    (_kor_mask64(a, b) == 0xffffffff_ffffffff) as u8
11266}
11267
11268/// Compute the bitwise OR of 32-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
11269/// store 0 in dst.
11270///
11271/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask32_u8)
11272#[inline]
11273#[target_feature(enable = "avx512bw")]
11274#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11275#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11276pub const fn _kortestz_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
11277    (_kor_mask32(a, b) == 0) as u8
11278}
11279
11280/// Compute the bitwise OR of 64-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
11281/// store 0 in dst.
11282///
11283/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask64_u8)
11284#[inline]
11285#[target_feature(enable = "avx512bw")]
11286#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11287#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11288pub const fn _kortestz_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
11289    (_kor_mask64(a, b) == 0) as u8
11290}
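
// Illustrative sketch, not in the upstream source: `_kortest_mask32_u8` reports the
// "all zeros" result directly and the "all ones" result through the out-pointer,
// bundling the two single-flag variants above. The helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn example_kortest_combines_both_flags(a: __mmask32, b: __mmask32) -> bool {
    let mut all_ones = 0u8;
    // SAFETY: `all_ones` is a valid location for the carry-flag style result.
    let zf = unsafe { _kortest_mask32_u8(a, b, &mut all_ones) };
    zf == _kortestz_mask32_u8(a, b) && all_ones == _kortestc_mask32_u8(a, b)
}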
11291
11292/// Shift the bits of 32-bit mask a left by count while shifting in zeros, and store the least significant 32 bits of the result in k.
11293///
11294/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask32)
11295#[inline]
11296#[target_feature(enable = "avx512bw")]
11297#[rustc_legacy_const_generics(1)]
11298#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11299#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11300pub const fn _kshiftli_mask32<const COUNT: u32>(a: __mmask32) -> __mmask32 {
11301    a.unbounded_shl(COUNT)
11302}
11303
11304/// Shift the bits of 64-bit mask a left by count while shifting in zeros, and store the least significant 64 bits of the result in k.
11305///
11306/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask64)
11307#[inline]
11308#[target_feature(enable = "avx512bw")]
11309#[rustc_legacy_const_generics(1)]
11310#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11311#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11312pub const fn _kshiftli_mask64<const COUNT: u32>(a: __mmask64) -> __mmask64 {
11313    a.unbounded_shl(COUNT)
11314}
11315
11316/// Shift the bits of 32-bit mask a right by count while shifting in zeros, and store the least significant 32 bits of the result in k.
11317///
11318/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask32)
11319#[inline]
11320#[target_feature(enable = "avx512bw")]
11321#[rustc_legacy_const_generics(1)]
11322#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11323#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11324pub const fn _kshiftri_mask32<const COUNT: u32>(a: __mmask32) -> __mmask32 {
11325    a.unbounded_shr(COUNT)
11326}
11327
11328/// Shift the bits of 64-bit mask a right by count while shifting in zeros, and store the least significant 64 bits of the result in k.
11329///
11330/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask64)
11331#[inline]
11332#[target_feature(enable = "avx512bw")]
11333#[rustc_legacy_const_generics(1)]
11334#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11335#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11336pub const fn _kshiftri_mask64<const COUNT: u32>(a: __mmask64) -> __mmask64 {
11337    a.unbounded_shr(COUNT)
11338}
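
// Illustrative sketch, not in the upstream source: the shifts are unbounded, so a
// COUNT of 32 or more clears a 32-bit mask rather than wrapping the shift amount.
// The helper name and the COUNT of 32 are illustrative choices only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn example_kshift_by_full_width_clears_mask(a: __mmask32) -> bool {
    _kshiftli_mask32::<32>(a) == 0 && _kshiftri_mask32::<32>(a) == 0
}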
11339
11340/// Compute the bitwise AND of 32-bit masks a and b, and if the result is all zeros, store 1 in dst,
11341/// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b, if the result is all
11342/// zeros, store 1 in and_not, otherwise store 0 in and_not.
11343///
11344/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask32_u8)
11345#[inline]
11346#[target_feature(enable = "avx512bw")]
11347#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11348#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11349pub const unsafe fn _ktest_mask32_u8(a: __mmask32, b: __mmask32, and_not: *mut u8) -> u8 {
11350    *and_not = (_kandn_mask32(a, b) == 0) as u8;
11351    (_kand_mask32(a, b) == 0) as u8
11352}
11353
11354/// Compute the bitwise AND of 64-bit masks a and b, and if the result is all zeros, store 1 in dst,
11355/// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b, if the result is all
11356/// zeros, store 1 in and_not, otherwise store 0 in and_not.
11357///
11358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask64_u8)
11359#[inline]
11360#[target_feature(enable = "avx512bw")]
11361#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11362#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11363pub const unsafe fn _ktest_mask64_u8(a: __mmask64, b: __mmask64, and_not: *mut u8) -> u8 {
11364    *and_not = (_kandn_mask64(a, b) == 0) as u8;
11365    (_kand_mask64(a, b) == 0) as u8
11366}
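
// Usage sketch (illustrative values): the ktest intrinsics return the
// "AND is all zeros" flag and write the "ANDN is all zeros" flag through the
// `and_not` out-pointer.
//
//     let mut and_not = 0u8;
//     // a = 0b0011, b = 0b1100: a & b == 0, but !a & b != 0
//     let zf = unsafe { _ktest_mask32_u8(0b0011, 0b1100, &mut and_not) };
//     // zf == 1, and_not == 0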
11367
11368/// Compute the bitwise NOT of 32-bit mask a and then AND with 32-bit mask b, if the result is all
11369/// zeros, store 1 in dst, otherwise store 0 in dst.
11370///
11371/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask32_u8)
11372#[inline]
11373#[target_feature(enable = "avx512bw")]
11374#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11375#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11376pub const fn _ktestc_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
11377    (_kandn_mask32(a, b) == 0) as u8
11378}
11379
11380/// Compute the bitwise NOT of 64-bit mask a and then AND with 64-bit mask b, if the result is all
11381/// zeros, store 1 in dst, otherwise store 0 in dst.
11382///
11383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask64_u8)
11384#[inline]
11385#[target_feature(enable = "avx512bw")]
11386#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11387#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11388pub const fn _ktestc_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
11389    (_kandn_mask64(a, b) == 0) as u8
11390}
11391
11392/// Compute the bitwise AND of 32-bit masks a and b, and if the result is all zeros, store 1 in dst, otherwise
11393/// store 0 in dst.
11394///
11395/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask32_u8)
11396#[inline]
11397#[target_feature(enable = "avx512bw")]
11398#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11399#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11400pub const fn _ktestz_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
11401    (_kand_mask32(a, b) == 0) as u8
11402}
11403
11404/// Compute the bitwise AND of 64-bit masks a and b, and if the result is all zeros, store 1 in dst, otherwise
11405/// store 0 in dst.
11406///
11407/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask64_u8)
11408#[inline]
11409#[target_feature(enable = "avx512bw")]
11410#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11411#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11412pub const fn _ktestz_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
11413    (_kand_mask64(a, b) == 0) as u8
11414}
11415
11416/// Unpack and interleave 16 bits from masks a and b, and store the 32-bit result in k.
11417///
11418/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kunpackw)
11419#[inline]
11420#[target_feature(enable = "avx512bw")]
11421#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11422#[cfg_attr(test, assert_instr(mov))] // generates normal `and`/shift code instead of kunpckwd
11423#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11424pub const fn _mm512_kunpackw(a: __mmask32, b: __mmask32) -> __mmask32 {
11425    ((a & 0xffff) << 16) | (b & 0xffff)
11426}
11427
11428/// Unpack and interleave 32 bits from masks a and b, and store the 64-bit result in k.
11429///
11430/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kunpackd)
11431#[inline]
11432#[target_feature(enable = "avx512bw")]
11433#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11434#[cfg_attr(test, assert_instr(mov))] // generates normal `and`/shift code instead of kunpckdq
11435#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11436pub const fn _mm512_kunpackd(a: __mmask64, b: __mmask64) -> __mmask64 {
11437    ((a & 0xffffffff) << 32) | (b & 0xffffffff)
11438}
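
// Usage sketch (illustrative values): the low half of `a` lands in the upper
// bits of the result and the low half of `b` in the lower bits.
//
//     let k = _mm512_kunpackw(0xAAAA_1111, 0x5555_2222);
//     // k == 0x1111_2222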
11439
11440/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
11441///
11442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi8&expand=1407)
11443#[inline]
11444#[target_feature(enable = "avx512bw")]
11445#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11446#[cfg_attr(test, assert_instr(vpmovwb))]
11447#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11448pub const fn _mm512_cvtepi16_epi8(a: __m512i) -> __m256i {
11449    unsafe {
11450        let a = a.as_i16x32();
11451        transmute::<i8x32, _>(simd_cast(a))
11452    }
11453}
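
// Usage sketch (illustrative values): truncation simply keeps the low 8 bits of
// each 16-bit element, so values outside the i8 range wrap rather than saturate.
//
//     let a = _mm512_set1_epi16(0x0180);   // 384
//     let r = _mm512_cvtepi16_epi8(a);     // every byte is 0x80 (-128)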
11454
11455/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11456///
11457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi8&expand=1408)
11458#[inline]
11459#[target_feature(enable = "avx512bw")]
11460#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11461#[cfg_attr(test, assert_instr(vpmovwb))]
11462#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11463pub const fn _mm512_mask_cvtepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
11464    unsafe {
11465        let convert = _mm512_cvtepi16_epi8(a).as_i8x32();
11466        transmute(simd_select_bitmask(k, convert, src.as_i8x32()))
11467    }
11468}
11469
11470/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11471///
11472/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi8&expand=1409)
11473#[inline]
11474#[target_feature(enable = "avx512bw")]
11475#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11476#[cfg_attr(test, assert_instr(vpmovwb))]
11477#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11478pub const fn _mm512_maskz_cvtepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
11479    unsafe {
11480        let convert = _mm512_cvtepi16_epi8(a).as_i8x32();
11481        transmute(simd_select_bitmask(k, convert, i8x32::ZERO))
11482    }
11483}
11484
11485/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
11486///
11487/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi16_epi8&expand=1404)
11488#[inline]
11489#[target_feature(enable = "avx512bw,avx512vl")]
11490#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11491#[cfg_attr(test, assert_instr(vpmovwb))]
11492#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11493pub const fn _mm256_cvtepi16_epi8(a: __m256i) -> __m128i {
11494    unsafe {
11495        let a = a.as_i16x16();
11496        transmute::<i8x16, _>(simd_cast(a))
11497    }
11498}
11499
11500/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11501///
11502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi8&expand=1405)
11503#[inline]
11504#[target_feature(enable = "avx512bw,avx512vl")]
11505#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11506#[cfg_attr(test, assert_instr(vpmovwb))]
11507#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11508pub const fn _mm256_mask_cvtepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
11509    unsafe {
11510        let convert = _mm256_cvtepi16_epi8(a).as_i8x16();
11511        transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
11512    }
11513}
11514
11515/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11516///
11517/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi8&expand=1406)
11518#[inline]
11519#[target_feature(enable = "avx512bw,avx512vl")]
11520#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11521#[cfg_attr(test, assert_instr(vpmovwb))]
11522#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11523pub const fn _mm256_maskz_cvtepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
11524    unsafe {
11525        let convert = _mm256_cvtepi16_epi8(a).as_i8x16();
11526        transmute(simd_select_bitmask(k, convert, i8x16::ZERO))
11527    }
11528}
11529
11530/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
11531///
11532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi16_epi8&expand=1401)
11533#[inline]
11534#[target_feature(enable = "avx512bw,avx512vl")]
11535#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11536#[cfg_attr(test, assert_instr(vpmovwb))]
11537#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11538pub const fn _mm_cvtepi16_epi8(a: __m128i) -> __m128i {
11539    unsafe {
11540        let a = a.as_i16x8();
11541        let v256: i16x16 = simd_shuffle!(
11542            a,
11543            i16x8::ZERO,
11544            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8]
11545        );
11546        transmute::<i8x16, _>(simd_cast(v256))
11547    }
11548}
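
// Design note: the 128-bit source holds only eight 16-bit elements, so the
// implementation widens it to sixteen lanes (padding with zeros) before the
// cast; the upper 8 bytes of the result are therefore always zero.
//
// Usage sketch (illustrative values):
//     let r = _mm_cvtepi16_epi8(_mm_set1_epi16(-1));
//     // low 8 bytes are 0xFF, upper 8 bytes are 0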
11549
11550/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11551///
11552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi8&expand=1402)
11553#[inline]
11554#[target_feature(enable = "avx512bw,avx512vl")]
11555#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11556#[cfg_attr(test, assert_instr(vpmovwb))]
11557#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11558pub const fn _mm_mask_cvtepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11559    unsafe {
11560        let a = _mm_cvtepi16_epi8(a).as_i8x16();
11561        let src = simd_shuffle!(
11562            src.as_i8x16(),
11563            i8x16::ZERO,
11564            [0, 1, 2, 3, 4, 5, 6, 7, 16, 16, 16, 16, 16, 16, 16, 16]
11565        );
11566        simd_select_bitmask(k as u16, a, src).as_m128i()
11567    }
11568}
11569
11570/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11571///
11572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi8&expand=1403)
11573#[inline]
11574#[target_feature(enable = "avx512bw,avx512vl")]
11575#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11576#[cfg_attr(test, assert_instr(vpmovwb))]
11577#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11578pub const fn _mm_maskz_cvtepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
11579    _mm_mask_cvtepi16_epi8(_mm_setzero_si128(), k, a)
11580}
11581
11582/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
11583///
11584/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi16_epi8&expand=1807)
11585#[inline]
11586#[target_feature(enable = "avx512bw")]
11587#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11588#[cfg_attr(test, assert_instr(vpmovswb))]
11589#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
11590pub const fn _mm512_cvtsepi16_epi8(a: __m512i) -> __m256i {
11591    unsafe {
11592        simd_cast::<_, i8x32>(simd_imax(
11593            simd_imin(a.as_i16x32(), i16x32::splat(i8::MAX as _)),
11594            i16x32::splat(i8::MIN as _),
11595        ))
11596        .as_m256i()
11597    }
11598}
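
// Usage sketch (illustrative values): signed saturation clamps each element to
// the i8 range before narrowing.
//
//     let hi = _mm512_cvtsepi16_epi8(_mm512_set1_epi16(300));   // every byte is 127
//     let lo = _mm512_cvtsepi16_epi8(_mm512_set1_epi16(-300));  // every byte is -128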
11599
11600/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11601///
11602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi16_epi8&expand=1808)
11603#[inline]
11604#[target_feature(enable = "avx512bw")]
11605#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11606#[cfg_attr(test, assert_instr(vpmovswb))]
11607#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
11608pub const fn _mm512_mask_cvtsepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
11609    unsafe {
11610        simd_select_bitmask(k, _mm512_cvtsepi16_epi8(a).as_i8x32(), src.as_i8x32()).as_m256i()
11611    }
11612}
11613
11614/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11615///
11616/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi16_epi8&expand=1809)
11617#[inline]
11618#[target_feature(enable = "avx512bw")]
11619#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11620#[cfg_attr(test, assert_instr(vpmovswb))]
11621#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
11622pub const fn _mm512_maskz_cvtsepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
11623    unsafe { simd_select_bitmask(k, _mm512_cvtsepi16_epi8(a).as_i8x32(), i8x32::ZERO).as_m256i() }
11624}
11625
11626/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
11627///
11628/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi16_epi8&expand=1804)
11629#[inline]
11630#[target_feature(enable = "avx512bw,avx512vl")]
11631#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11632#[cfg_attr(test, assert_instr(vpmovswb))]
11633#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
11634pub const fn _mm256_cvtsepi16_epi8(a: __m256i) -> __m128i {
11635    unsafe {
11636        simd_cast::<_, i8x16>(simd_imax(
11637            simd_imin(a.as_i16x16(), i16x16::splat(i8::MAX as _)),
11638            i16x16::splat(i8::MIN as _),
11639        ))
11640        .as_m128i()
11641    }
11642}
11643
11644/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11645///
11646/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi16_epi8&expand=1805)
11647#[inline]
11648#[target_feature(enable = "avx512bw,avx512vl")]
11649#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11650#[cfg_attr(test, assert_instr(vpmovswb))]
11651#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
11652pub const fn _mm256_mask_cvtsepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
11653    unsafe {
11654        simd_select_bitmask(k, _mm256_cvtsepi16_epi8(a).as_i8x16(), src.as_i8x16()).as_m128i()
11655    }
11656}
11657
11658/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11659///
11660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi16_epi8&expand=1806)
11661#[inline]
11662#[target_feature(enable = "avx512bw,avx512vl")]
11663#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11664#[cfg_attr(test, assert_instr(vpmovswb))]
11665#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
11666pub const fn _mm256_maskz_cvtsepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
11667    unsafe { simd_select_bitmask(k, _mm256_cvtsepi16_epi8(a).as_i8x16(), i8x16::ZERO).as_m128i() }
11668}
11669
11670/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
11671///
11672/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi16_epi8&expand=1801)
11673#[inline]
11674#[target_feature(enable = "avx512bw,avx512vl")]
11675#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11676#[cfg_attr(test, assert_instr(vpmovswb))]
11677pub fn _mm_cvtsepi16_epi8(a: __m128i) -> __m128i {
11678    unsafe { transmute(vpmovswb128(a.as_i16x8(), i8x16::ZERO, 0b11111111)) }
11679}
11680
11681/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11682///
11683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi16_epi8&expand=1802)
11684#[inline]
11685#[target_feature(enable = "avx512bw,avx512vl")]
11686#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11687#[cfg_attr(test, assert_instr(vpmovswb))]
11688pub fn _mm_mask_cvtsepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11689    unsafe { transmute(vpmovswb128(a.as_i16x8(), src.as_i8x16(), k)) }
11690}
11691
11692/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11693///
11694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi16_epi8&expand=1803)
11695#[inline]
11696#[target_feature(enable = "avx512bw,avx512vl")]
11697#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11698#[cfg_attr(test, assert_instr(vpmovswb))]
11699pub fn _mm_maskz_cvtsepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
11700    unsafe { transmute(vpmovswb128(a.as_i16x8(), i8x16::ZERO, k)) }
11701}
11702
11703/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
11704///
11705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi16_epi8&expand=2042)
11706#[inline]
11707#[target_feature(enable = "avx512bw")]
11708#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11709#[cfg_attr(test, assert_instr(vpmovuswb))]
11710#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
11711pub const fn _mm512_cvtusepi16_epi8(a: __m512i) -> __m256i {
11712    unsafe {
11713        simd_cast::<_, u8x32>(simd_imin(a.as_u16x32(), u16x32::splat(u8::MAX as _))).as_m256i()
11714    }
11715}
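
// Usage sketch (illustrative values): unsigned saturation only needs an upper
// clamp, since the source elements are interpreted as unsigned.
//
//     let r = _mm512_cvtusepi16_epi8(_mm512_set1_epi16(300));  // every byte is 255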
11716
11717/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11718///
11719/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi16_epi8&expand=2043)
11720#[inline]
11721#[target_feature(enable = "avx512bw")]
11722#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11723#[cfg_attr(test, assert_instr(vpmovuswb))]
11724#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
11725pub const fn _mm512_mask_cvtusepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
11726    unsafe {
11727        simd_select_bitmask(k, _mm512_cvtusepi16_epi8(a).as_u8x32(), src.as_u8x32()).as_m256i()
11728    }
11729}
11730
11731/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11732///
11733/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi16_epi8&expand=2044)
11734#[inline]
11735#[target_feature(enable = "avx512bw")]
11736#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11737#[cfg_attr(test, assert_instr(vpmovuswb))]
11738#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
11739pub const fn _mm512_maskz_cvtusepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
11740    unsafe { simd_select_bitmask(k, _mm512_cvtusepi16_epi8(a).as_u8x32(), u8x32::ZERO).as_m256i() }
11741}
11742
11743/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
11744///
11745/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi16_epi8&expand=2039)
11746#[inline]
11747#[target_feature(enable = "avx512bw,avx512vl")]
11748#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11749#[cfg_attr(test, assert_instr(vpmovuswb))]
11750#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
11751pub const fn _mm256_cvtusepi16_epi8(a: __m256i) -> __m128i {
11752    unsafe {
11753        simd_cast::<_, u8x16>(simd_imin(a.as_u16x16(), u16x16::splat(u8::MAX as _))).as_m128i()
11754    }
11755}
11756
11757/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11758///
11759/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi16_epi8&expand=2040)
11760#[inline]
11761#[target_feature(enable = "avx512bw,avx512vl")]
11762#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11763#[cfg_attr(test, assert_instr(vpmovuswb))]
11764#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
11765pub const fn _mm256_mask_cvtusepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
11766    unsafe {
11767        simd_select_bitmask(k, _mm256_cvtusepi16_epi8(a).as_u8x16(), src.as_u8x16()).as_m128i()
11768    }
11769}
11770
11771/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11772///
11773/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi16_epi8&expand=2041)
11774#[inline]
11775#[target_feature(enable = "avx512bw,avx512vl")]
11776#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11777#[cfg_attr(test, assert_instr(vpmovuswb))]
11778#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
11779pub const fn _mm256_maskz_cvtusepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
11780    unsafe { simd_select_bitmask(k, _mm256_cvtusepi16_epi8(a).as_u8x16(), u8x16::ZERO).as_m128i() }
11781}
11782
11783/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
11784///
11785/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi16_epi8&expand=2036)
11786#[inline]
11787#[target_feature(enable = "avx512bw,avx512vl")]
11788#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11789#[cfg_attr(test, assert_instr(vpmovuswb))]
11790pub fn _mm_cvtusepi16_epi8(a: __m128i) -> __m128i {
11791    unsafe { transmute(vpmovuswb128(a.as_u16x8(), u8x16::ZERO, 0b11111111)) }
11792}
11793
11794/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11795///
11796/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi16_epi8&expand=2037)
11797#[inline]
11798#[target_feature(enable = "avx512bw,avx512vl")]
11799#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11800#[cfg_attr(test, assert_instr(vpmovuswb))]
11801pub fn _mm_mask_cvtusepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11802    unsafe { transmute(vpmovuswb128(a.as_u16x8(), src.as_u8x16(), k)) }
11803}
11804
11805/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11806///
11807/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi16_epi8&expand=2038)
11808#[inline]
11809#[target_feature(enable = "avx512bw,avx512vl")]
11810#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11811#[cfg_attr(test, assert_instr(vpmovuswb))]
11812pub fn _mm_maskz_cvtusepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
11813    unsafe { transmute(vpmovuswb128(a.as_u16x8(), u8x16::ZERO, k)) }
11814}
11815
11816/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst.
11817///
11818/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi16&expand=1526)
11819#[inline]
11820#[target_feature(enable = "avx512bw")]
11821#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11822#[cfg_attr(test, assert_instr(vpmovsxbw))]
11823#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11824pub const fn _mm512_cvtepi8_epi16(a: __m256i) -> __m512i {
11825    unsafe {
11826        let a = a.as_i8x32();
11827        transmute::<i16x32, _>(simd_cast(a))
11828    }
11829}
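
// Usage sketch (illustrative values): sign extension preserves negative values.
//
//     let r = _mm512_cvtepi8_epi16(_mm256_set1_epi8(-5));  // every 16-bit lane is -5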
11830
11831/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11832///
11833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi16&expand=1527)
11834#[inline]
11835#[target_feature(enable = "avx512bw")]
11836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11837#[cfg_attr(test, assert_instr(vpmovsxbw))]
11838#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11839pub const fn _mm512_mask_cvtepi8_epi16(src: __m512i, k: __mmask32, a: __m256i) -> __m512i {
11840    unsafe {
11841        let convert = _mm512_cvtepi8_epi16(a).as_i16x32();
11842        transmute(simd_select_bitmask(k, convert, src.as_i16x32()))
11843    }
11844}
11845
11846/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11847///
11848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi16&expand=1528)
11849#[inline]
11850#[target_feature(enable = "avx512bw")]
11851#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11852#[cfg_attr(test, assert_instr(vpmovsxbw))]
11853#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11854pub const fn _mm512_maskz_cvtepi8_epi16(k: __mmask32, a: __m256i) -> __m512i {
11855    unsafe {
11856        let convert = _mm512_cvtepi8_epi16(a).as_i16x32();
11857        transmute(simd_select_bitmask(k, convert, i16x32::ZERO))
11858    }
11859}
11860
11861/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11862///
11863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi16&expand=1524)
11864#[inline]
11865#[target_feature(enable = "avx512bw,avx512vl")]
11866#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11867#[cfg_attr(test, assert_instr(vpmovsxbw))]
11868#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11869pub const fn _mm256_mask_cvtepi8_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
11870    unsafe {
11871        let convert = _mm256_cvtepi8_epi16(a).as_i16x16();
11872        transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
11873    }
11874}
11875
11876/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11877///
11878/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi16&expand=1525)
11879#[inline]
11880#[target_feature(enable = "avx512bw,avx512vl")]
11881#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11882#[cfg_attr(test, assert_instr(vpmovsxbw))]
11883#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11884pub const fn _mm256_maskz_cvtepi8_epi16(k: __mmask16, a: __m128i) -> __m256i {
11885    unsafe {
11886        let convert = _mm256_cvtepi8_epi16(a).as_i16x16();
11887        transmute(simd_select_bitmask(k, convert, i16x16::ZERO))
11888    }
11889}
11890
11891/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11892///
11893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi16&expand=1521)
11894#[inline]
11895#[target_feature(enable = "avx512bw,avx512vl")]
11896#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11897#[cfg_attr(test, assert_instr(vpmovsxbw))]
11898#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11899pub const fn _mm_mask_cvtepi8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11900    unsafe {
11901        let convert = _mm_cvtepi8_epi16(a).as_i16x8();
11902        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
11903    }
11904}
11905
11906/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11907///
11908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi16&expand=1522)
11909#[inline]
11910#[target_feature(enable = "avx512bw,avx512vl")]
11911#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11912#[cfg_attr(test, assert_instr(vpmovsxbw))]
11913#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11914pub const fn _mm_maskz_cvtepi8_epi16(k: __mmask8, a: __m128i) -> __m128i {
11915    unsafe {
11916        let convert = _mm_cvtepi8_epi16(a).as_i16x8();
11917        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
11918    }
11919}
11920
11921/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst.
11922///
11923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi16&expand=1612)
11924#[inline]
11925#[target_feature(enable = "avx512bw")]
11926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11927#[cfg_attr(test, assert_instr(vpmovzxbw))]
11928#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11929pub const fn _mm512_cvtepu8_epi16(a: __m256i) -> __m512i {
11930    unsafe {
11931        let a = a.as_u8x32();
11932        transmute::<i16x32, _>(simd_cast(a))
11933    }
11934}
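
// Usage sketch (illustrative values): zero extension treats the source bytes as
// unsigned, in contrast to `_mm512_cvtepi8_epi16`.
//
//     let r = _mm512_cvtepu8_epi16(_mm256_set1_epi8(-1));  // every 16-bit lane is 255, not -1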
11935
11936/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11937///
11938/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi16&expand=1613)
11939#[inline]
11940#[target_feature(enable = "avx512bw")]
11941#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11942#[cfg_attr(test, assert_instr(vpmovzxbw))]
11943#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11944pub const fn _mm512_mask_cvtepu8_epi16(src: __m512i, k: __mmask32, a: __m256i) -> __m512i {
11945    unsafe {
11946        let convert = _mm512_cvtepu8_epi16(a).as_i16x32();
11947        transmute(simd_select_bitmask(k, convert, src.as_i16x32()))
11948    }
11949}
11950
11951/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11952///
11953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi16&expand=1614)
11954#[inline]
11955#[target_feature(enable = "avx512bw")]
11956#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11957#[cfg_attr(test, assert_instr(vpmovzxbw))]
11958#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11959pub const fn _mm512_maskz_cvtepu8_epi16(k: __mmask32, a: __m256i) -> __m512i {
11960    unsafe {
11961        let convert = _mm512_cvtepu8_epi16(a).as_i16x32();
11962        transmute(simd_select_bitmask(k, convert, i16x32::ZERO))
11963    }
11964}
11965
11966/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11967///
11968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi16&expand=1610)
11969#[inline]
11970#[target_feature(enable = "avx512bw,avx512vl")]
11971#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11972#[cfg_attr(test, assert_instr(vpmovzxbw))]
11973#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11974pub const fn _mm256_mask_cvtepu8_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
11975    unsafe {
11976        let convert = _mm256_cvtepu8_epi16(a).as_i16x16();
11977        transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
11978    }
11979}
11980
11981/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11982///
11983/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu8_epi16&expand=1611)
11984#[inline]
11985#[target_feature(enable = "avx512bw,avx512vl")]
11986#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11987#[cfg_attr(test, assert_instr(vpmovzxbw))]
11988#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11989pub const fn _mm256_maskz_cvtepu8_epi16(k: __mmask16, a: __m128i) -> __m256i {
11990    unsafe {
11991        let convert = _mm256_cvtepu8_epi16(a).as_i16x16();
11992        transmute(simd_select_bitmask(k, convert, i16x16::ZERO))
11993    }
11994}
11995
11996/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11997///
11998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi16&expand=1607)
11999#[inline]
12000#[target_feature(enable = "avx512bw,avx512vl")]
12001#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12002#[cfg_attr(test, assert_instr(vpmovzxbw))]
12003#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12004pub const fn _mm_mask_cvtepu8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12005    unsafe {
12006        let convert = _mm_cvtepu8_epi16(a).as_i16x8();
12007        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
12008    }
12009}
12010
12011/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12012///
12013/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu8_epi16&expand=1608)
12014#[inline]
12015#[target_feature(enable = "avx512bw,avx512vl")]
12016#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12017#[cfg_attr(test, assert_instr(vpmovzxbw))]
12018#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12019pub const fn _mm_maskz_cvtepu8_epi16(k: __mmask8, a: __m128i) -> __m128i {
12020    unsafe {
12021        let convert = _mm_cvtepu8_epi16(a).as_i16x8();
12022        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
12023    }
12024}
12025
12026/// Shift 128-bit lanes in a left by imm8 bytes while shifting in zeros, and store the results in dst.
12027///
12028/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_bslli_epi128&expand=591)
12029#[inline]
12030#[target_feature(enable = "avx512bw")]
12031#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12032#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
12033#[rustc_legacy_const_generics(1)]
12034#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12035pub const fn _mm512_bslli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
12036    unsafe {
12037        static_assert_uimm_bits!(IMM8, 8);
12038        const fn mask(shift: i32, i: u32) -> u32 {
12039            let shift = shift as u32 & 0xff;
12040            if shift > 15 || i % 16 < shift {
12041                0
12042            } else {
12043                64 + (i - shift)
12044            }
12045        }
12046        let a = a.as_i8x64();
12047        let zero = i8x64::ZERO;
12048        let r: i8x64 = simd_shuffle!(
12049            zero,
12050            a,
12051            [
12052                mask(IMM8, 0),
12053                mask(IMM8, 1),
12054                mask(IMM8, 2),
12055                mask(IMM8, 3),
12056                mask(IMM8, 4),
12057                mask(IMM8, 5),
12058                mask(IMM8, 6),
12059                mask(IMM8, 7),
12060                mask(IMM8, 8),
12061                mask(IMM8, 9),
12062                mask(IMM8, 10),
12063                mask(IMM8, 11),
12064                mask(IMM8, 12),
12065                mask(IMM8, 13),
12066                mask(IMM8, 14),
12067                mask(IMM8, 15),
12068                mask(IMM8, 16),
12069                mask(IMM8, 17),
12070                mask(IMM8, 18),
12071                mask(IMM8, 19),
12072                mask(IMM8, 20),
12073                mask(IMM8, 21),
12074                mask(IMM8, 22),
12075                mask(IMM8, 23),
12076                mask(IMM8, 24),
12077                mask(IMM8, 25),
12078                mask(IMM8, 26),
12079                mask(IMM8, 27),
12080                mask(IMM8, 28),
12081                mask(IMM8, 29),
12082                mask(IMM8, 30),
12083                mask(IMM8, 31),
12084                mask(IMM8, 32),
12085                mask(IMM8, 33),
12086                mask(IMM8, 34),
12087                mask(IMM8, 35),
12088                mask(IMM8, 36),
12089                mask(IMM8, 37),
12090                mask(IMM8, 38),
12091                mask(IMM8, 39),
12092                mask(IMM8, 40),
12093                mask(IMM8, 41),
12094                mask(IMM8, 42),
12095                mask(IMM8, 43),
12096                mask(IMM8, 44),
12097                mask(IMM8, 45),
12098                mask(IMM8, 46),
12099                mask(IMM8, 47),
12100                mask(IMM8, 48),
12101                mask(IMM8, 49),
12102                mask(IMM8, 50),
12103                mask(IMM8, 51),
12104                mask(IMM8, 52),
12105                mask(IMM8, 53),
12106                mask(IMM8, 54),
12107                mask(IMM8, 55),
12108                mask(IMM8, 56),
12109                mask(IMM8, 57),
12110                mask(IMM8, 58),
12111                mask(IMM8, 59),
12112                mask(IMM8, 60),
12113                mask(IMM8, 61),
12114                mask(IMM8, 62),
12115                mask(IMM8, 63),
12116            ],
12117        );
12118        transmute(r)
12119    }
12120}
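
// Usage sketch (illustrative values): the byte shift is applied to each 128-bit
// lane independently, so bytes never move across lane boundaries.
//
//     let r = _mm512_bslli_epi128::<3>(_mm512_set1_epi8(1));
//     // in every 16-byte lane, the low 3 bytes are 0 and the remaining 13 are 1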
12121
12122/// Shift 128-bit lanes in a right by imm8 bytes while shifting in zeros, and store the results in dst.
12123///
12124/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_bsrli_epi128&expand=594)
12125#[inline]
12126#[target_feature(enable = "avx512bw")]
12127#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12128#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 3))]
12129#[rustc_legacy_const_generics(1)]
12130#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12131pub const fn _mm512_bsrli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
12132    unsafe {
12133        static_assert_uimm_bits!(IMM8, 8);
12134        const fn mask(shift: i32, i: u32) -> u32 {
12135            let shift = shift as u32 & 0xff;
12136            if shift > 15 || (15 - (i % 16)) < shift {
12137                0
12138            } else {
12139                64 + (i + shift)
12140            }
12141        }
12142        let a = a.as_i8x64();
12143        let zero = i8x64::ZERO;
12144        let r: i8x64 = simd_shuffle!(
12145            zero,
12146            a,
12147            [
12148                mask(IMM8, 0),
12149                mask(IMM8, 1),
12150                mask(IMM8, 2),
12151                mask(IMM8, 3),
12152                mask(IMM8, 4),
12153                mask(IMM8, 5),
12154                mask(IMM8, 6),
12155                mask(IMM8, 7),
12156                mask(IMM8, 8),
12157                mask(IMM8, 9),
12158                mask(IMM8, 10),
12159                mask(IMM8, 11),
12160                mask(IMM8, 12),
12161                mask(IMM8, 13),
12162                mask(IMM8, 14),
12163                mask(IMM8, 15),
12164                mask(IMM8, 16),
12165                mask(IMM8, 17),
12166                mask(IMM8, 18),
12167                mask(IMM8, 19),
12168                mask(IMM8, 20),
12169                mask(IMM8, 21),
12170                mask(IMM8, 22),
12171                mask(IMM8, 23),
12172                mask(IMM8, 24),
12173                mask(IMM8, 25),
12174                mask(IMM8, 26),
12175                mask(IMM8, 27),
12176                mask(IMM8, 28),
12177                mask(IMM8, 29),
12178                mask(IMM8, 30),
12179                mask(IMM8, 31),
12180                mask(IMM8, 32),
12181                mask(IMM8, 33),
12182                mask(IMM8, 34),
12183                mask(IMM8, 35),
12184                mask(IMM8, 36),
12185                mask(IMM8, 37),
12186                mask(IMM8, 38),
12187                mask(IMM8, 39),
12188                mask(IMM8, 40),
12189                mask(IMM8, 41),
12190                mask(IMM8, 42),
12191                mask(IMM8, 43),
12192                mask(IMM8, 44),
12193                mask(IMM8, 45),
12194                mask(IMM8, 46),
12195                mask(IMM8, 47),
12196                mask(IMM8, 48),
12197                mask(IMM8, 49),
12198                mask(IMM8, 50),
12199                mask(IMM8, 51),
12200                mask(IMM8, 52),
12201                mask(IMM8, 53),
12202                mask(IMM8, 54),
12203                mask(IMM8, 55),
12204                mask(IMM8, 56),
12205                mask(IMM8, 57),
12206                mask(IMM8, 58),
12207                mask(IMM8, 59),
12208                mask(IMM8, 60),
12209                mask(IMM8, 61),
12210                mask(IMM8, 62),
12211                mask(IMM8, 63),
12212            ],
12213        );
12214        transmute(r)
12215    }
12216}
12217
12218/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst.
12219/// Unlike the [`_mm_alignr_epi8`] and [`_mm256_alignr_epi8`] functions, where the entire input vectors are concatenated to form the temporary result,
12220/// this concatenation happens in 4 independent steps, each of which builds a 32-byte temporary result.
12221///
12222/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi8&expand=263)
12223#[inline]
12224#[target_feature(enable = "avx512bw")]
12225#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12226#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))]
12227#[rustc_legacy_const_generics(2)]
12228#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12229pub const fn _mm512_alignr_epi8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
12230    const fn mask(shift: u32, i: u32) -> u32 {
12231        let shift = shift % 16;
12232        let mod_i = i % 16;
12233        if mod_i < (16 - shift) {
12234            i + shift
12235        } else {
12236            i + 48 + shift
12237        }
12238    }
12239
12240    // If palignr is shifting the pair of vectors more than the size of two
12241    // lanes, emit zero.
12242    if IMM8 >= 32 {
12243        return _mm512_setzero_si512();
12244    }
12245    // If palignr is shifting the pair of input vectors more than one lane,
12246    // but less than two lanes, convert to shifting in zeroes.
12247    let (a, b) = if IMM8 > 16 {
12248        (_mm512_setzero_si512(), a)
12249    } else {
12250        (a, b)
12251    };
12252    unsafe {
12253        if IMM8 == 16 {
12254            return transmute(a);
12255        }
12256
12257        let r: i8x64 = simd_shuffle!(
12258            b.as_i8x64(),
12259            a.as_i8x64(),
12260            [
12261                mask(IMM8 as u32, 0),
12262                mask(IMM8 as u32, 1),
12263                mask(IMM8 as u32, 2),
12264                mask(IMM8 as u32, 3),
12265                mask(IMM8 as u32, 4),
12266                mask(IMM8 as u32, 5),
12267                mask(IMM8 as u32, 6),
12268                mask(IMM8 as u32, 7),
12269                mask(IMM8 as u32, 8),
12270                mask(IMM8 as u32, 9),
12271                mask(IMM8 as u32, 10),
12272                mask(IMM8 as u32, 11),
12273                mask(IMM8 as u32, 12),
12274                mask(IMM8 as u32, 13),
12275                mask(IMM8 as u32, 14),
12276                mask(IMM8 as u32, 15),
12277                mask(IMM8 as u32, 16),
12278                mask(IMM8 as u32, 17),
12279                mask(IMM8 as u32, 18),
12280                mask(IMM8 as u32, 19),
12281                mask(IMM8 as u32, 20),
12282                mask(IMM8 as u32, 21),
12283                mask(IMM8 as u32, 22),
12284                mask(IMM8 as u32, 23),
12285                mask(IMM8 as u32, 24),
12286                mask(IMM8 as u32, 25),
12287                mask(IMM8 as u32, 26),
12288                mask(IMM8 as u32, 27),
12289                mask(IMM8 as u32, 28),
12290                mask(IMM8 as u32, 29),
12291                mask(IMM8 as u32, 30),
12292                mask(IMM8 as u32, 31),
12293                mask(IMM8 as u32, 32),
12294                mask(IMM8 as u32, 33),
12295                mask(IMM8 as u32, 34),
12296                mask(IMM8 as u32, 35),
12297                mask(IMM8 as u32, 36),
12298                mask(IMM8 as u32, 37),
12299                mask(IMM8 as u32, 38),
12300                mask(IMM8 as u32, 39),
12301                mask(IMM8 as u32, 40),
12302                mask(IMM8 as u32, 41),
12303                mask(IMM8 as u32, 42),
12304                mask(IMM8 as u32, 43),
12305                mask(IMM8 as u32, 44),
12306                mask(IMM8 as u32, 45),
12307                mask(IMM8 as u32, 46),
12308                mask(IMM8 as u32, 47),
12309                mask(IMM8 as u32, 48),
12310                mask(IMM8 as u32, 49),
12311                mask(IMM8 as u32, 50),
12312                mask(IMM8 as u32, 51),
12313                mask(IMM8 as u32, 52),
12314                mask(IMM8 as u32, 53),
12315                mask(IMM8 as u32, 54),
12316                mask(IMM8 as u32, 55),
12317                mask(IMM8 as u32, 56),
12318                mask(IMM8 as u32, 57),
12319                mask(IMM8 as u32, 58),
12320                mask(IMM8 as u32, 59),
12321                mask(IMM8 as u32, 60),
12322                mask(IMM8 as u32, 61),
12323                mask(IMM8 as u32, 62),
12324                mask(IMM8 as u32, 63),
12325            ],
12326        );
12327        transmute(r)
12328    }
12329}
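
// Usage sketch (illustrative values): with IMM8 = 1, each 16-byte lane of the
// result is the 32-byte concatenation [b | a] for that lane, shifted right by
// one byte and truncated to its low 16 bytes.
//
//     let r = _mm512_alignr_epi8::<1>(_mm512_set1_epi8(2), _mm512_set1_epi8(1));
//     // per lane: bytes 0..=14 come from `b` (1), byte 15 comes from `a` (2)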
12330
12331/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12332///
12333/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi8&expand=264)
12334#[inline]
12335#[target_feature(enable = "avx512bw")]
12336#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12337#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))]
12338#[rustc_legacy_const_generics(4)]
12339#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12340pub const fn _mm512_mask_alignr_epi8<const IMM8: i32>(
12341    src: __m512i,
12342    k: __mmask64,
12343    a: __m512i,
12344    b: __m512i,
12345) -> __m512i {
12346    unsafe {
12347        static_assert_uimm_bits!(IMM8, 8);
12348        let r = _mm512_alignr_epi8::<IMM8>(a, b);
12349        transmute(simd_select_bitmask(k, r.as_i8x64(), src.as_i8x64()))
12350    }
12351}
12352
12353/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12354///
12355/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi8&expand=265)
12356#[inline]
12357#[target_feature(enable = "avx512bw")]
12358#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12359#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))]
12360#[rustc_legacy_const_generics(3)]
12361#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12362pub const fn _mm512_maskz_alignr_epi8<const IMM8: i32>(
12363    k: __mmask64,
12364    a: __m512i,
12365    b: __m512i,
12366) -> __m512i {
12367    unsafe {
12368        static_assert_uimm_bits!(IMM8, 8);
12369        let r = _mm512_alignr_epi8::<IMM8>(a, b);
12370        transmute(simd_select_bitmask(k, r.as_i8x64(), i8x64::ZERO))
12371    }
12372}
12373
12374/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12375///
12376/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi8&expand=261)
12377#[inline]
12378#[target_feature(enable = "avx512bw,avx512vl")]
12379#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12380#[rustc_legacy_const_generics(4)]
12381#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
12382#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12383pub const fn _mm256_mask_alignr_epi8<const IMM8: i32>(
12384    src: __m256i,
12385    k: __mmask32,
12386    a: __m256i,
12387    b: __m256i,
12388) -> __m256i {
12389    unsafe {
12390        static_assert_uimm_bits!(IMM8, 8);
12391        let r = _mm256_alignr_epi8::<IMM8>(a, b);
12392        transmute(simd_select_bitmask(k, r.as_i8x32(), src.as_i8x32()))
12393    }
12394}
12395
12396/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12397///
12398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi8&expand=262)
12399#[inline]
12400#[target_feature(enable = "avx512bw,avx512vl")]
12401#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12402#[rustc_legacy_const_generics(3)]
12403#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
12404#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12405pub const fn _mm256_maskz_alignr_epi8<const IMM8: i32>(
12406    k: __mmask32,
12407    a: __m256i,
12408    b: __m256i,
12409) -> __m256i {
12410    unsafe {
12411        static_assert_uimm_bits!(IMM8, 8);
12412        let r = _mm256_alignr_epi8::<IMM8>(a, b);
12413        transmute(simd_select_bitmask(k, r.as_i8x32(), i8x32::ZERO))
12414    }
12415}
12416
12417/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12418///
12419/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi8&expand=258)
12420#[inline]
12421#[target_feature(enable = "avx512bw,avx512vl")]
12422#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12423#[rustc_legacy_const_generics(4)]
12424#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
12425#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12426pub const fn _mm_mask_alignr_epi8<const IMM8: i32>(
12427    src: __m128i,
12428    k: __mmask16,
12429    a: __m128i,
12430    b: __m128i,
12431) -> __m128i {
12432    unsafe {
12433        static_assert_uimm_bits!(IMM8, 8);
12434        let r = _mm_alignr_epi8::<IMM8>(a, b);
12435        transmute(simd_select_bitmask(k, r.as_i8x16(), src.as_i8x16()))
12436    }
12437}
12438
12439/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12440///
12441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi8&expand=259)
12442#[inline]
12443#[target_feature(enable = "avx512bw,avx512vl")]
12444#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12445#[rustc_legacy_const_generics(3)]
12446#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
12447#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12448pub const fn _mm_maskz_alignr_epi8<const IMM8: i32>(
12449    k: __mmask16,
12450    a: __m128i,
12451    b: __m128i,
12452) -> __m128i {
12453    unsafe {
12454        static_assert_uimm_bits!(IMM8, 8);
12455        let r = _mm_alignr_epi8::<IMM8>(a, b);
12456        transmute(simd_select_bitmask(k, r.as_i8x16(), i8x16::ZERO))
12457    }
12458}
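
// Editorial sketch, not part of the upstream crate: a minimal illustration of how the
// masked byte-alignment intrinsics above behave. The function name is hypothetical and
// the example assumes AVX-512BW/VL support at runtime.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn alignr_epi8_mask_sketch() {
    let a = _mm256_set1_epi8(2);
    let b = _mm256_set1_epi8(1);
    let src = _mm256_set1_epi8(-1);
    // Per 128-bit lane, the concatenation a:b is shifted right by IMM8 = 4 bytes and the
    // low 16 bytes are kept; result bytes whose mask bit is clear are copied from `src`
    // (mask variant) or zeroed (maskz variant).
    let masked = _mm256_mask_alignr_epi8::<4>(src, 0x0000_FFFF, a, b);
    let zeroed = _mm256_maskz_alignr_epi8::<4>(0x0000_FFFF, a, b);
    let _ = (masked, zeroed);
}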
12459
12460/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
12461///
12462/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi16_storeu_epi8&expand=1812)
12463#[inline]
12464#[target_feature(enable = "avx512bw")]
12465#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12466#[cfg_attr(test, assert_instr(vpmovswb))]
12467pub unsafe fn _mm512_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
12468    vpmovswbmem(mem_addr, a.as_i16x32(), k);
12469}
12470
12471/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
12472///
12473/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi16_storeu_epi8&expand=1811)
12474#[inline]
12475#[target_feature(enable = "avx512bw,avx512vl")]
12476#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12477#[cfg_attr(test, assert_instr(vpmovswb))]
12478pub unsafe fn _mm256_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
12479    vpmovswbmem256(mem_addr, a.as_i16x16(), k);
12480}
12481
12482/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
12483///
12484/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi16_storeu_epi8&expand=1810)
12485#[inline]
12486#[target_feature(enable = "avx512bw,avx512vl")]
12487#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12488#[cfg_attr(test, assert_instr(vpmovswb))]
12489pub unsafe fn _mm_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
12490    vpmovswbmem128(mem_addr, a.as_i16x8(), k);
12491}
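
// Editorial sketch, not part of the upstream crate: demonstrates the masked saturating
// narrowing stores above on a small buffer. The function name is hypothetical and the
// example assumes AVX-512BW/VL support at runtime.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn cvtsepi16_storeu_sketch() {
    // Each 16-bit word is clamped to the i8 range (300 saturates to 127); only the
    // bytes whose mask bit is set are written, the rest of the buffer is untouched.
    let a = _mm_set1_epi16(300);
    let mut buf = [0i8; 8];
    _mm_mask_cvtsepi16_storeu_epi8(buf.as_mut_ptr(), 0b0000_1111, a);
    // buf is now [127, 127, 127, 127, 0, 0, 0, 0].
}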
12492
12493/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
12494///
12495/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_storeu_epi8&expand=1412)
12496#[inline]
12497#[target_feature(enable = "avx512bw")]
12498#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12499#[cfg_attr(test, assert_instr(vpmovwb))]
12500#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
12501pub const unsafe fn _mm512_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
12502    let result = _mm512_cvtepi16_epi8(a).as_i8x32();
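    // Expand the bitmask into a per-byte all-ones/all-zeros vector so the generic
    // unaligned masked store only writes the selected bytes.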
12503    let mask = simd_select_bitmask(k, i8x32::splat(!0), i8x32::ZERO);
12504    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, result);
12505}
12506
12507/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
12508///
12509/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_storeu_epi8&expand=1411)
12510#[inline]
12511#[target_feature(enable = "avx512bw,avx512vl")]
12512#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12513#[cfg_attr(test, assert_instr(vpmovwb))]
12514#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
12515pub const unsafe fn _mm256_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
12516    let result = _mm256_cvtepi16_epi8(a).as_i8x16();
12517    let mask = simd_select_bitmask(k, i8x16::splat(!0), i8x16::ZERO);
12518    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, result);
12519}
12520
12521/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
12522///
12523/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_storeu_epi8&expand=1410)
12524#[inline]
12525#[target_feature(enable = "avx512bw,avx512vl")]
12526#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12527#[cfg_attr(test, assert_instr(vpmovwb))]
12528#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
12529pub const unsafe fn _mm_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
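    // Only the low 8 converted bytes are meaningful for a 128-bit source, so narrow
    // the 16-byte conversion result to an i8x8 before building the store mask.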
12530    let result: i8x8 = simd_shuffle!(
12531        _mm_cvtepi16_epi8(a).as_i8x16(),
12532        i8x16::ZERO,
12533        [0, 1, 2, 3, 4, 5, 6, 7]
12534    );
12535    let mask = simd_select_bitmask(k, i8x8::splat(!0), i8x8::ZERO);
12536    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, result);
12537}
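
// Editorial sketch, not part of the upstream crate: contrasts the truncating stores
// above with the saturating ones. The function name is hypothetical and the example
// assumes AVX-512BW/VL support at runtime.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn cvtepi16_storeu_sketch() {
    // Truncation keeps only the low 8 bits of each word: 0x0180 (384) becomes
    // 0x80 (-128 as an i8) rather than saturating to 127.
    let a = _mm_set1_epi16(0x0180);
    let mut buf = [0i8; 8];
    _mm_mask_cvtepi16_storeu_epi8(buf.as_mut_ptr(), 0b1111_1111, a);
    // buf is now [-128; 8].
}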
12538
12539/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
12540///
12541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi16_storeu_epi8&expand=2047)
12542#[inline]
12543#[target_feature(enable = "avx512bw")]
12544#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12545#[cfg_attr(test, assert_instr(vpmovuswb))]
12546pub unsafe fn _mm512_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
12547    vpmovuswbmem(mem_addr, a.as_i16x32(), k);
12548}
12549
12550/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
12551///
12552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi16_storeu_epi8&expand=2046)
12553#[inline]
12554#[target_feature(enable = "avx512bw,avx512vl")]
12555#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12556#[cfg_attr(test, assert_instr(vpmovuswb))]
12557pub unsafe fn _mm256_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
12558    vpmovuswbmem256(mem_addr, a.as_i16x16(), k);
12559}
12560
12561/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
12562///
12563/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi16_storeu_epi8&expand=2045)
12564#[inline]
12565#[target_feature(enable = "avx512bw,avx512vl")]
12566#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12567#[cfg_attr(test, assert_instr(vpmovuswb))]
12568pub unsafe fn _mm_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
12569    vpmovuswbmem128(mem_addr, a.as_i16x8(), k);
12570}
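
// Editorial sketch, not part of the upstream crate: the unsigned-saturation variant
// clamps to the u8 range instead. The function name is hypothetical and the example
// assumes AVX-512BW/VL support at runtime.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn cvtusepi16_storeu_sketch() {
    // Unsigned saturation clamps each word to 0..=255, so 300 becomes 255 (0xFF).
    let a = _mm_set1_epi16(300);
    let mut buf = [0u8; 8];
    _mm_mask_cvtusepi16_storeu_epi8(buf.as_mut_ptr() as *mut i8, 0b0000_1111, a);
    // buf is now [255, 255, 255, 255, 0, 0, 0, 0].
}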
12571
12572#[allow(improper_ctypes)]
12573unsafe extern "C" {
12574    #[link_name = "llvm.x86.avx512.pmul.hr.sw.512"]
12575    fn vpmulhrsw(a: i16x32, b: i16x32) -> i16x32;
12576
12577    #[link_name = "llvm.x86.avx512.pmaddw.d.512"]
12578    fn vpmaddwd(a: i16x32, b: i16x32) -> i32x16;
12579    #[link_name = "llvm.x86.avx512.pmaddubs.w.512"]
12580    fn vpmaddubsw(a: u8x64, b: i8x64) -> i16x32;
12581
12582    #[link_name = "llvm.x86.avx512.packssdw.512"]
12583    fn vpackssdw(a: i32x16, b: i32x16) -> i16x32;
12584    #[link_name = "llvm.x86.avx512.packsswb.512"]
12585    fn vpacksswb(a: i16x32, b: i16x32) -> i8x64;
12586    #[link_name = "llvm.x86.avx512.packusdw.512"]
12587    fn vpackusdw(a: i32x16, b: i32x16) -> u16x32;
12588    #[link_name = "llvm.x86.avx512.packuswb.512"]
12589    fn vpackuswb(a: i16x32, b: i16x32) -> u8x64;
12590
12591    #[link_name = "llvm.x86.avx512.psll.w.512"]
12592    fn vpsllw(a: i16x32, count: i16x8) -> i16x32;
12593
12594    #[link_name = "llvm.x86.avx512.psrl.w.512"]
12595    fn vpsrlw(a: i16x32, count: i16x8) -> i16x32;
12596
12597    #[link_name = "llvm.x86.avx512.psra.w.512"]
12598    fn vpsraw(a: i16x32, count: i16x8) -> i16x32;
12599
12600    #[link_name = "llvm.x86.avx512.vpermi2var.hi.512"]
12601    fn vpermi2w(a: i16x32, idx: i16x32, b: i16x32) -> i16x32;
12602    #[link_name = "llvm.x86.avx512.vpermi2var.hi.256"]
12603    fn vpermi2w256(a: i16x16, idx: i16x16, b: i16x16) -> i16x16;
12604    #[link_name = "llvm.x86.avx512.vpermi2var.hi.128"]
12605    fn vpermi2w128(a: i16x8, idx: i16x8, b: i16x8) -> i16x8;
12606
12607    #[link_name = "llvm.x86.avx512.permvar.hi.512"]
12608    fn vpermw(a: i16x32, idx: i16x32) -> i16x32;
12609    #[link_name = "llvm.x86.avx512.permvar.hi.256"]
12610    fn vpermw256(a: i16x16, idx: i16x16) -> i16x16;
12611    #[link_name = "llvm.x86.avx512.permvar.hi.128"]
12612    fn vpermw128(a: i16x8, idx: i16x8) -> i16x8;
12613
12614    #[link_name = "llvm.x86.avx512.pshuf.b.512"]
12615    fn vpshufb(a: i8x64, b: i8x64) -> i8x64;
12616
12617    #[link_name = "llvm.x86.avx512.psad.bw.512"]
12618    fn vpsadbw(a: u8x64, b: u8x64) -> u64x8;
12619
12620    #[link_name = "llvm.x86.avx512.dbpsadbw.512"]
12621    fn vdbpsadbw(a: u8x64, b: u8x64, imm8: i32) -> u16x32;
12622    #[link_name = "llvm.x86.avx512.dbpsadbw.256"]
12623    fn vdbpsadbw256(a: u8x32, b: u8x32, imm8: i32) -> u16x16;
12624    #[link_name = "llvm.x86.avx512.dbpsadbw.128"]
12625    fn vdbpsadbw128(a: u8x16, b: u8x16, imm8: i32) -> u16x8;
12626
12627    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.128"]
12628    fn vpmovswb128(a: i16x8, src: i8x16, mask: u8) -> i8x16;
12629
12630    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.128"]
12631    fn vpmovuswb128(a: u16x8, src: u8x16, mask: u8) -> u8x16;
12632
12633    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.512"]
12634    fn vpmovswbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
12635    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.256"]
12636    fn vpmovswbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
12637    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.128"]
12638    fn vpmovswbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);
12639
12640    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.512"]
12641    fn vpmovuswbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
12642    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.256"]
12643    fn vpmovuswbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
12644    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.128"]
12645    fn vpmovuswbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);
12646}
12647
12648#[cfg(test)]
12649mod tests {
12650    use crate::core_arch::assert_eq_const as assert_eq;
12651
12652    use stdarch_test::simd_test;
12653
12654    use crate::core_arch::x86::*;
12655    use crate::hint::black_box;
12656    use crate::mem;
12657
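    // Note on the expected vectors below: mask bit 0 controls element 0, while the
    // `_mm*_set_epi*` constructors list elements from the highest index down to 0, so
    // the elements selected by the low mask bits appear last in each `set` call.
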
12658    #[simd_test(enable = "avx512bw")]
12659    const fn test_mm512_abs_epi16() {
12660        let a = _mm512_set1_epi16(-1);
12661        let r = _mm512_abs_epi16(a);
12662        let e = _mm512_set1_epi16(1);
12663        assert_eq_m512i(r, e);
12664    }
12665
12666    #[simd_test(enable = "avx512bw")]
12667    const fn test_mm512_mask_abs_epi16() {
12668        let a = _mm512_set1_epi16(-1);
12669        let r = _mm512_mask_abs_epi16(a, 0, a);
12670        assert_eq_m512i(r, a);
12671        let r = _mm512_mask_abs_epi16(a, 0b00000000_11111111_00000000_11111111, a);
12672        #[rustfmt::skip]
12673        let e = _mm512_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
12674                                 -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
12675        assert_eq_m512i(r, e);
12676    }
12677
12678    #[simd_test(enable = "avx512bw")]
12679    const fn test_mm512_maskz_abs_epi16() {
12680        let a = _mm512_set1_epi16(-1);
12681        let r = _mm512_maskz_abs_epi16(0, a);
12682        assert_eq_m512i(r, _mm512_setzero_si512());
12683        let r = _mm512_maskz_abs_epi16(0b00000000_11111111_00000000_11111111, a);
12684        #[rustfmt::skip]
12685        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
12686                                  0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
12687        assert_eq_m512i(r, e);
12688    }
12689
12690    #[simd_test(enable = "avx512bw,avx512vl")]
12691    const fn test_mm256_mask_abs_epi16() {
12692        let a = _mm256_set1_epi16(-1);
12693        let r = _mm256_mask_abs_epi16(a, 0, a);
12694        assert_eq_m256i(r, a);
12695        let r = _mm256_mask_abs_epi16(a, 0b00000000_11111111, a);
12696        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
12697        assert_eq_m256i(r, e);
12698    }
12699
12700    #[simd_test(enable = "avx512bw,avx512vl")]
12701    const fn test_mm256_maskz_abs_epi16() {
12702        let a = _mm256_set1_epi16(-1);
12703        let r = _mm256_maskz_abs_epi16(0, a);
12704        assert_eq_m256i(r, _mm256_setzero_si256());
12705        let r = _mm256_maskz_abs_epi16(0b00000000_11111111, a);
12706        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
12707        assert_eq_m256i(r, e);
12708    }
12709
12710    #[simd_test(enable = "avx512bw,avx512vl")]
12711    const fn test_mm_mask_abs_epi16() {
12712        let a = _mm_set1_epi16(-1);
12713        let r = _mm_mask_abs_epi16(a, 0, a);
12714        assert_eq_m128i(r, a);
12715        let r = _mm_mask_abs_epi16(a, 0b00001111, a);
12716        let e = _mm_set_epi16(-1, -1, -1, -1, 1, 1, 1, 1);
12717        assert_eq_m128i(r, e);
12718    }
12719
12720    #[simd_test(enable = "avx512bw,avx512vl")]
12721    const fn test_mm_maskz_abs_epi16() {
12722        let a = _mm_set1_epi16(-1);
12723        let r = _mm_maskz_abs_epi16(0, a);
12724        assert_eq_m128i(r, _mm_setzero_si128());
12725        let r = _mm_maskz_abs_epi16(0b00001111, a);
12726        let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
12727        assert_eq_m128i(r, e);
12728    }
12729
12730    #[simd_test(enable = "avx512bw")]
12731    const fn test_mm512_abs_epi8() {
12732        let a = _mm512_set1_epi8(-1);
12733        let r = _mm512_abs_epi8(a);
12734        let e = _mm512_set1_epi8(1);
12735        assert_eq_m512i(r, e);
12736    }
12737
12738    #[simd_test(enable = "avx512bw")]
12739    const fn test_mm512_mask_abs_epi8() {
12740        let a = _mm512_set1_epi8(-1);
12741        let r = _mm512_mask_abs_epi8(a, 0, a);
12742        assert_eq_m512i(r, a);
12743        let r = _mm512_mask_abs_epi8(
12744            a,
12745            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
12746            a,
12747        );
12748        #[rustfmt::skip]
12749        let e = _mm512_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
12750                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
12751                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
12752                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
12753        assert_eq_m512i(r, e);
12754    }
12755
12756    #[simd_test(enable = "avx512bw")]
12757    const fn test_mm512_maskz_abs_epi8() {
12758        let a = _mm512_set1_epi8(-1);
12759        let r = _mm512_maskz_abs_epi8(0, a);
12760        assert_eq_m512i(r, _mm512_setzero_si512());
12761        let r = _mm512_maskz_abs_epi8(
12762            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
12763            a,
12764        );
12765        #[rustfmt::skip]
12766        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
12767                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
12768                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
12769                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
12770        assert_eq_m512i(r, e);
12771    }
12772
12773    #[simd_test(enable = "avx512bw,avx512vl")]
12774    const fn test_mm256_mask_abs_epi8() {
12775        let a = _mm256_set1_epi8(-1);
12776        let r = _mm256_mask_abs_epi8(a, 0, a);
12777        assert_eq_m256i(r, a);
12778        let r = _mm256_mask_abs_epi8(a, 0b00000000_11111111_00000000_11111111, a);
12779        #[rustfmt::skip]
12780        let e = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
12781                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
12782        assert_eq_m256i(r, e);
12783    }
12784
12785    #[simd_test(enable = "avx512bw,avx512vl")]
12786    const fn test_mm256_maskz_abs_epi8() {
12787        let a = _mm256_set1_epi8(-1);
12788        let r = _mm256_maskz_abs_epi8(0, a);
12789        assert_eq_m256i(r, _mm256_setzero_si256());
12790        let r = _mm256_maskz_abs_epi8(0b00000000_11111111_00000000_11111111, a);
12791        #[rustfmt::skip]
12792        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
12793                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
12794        assert_eq_m256i(r, e);
12795    }
12796
12797    #[simd_test(enable = "avx512bw,avx512vl")]
12798    const fn test_mm_mask_abs_epi8() {
12799        let a = _mm_set1_epi8(-1);
12800        let r = _mm_mask_abs_epi8(a, 0, a);
12801        assert_eq_m128i(r, a);
12802        let r = _mm_mask_abs_epi8(a, 0b00000000_11111111, a);
12803        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
12804        assert_eq_m128i(r, e);
12805    }
12806
12807    #[simd_test(enable = "avx512bw,avx512vl")]
12808    const fn test_mm_maskz_abs_epi8() {
12809        let a = _mm_set1_epi8(-1);
12810        let r = _mm_maskz_abs_epi8(0, a);
12811        assert_eq_m128i(r, _mm_setzero_si128());
12812        let r = _mm_maskz_abs_epi8(0b00000000_11111111, a);
12813        #[rustfmt::skip]
12814        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
12815        assert_eq_m128i(r, e);
12816    }
12817
12818    #[simd_test(enable = "avx512bw")]
12819    const fn test_mm512_add_epi16() {
12820        let a = _mm512_set1_epi16(1);
12821        let b = _mm512_set1_epi16(2);
12822        let r = _mm512_add_epi16(a, b);
12823        let e = _mm512_set1_epi16(3);
12824        assert_eq_m512i(r, e);
12825    }
12826
12827    #[simd_test(enable = "avx512bw")]
12828    const fn test_mm512_mask_add_epi16() {
12829        let a = _mm512_set1_epi16(1);
12830        let b = _mm512_set1_epi16(2);
12831        let r = _mm512_mask_add_epi16(a, 0, a, b);
12832        assert_eq_m512i(r, a);
12833        let r = _mm512_mask_add_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
12834        #[rustfmt::skip]
12835        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
12836                                 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
12837        assert_eq_m512i(r, e);
12838    }
12839
12840    #[simd_test(enable = "avx512bw")]
12841    const fn test_mm512_maskz_add_epi16() {
12842        let a = _mm512_set1_epi16(1);
12843        let b = _mm512_set1_epi16(2);
12844        let r = _mm512_maskz_add_epi16(0, a, b);
12845        assert_eq_m512i(r, _mm512_setzero_si512());
12846        let r = _mm512_maskz_add_epi16(0b00000000_11111111_00000000_11111111, a, b);
12847        #[rustfmt::skip]
12848        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
12849                                 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
12850        assert_eq_m512i(r, e);
12851    }
12852
12853    #[simd_test(enable = "avx512bw,avx512vl")]
12854    const fn test_mm256_mask_add_epi16() {
12855        let a = _mm256_set1_epi16(1);
12856        let b = _mm256_set1_epi16(2);
12857        let r = _mm256_mask_add_epi16(a, 0, a, b);
12858        assert_eq_m256i(r, a);
12859        let r = _mm256_mask_add_epi16(a, 0b00000000_11111111, a, b);
12860        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
12861        assert_eq_m256i(r, e);
12862    }
12863
12864    #[simd_test(enable = "avx512bw,avx512vl")]
12865    const fn test_mm256_maskz_add_epi16() {
12866        let a = _mm256_set1_epi16(1);
12867        let b = _mm256_set1_epi16(2);
12868        let r = _mm256_maskz_add_epi16(0, a, b);
12869        assert_eq_m256i(r, _mm256_setzero_si256());
12870        let r = _mm256_maskz_add_epi16(0b00000000_11111111, a, b);
12871        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
12872        assert_eq_m256i(r, e);
12873    }
12874
12875    #[simd_test(enable = "avx512bw,avx512vl")]
12876    const fn test_mm_mask_add_epi16() {
12877        let a = _mm_set1_epi16(1);
12878        let b = _mm_set1_epi16(2);
12879        let r = _mm_mask_add_epi16(a, 0, a, b);
12880        assert_eq_m128i(r, a);
12881        let r = _mm_mask_add_epi16(a, 0b00001111, a, b);
12882        let e = _mm_set_epi16(1, 1, 1, 1, 3, 3, 3, 3);
12883        assert_eq_m128i(r, e);
12884    }
12885
12886    #[simd_test(enable = "avx512bw,avx512vl")]
12887    const fn test_mm_maskz_add_epi16() {
12888        let a = _mm_set1_epi16(1);
12889        let b = _mm_set1_epi16(2);
12890        let r = _mm_maskz_add_epi16(0, a, b);
12891        assert_eq_m128i(r, _mm_setzero_si128());
12892        let r = _mm_maskz_add_epi16(0b00001111, a, b);
12893        let e = _mm_set_epi16(0, 0, 0, 0, 3, 3, 3, 3);
12894        assert_eq_m128i(r, e);
12895    }
12896
12897    #[simd_test(enable = "avx512bw")]
12898    const fn test_mm512_add_epi8() {
12899        let a = _mm512_set1_epi8(1);
12900        let b = _mm512_set1_epi8(2);
12901        let r = _mm512_add_epi8(a, b);
12902        let e = _mm512_set1_epi8(3);
12903        assert_eq_m512i(r, e);
12904    }
12905
12906    #[simd_test(enable = "avx512bw")]
12907    const fn test_mm512_mask_add_epi8() {
12908        let a = _mm512_set1_epi8(1);
12909        let b = _mm512_set1_epi8(2);
12910        let r = _mm512_mask_add_epi8(a, 0, a, b);
12911        assert_eq_m512i(r, a);
12912        let r = _mm512_mask_add_epi8(
12913            a,
12914            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
12915            a,
12916            b,
12917        );
12918        #[rustfmt::skip]
12919        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
12920                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
12921                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
12922                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
12923        assert_eq_m512i(r, e);
12924    }
12925
12926    #[simd_test(enable = "avx512bw")]
12927    const fn test_mm512_maskz_add_epi8() {
12928        let a = _mm512_set1_epi8(1);
12929        let b = _mm512_set1_epi8(2);
12930        let r = _mm512_maskz_add_epi8(0, a, b);
12931        assert_eq_m512i(r, _mm512_setzero_si512());
12932        let r = _mm512_maskz_add_epi8(
12933            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
12934            a,
12935            b,
12936        );
12937        #[rustfmt::skip]
12938        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
12939                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
12940                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
12941                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
12942        assert_eq_m512i(r, e);
12943    }
12944
12945    #[simd_test(enable = "avx512bw,avx512vl")]
12946    const fn test_mm256_mask_add_epi8() {
12947        let a = _mm256_set1_epi8(1);
12948        let b = _mm256_set1_epi8(2);
12949        let r = _mm256_mask_add_epi8(a, 0, a, b);
12950        assert_eq_m256i(r, a);
12951        let r = _mm256_mask_add_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
12952        #[rustfmt::skip]
12953        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
12954                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
12955        assert_eq_m256i(r, e);
12956    }
12957
12958    #[simd_test(enable = "avx512bw,avx512vl")]
12959    const fn test_mm256_maskz_add_epi8() {
12960        let a = _mm256_set1_epi8(1);
12961        let b = _mm256_set1_epi8(2);
12962        let r = _mm256_maskz_add_epi8(0, a, b);
12963        assert_eq_m256i(r, _mm256_setzero_si256());
12964        let r = _mm256_maskz_add_epi8(0b00000000_11111111_00000000_11111111, a, b);
12965        #[rustfmt::skip]
12966        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
12967                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
12968        assert_eq_m256i(r, e);
12969    }
12970
12971    #[simd_test(enable = "avx512bw,avx512vl")]
12972    const fn test_mm_mask_add_epi8() {
12973        let a = _mm_set1_epi8(1);
12974        let b = _mm_set1_epi8(2);
12975        let r = _mm_mask_add_epi8(a, 0, a, b);
12976        assert_eq_m128i(r, a);
12977        let r = _mm_mask_add_epi8(a, 0b00000000_11111111, a, b);
12978        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
12979        assert_eq_m128i(r, e);
12980    }
12981
12982    #[simd_test(enable = "avx512bw,avx512vl")]
12983    const fn test_mm_maskz_add_epi8() {
12984        let a = _mm_set1_epi8(1);
12985        let b = _mm_set1_epi8(2);
12986        let r = _mm_maskz_add_epi8(0, a, b);
12987        assert_eq_m128i(r, _mm_setzero_si128());
12988        let r = _mm_maskz_add_epi8(0b00000000_11111111, a, b);
12989        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
12990        assert_eq_m128i(r, e);
12991    }
12992
12993    #[simd_test(enable = "avx512bw")]
12994    const fn test_mm512_adds_epu16() {
12995        let a = _mm512_set1_epi16(1);
12996        let b = _mm512_set1_epi16(u16::MAX as i16);
12997        let r = _mm512_adds_epu16(a, b);
12998        let e = _mm512_set1_epi16(u16::MAX as i16);
12999        assert_eq_m512i(r, e);
13000    }
13001
13002    #[simd_test(enable = "avx512bw")]
13003    const fn test_mm512_mask_adds_epu16() {
13004        let a = _mm512_set1_epi16(1);
13005        let b = _mm512_set1_epi16(u16::MAX as i16);
13006        let r = _mm512_mask_adds_epu16(a, 0, a, b);
13007        assert_eq_m512i(r, a);
13008        let r = _mm512_mask_adds_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
13009        #[rustfmt::skip]
13010        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13011                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
13012        assert_eq_m512i(r, e);
13013    }
13014
13015    #[simd_test(enable = "avx512bw")]
13016    const fn test_mm512_maskz_adds_epu16() {
13017        let a = _mm512_set1_epi16(1);
13018        let b = _mm512_set1_epi16(u16::MAX as i16);
13019        let r = _mm512_maskz_adds_epu16(0, a, b);
13020        assert_eq_m512i(r, _mm512_setzero_si512());
13021        let r = _mm512_maskz_adds_epu16(0b00000000_00000000_00000000_00001111, a, b);
13022        #[rustfmt::skip]
13023        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13024                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
13025        assert_eq_m512i(r, e);
13026    }
13027
13028    #[simd_test(enable = "avx512bw,avx512vl")]
13029    const fn test_mm256_mask_adds_epu16() {
13030        let a = _mm256_set1_epi16(1);
13031        let b = _mm256_set1_epi16(u16::MAX as i16);
13032        let r = _mm256_mask_adds_epu16(a, 0, a, b);
13033        assert_eq_m256i(r, a);
13034        let r = _mm256_mask_adds_epu16(a, 0b00000000_00001111, a, b);
13035        #[rustfmt::skip]
13036        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
13037        assert_eq_m256i(r, e);
13038    }
13039
13040    #[simd_test(enable = "avx512bw,avx512vl")]
13041    const fn test_mm256_maskz_adds_epu16() {
13042        let a = _mm256_set1_epi16(1);
13043        let b = _mm256_set1_epi16(u16::MAX as i16);
13044        let r = _mm256_maskz_adds_epu16(0, a, b);
13045        assert_eq_m256i(r, _mm256_setzero_si256());
13046        let r = _mm256_maskz_adds_epu16(0b00000000_00001111, a, b);
13047        #[rustfmt::skip]
13048        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
13049        assert_eq_m256i(r, e);
13050    }
13051
13052    #[simd_test(enable = "avx512bw,avx512vl")]
13053    const fn test_mm_mask_adds_epu16() {
13054        let a = _mm_set1_epi16(1);
13055        let b = _mm_set1_epi16(u16::MAX as i16);
13056        let r = _mm_mask_adds_epu16(a, 0, a, b);
13057        assert_eq_m128i(r, a);
13058        let r = _mm_mask_adds_epu16(a, 0b00001111, a, b);
13059        #[rustfmt::skip]
13060        let e = _mm_set_epi16(1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
13061        assert_eq_m128i(r, e);
13062    }
13063
13064    #[simd_test(enable = "avx512bw,avx512vl")]
13065    const fn test_mm_maskz_adds_epu16() {
13066        let a = _mm_set1_epi16(1);
13067        let b = _mm_set1_epi16(u16::MAX as i16);
13068        let r = _mm_maskz_adds_epu16(0, a, b);
13069        assert_eq_m128i(r, _mm_setzero_si128());
13070        let r = _mm_maskz_adds_epu16(0b00001111, a, b);
13071        #[rustfmt::skip]
13072        let e = _mm_set_epi16(0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
13073        assert_eq_m128i(r, e);
13074    }
13075
13076    #[simd_test(enable = "avx512bw")]
13077    const fn test_mm512_adds_epu8() {
13078        let a = _mm512_set1_epi8(1);
13079        let b = _mm512_set1_epi8(u8::MAX as i8);
13080        let r = _mm512_adds_epu8(a, b);
13081        let e = _mm512_set1_epi8(u8::MAX as i8);
13082        assert_eq_m512i(r, e);
13083    }
13084
13085    #[simd_test(enable = "avx512bw")]
13086    const fn test_mm512_mask_adds_epu8() {
13087        let a = _mm512_set1_epi8(1);
13088        let b = _mm512_set1_epi8(u8::MAX as i8);
13089        let r = _mm512_mask_adds_epu8(a, 0, a, b);
13090        assert_eq_m512i(r, a);
13091        let r = _mm512_mask_adds_epu8(
13092            a,
13093            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
13094            a,
13095            b,
13096        );
13097        #[rustfmt::skip]
13098        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13099                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13100                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13101                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
13102        assert_eq_m512i(r, e);
13103    }
13104
13105    #[simd_test(enable = "avx512bw")]
13106    const fn test_mm512_maskz_adds_epu8() {
13107        let a = _mm512_set1_epi8(1);
13108        let b = _mm512_set1_epi8(u8::MAX as i8);
13109        let r = _mm512_maskz_adds_epu8(0, a, b);
13110        assert_eq_m512i(r, _mm512_setzero_si512());
13111        let r = _mm512_maskz_adds_epu8(
13112            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
13113            a,
13114            b,
13115        );
13116        #[rustfmt::skip]
13117        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13118                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13119                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13120                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
13121        assert_eq_m512i(r, e);
13122    }
13123
13124    #[simd_test(enable = "avx512bw,avx512vl")]
13125    const fn test_mm256_mask_adds_epu8() {
13126        let a = _mm256_set1_epi8(1);
13127        let b = _mm256_set1_epi8(u8::MAX as i8);
13128        let r = _mm256_mask_adds_epu8(a, 0, a, b);
13129        assert_eq_m256i(r, a);
13130        let r = _mm256_mask_adds_epu8(a, 0b00000000_00000000_00000000_00001111, a, b);
13131        #[rustfmt::skip]
13132        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13133                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
13134        assert_eq_m256i(r, e);
13135    }
13136
13137    #[simd_test(enable = "avx512bw,avx512vl")]
13138    const fn test_mm256_maskz_adds_epu8() {
13139        let a = _mm256_set1_epi8(1);
13140        let b = _mm256_set1_epi8(u8::MAX as i8);
13141        let r = _mm256_maskz_adds_epu8(0, a, b);
13142        assert_eq_m256i(r, _mm256_setzero_si256());
13143        let r = _mm256_maskz_adds_epu8(0b00000000_00000000_00000000_00001111, a, b);
13144        #[rustfmt::skip]
13145        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13146                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
13147        assert_eq_m256i(r, e);
13148    }
13149
13150    #[simd_test(enable = "avx512bw,avx512vl")]
13151    const fn test_mm_mask_adds_epu8() {
13152        let a = _mm_set1_epi8(1);
13153        let b = _mm_set1_epi8(u8::MAX as i8);
13154        let r = _mm_mask_adds_epu8(a, 0, a, b);
13155        assert_eq_m128i(r, a);
13156        let r = _mm_mask_adds_epu8(a, 0b00000000_00001111, a, b);
13157        #[rustfmt::skip]
13158        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
13159        assert_eq_m128i(r, e);
13160    }
13161
13162    #[simd_test(enable = "avx512bw,avx512vl")]
13163    const fn test_mm_maskz_adds_epu8() {
13164        let a = _mm_set1_epi8(1);
13165        let b = _mm_set1_epi8(u8::MAX as i8);
13166        let r = _mm_maskz_adds_epu8(0, a, b);
13167        assert_eq_m128i(r, _mm_setzero_si128());
13168        let r = _mm_maskz_adds_epu8(0b00000000_00001111, a, b);
13169        #[rustfmt::skip]
13170        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
13171        assert_eq_m128i(r, e);
13172    }
13173
13174    #[simd_test(enable = "avx512bw")]
13175    const fn test_mm512_adds_epi16() {
13176        let a = _mm512_set1_epi16(1);
13177        let b = _mm512_set1_epi16(i16::MAX);
13178        let r = _mm512_adds_epi16(a, b);
13179        let e = _mm512_set1_epi16(i16::MAX);
13180        assert_eq_m512i(r, e);
13181    }
13182
13183    #[simd_test(enable = "avx512bw")]
13184    const fn test_mm512_mask_adds_epi16() {
13185        let a = _mm512_set1_epi16(1);
13186        let b = _mm512_set1_epi16(i16::MAX);
13187        let r = _mm512_mask_adds_epi16(a, 0, a, b);
13188        assert_eq_m512i(r, a);
13189        let r = _mm512_mask_adds_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
13190        #[rustfmt::skip]
13191        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13192                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
13193        assert_eq_m512i(r, e);
13194    }
13195
13196    #[simd_test(enable = "avx512bw")]
13197    const fn test_mm512_maskz_adds_epi16() {
13198        let a = _mm512_set1_epi16(1);
13199        let b = _mm512_set1_epi16(i16::MAX);
13200        let r = _mm512_maskz_adds_epi16(0, a, b);
13201        assert_eq_m512i(r, _mm512_setzero_si512());
13202        let r = _mm512_maskz_adds_epi16(0b00000000_00000000_00000000_00001111, a, b);
13203        #[rustfmt::skip]
13204        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13205                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
13206        assert_eq_m512i(r, e);
13207    }
13208
13209    #[simd_test(enable = "avx512bw,avx512vl")]
13210    const fn test_mm256_mask_adds_epi16() {
13211        let a = _mm256_set1_epi16(1);
13212        let b = _mm256_set1_epi16(i16::MAX);
13213        let r = _mm256_mask_adds_epi16(a, 0, a, b);
13214        assert_eq_m256i(r, a);
13215        let r = _mm256_mask_adds_epi16(a, 0b00000000_00001111, a, b);
13216        #[rustfmt::skip]
13217        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
13218        assert_eq_m256i(r, e);
13219    }
13220
13221    #[simd_test(enable = "avx512bw,avx512vl")]
13222    const fn test_mm256_maskz_adds_epi16() {
13223        let a = _mm256_set1_epi16(1);
13224        let b = _mm256_set1_epi16(i16::MAX);
13225        let r = _mm256_maskz_adds_epi16(0, a, b);
13226        assert_eq_m256i(r, _mm256_setzero_si256());
13227        let r = _mm256_maskz_adds_epi16(0b00000000_00001111, a, b);
13228        #[rustfmt::skip]
13229        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
13230        assert_eq_m256i(r, e);
13231    }
13232
13233    #[simd_test(enable = "avx512bw,avx512vl")]
13234    const fn test_mm_mask_adds_epi16() {
13235        let a = _mm_set1_epi16(1);
13236        let b = _mm_set1_epi16(i16::MAX);
13237        let r = _mm_mask_adds_epi16(a, 0, a, b);
13238        assert_eq_m128i(r, a);
13239        let r = _mm_mask_adds_epi16(a, 0b00001111, a, b);
13240        let e = _mm_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
13241        assert_eq_m128i(r, e);
13242    }
13243
13244    #[simd_test(enable = "avx512bw,avx512vl")]
13245    const fn test_mm_maskz_adds_epi16() {
13246        let a = _mm_set1_epi16(1);
13247        let b = _mm_set1_epi16(i16::MAX);
13248        let r = _mm_maskz_adds_epi16(0, a, b);
13249        assert_eq_m128i(r, _mm_setzero_si128());
13250        let r = _mm_maskz_adds_epi16(0b00001111, a, b);
13251        let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
13252        assert_eq_m128i(r, e);
13253    }
13254
13255    #[simd_test(enable = "avx512bw")]
13256    const fn test_mm512_adds_epi8() {
13257        let a = _mm512_set1_epi8(1);
13258        let b = _mm512_set1_epi8(i8::MAX);
13259        let r = _mm512_adds_epi8(a, b);
13260        let e = _mm512_set1_epi8(i8::MAX);
13261        assert_eq_m512i(r, e);
13262    }
13263
13264    #[simd_test(enable = "avx512bw")]
13265    const fn test_mm512_mask_adds_epi8() {
13266        let a = _mm512_set1_epi8(1);
13267        let b = _mm512_set1_epi8(i8::MAX);
13268        let r = _mm512_mask_adds_epi8(a, 0, a, b);
13269        assert_eq_m512i(r, a);
13270        let r = _mm512_mask_adds_epi8(
13271            a,
13272            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
13273            a,
13274            b,
13275        );
13276        #[rustfmt::skip]
13277        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13278                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13279                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13280                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
13281        assert_eq_m512i(r, e);
13282    }
13283
13284    #[simd_test(enable = "avx512bw")]
13285    const fn test_mm512_maskz_adds_epi8() {
13286        let a = _mm512_set1_epi8(1);
13287        let b = _mm512_set1_epi8(i8::MAX);
13288        let r = _mm512_maskz_adds_epi8(0, a, b);
13289        assert_eq_m512i(r, _mm512_setzero_si512());
13290        let r = _mm512_maskz_adds_epi8(
13291            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
13292            a,
13293            b,
13294        );
13295        #[rustfmt::skip]
13296        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13297                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13298                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13299                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
13300        assert_eq_m512i(r, e);
13301    }
13302
13303    #[simd_test(enable = "avx512bw,avx512vl")]
13304    const fn test_mm256_mask_adds_epi8() {
13305        let a = _mm256_set1_epi8(1);
13306        let b = _mm256_set1_epi8(i8::MAX);
13307        let r = _mm256_mask_adds_epi8(a, 0, a, b);
13308        assert_eq_m256i(r, a);
13309        let r = _mm256_mask_adds_epi8(a, 0b00000000_00000000_00000000_00001111, a, b);
13310        #[rustfmt::skip]
13311        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13312                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
13313        assert_eq_m256i(r, e);
13314    }
13315
13316    #[simd_test(enable = "avx512bw,avx512vl")]
13317    const fn test_mm256_maskz_adds_epi8() {
13318        let a = _mm256_set1_epi8(1);
13319        let b = _mm256_set1_epi8(i8::MAX);
13320        let r = _mm256_maskz_adds_epi8(0, a, b);
13321        assert_eq_m256i(r, _mm256_setzero_si256());
13322        let r = _mm256_maskz_adds_epi8(0b00000000_00000000_00000000_00001111, a, b);
13323        #[rustfmt::skip]
13324        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13325                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
13326        assert_eq_m256i(r, e);
13327    }
13328
13329    #[simd_test(enable = "avx512bw,avx512vl")]
13330    const fn test_mm_mask_adds_epi8() {
13331        let a = _mm_set1_epi8(1);
13332        let b = _mm_set1_epi8(i8::MAX);
13333        let r = _mm_mask_adds_epi8(a, 0, a, b);
13334        assert_eq_m128i(r, a);
13335        let r = _mm_mask_adds_epi8(a, 0b00000000_00001111, a, b);
13336        #[rustfmt::skip]
13337        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
13338        assert_eq_m128i(r, e);
13339    }
13340
13341    #[simd_test(enable = "avx512bw,avx512vl")]
13342    const fn test_mm_maskz_adds_epi8() {
13343        let a = _mm_set1_epi8(1);
13344        let b = _mm_set1_epi8(i8::MAX);
13345        let r = _mm_maskz_adds_epi8(0, a, b);
13346        assert_eq_m128i(r, _mm_setzero_si128());
13347        let r = _mm_maskz_adds_epi8(0b00000000_00001111, a, b);
13348        #[rustfmt::skip]
13349        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
13350        assert_eq_m128i(r, e);
13351    }
13352
13353    #[simd_test(enable = "avx512bw")]
13354    const fn test_mm512_sub_epi16() {
13355        let a = _mm512_set1_epi16(1);
13356        let b = _mm512_set1_epi16(2);
13357        let r = _mm512_sub_epi16(a, b);
13358        let e = _mm512_set1_epi16(-1);
13359        assert_eq_m512i(r, e);
13360    }
13361
13362    #[simd_test(enable = "avx512bw")]
13363    const fn test_mm512_mask_sub_epi16() {
13364        let a = _mm512_set1_epi16(1);
13365        let b = _mm512_set1_epi16(2);
13366        let r = _mm512_mask_sub_epi16(a, 0, a, b);
13367        assert_eq_m512i(r, a);
13368        let r = _mm512_mask_sub_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
13369        #[rustfmt::skip]
13370        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
13371                                 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
13372        assert_eq_m512i(r, e);
13373    }
13374
13375    #[simd_test(enable = "avx512bw")]
13376    const fn test_mm512_maskz_sub_epi16() {
13377        let a = _mm512_set1_epi16(1);
13378        let b = _mm512_set1_epi16(2);
13379        let r = _mm512_maskz_sub_epi16(0, a, b);
13380        assert_eq_m512i(r, _mm512_setzero_si512());
13381        let r = _mm512_maskz_sub_epi16(0b00000000_11111111_00000000_11111111, a, b);
13382        #[rustfmt::skip]
13383        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
13384                                 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
13385        assert_eq_m512i(r, e);
13386    }
13387
13388    #[simd_test(enable = "avx512bw,avx512vl")]
13389    const fn test_mm256_mask_sub_epi16() {
13390        let a = _mm256_set1_epi16(1);
13391        let b = _mm256_set1_epi16(2);
13392        let r = _mm256_mask_sub_epi16(a, 0, a, b);
13393        assert_eq_m256i(r, a);
13394        let r = _mm256_mask_sub_epi16(a, 0b00000000_11111111, a, b);
13395        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
13396        assert_eq_m256i(r, e);
13397    }
13398
13399    #[simd_test(enable = "avx512bw,avx512vl")]
13400    const fn test_mm256_maskz_sub_epi16() {
13401        let a = _mm256_set1_epi16(1);
13402        let b = _mm256_set1_epi16(2);
13403        let r = _mm256_maskz_sub_epi16(0, a, b);
13404        assert_eq_m256i(r, _mm256_setzero_si256());
13405        let r = _mm256_maskz_sub_epi16(0b00000000_11111111, a, b);
13406        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
13407        assert_eq_m256i(r, e);
13408    }
13409
13410    #[simd_test(enable = "avx512bw,avx512vl")]
13411    const fn test_mm_mask_sub_epi16() {
13412        let a = _mm_set1_epi16(1);
13413        let b = _mm_set1_epi16(2);
13414        let r = _mm_mask_sub_epi16(a, 0, a, b);
13415        assert_eq_m128i(r, a);
13416        let r = _mm_mask_sub_epi16(a, 0b00001111, a, b);
13417        let e = _mm_set_epi16(1, 1, 1, 1, -1, -1, -1, -1);
13418        assert_eq_m128i(r, e);
13419    }
13420
13421    #[simd_test(enable = "avx512bw,avx512vl")]
13422    const fn test_mm_maskz_sub_epi16() {
13423        let a = _mm_set1_epi16(1);
13424        let b = _mm_set1_epi16(2);
13425        let r = _mm_maskz_sub_epi16(0, a, b);
13426        assert_eq_m128i(r, _mm_setzero_si128());
13427        let r = _mm_maskz_sub_epi16(0b00001111, a, b);
13428        let e = _mm_set_epi16(0, 0, 0, 0, -1, -1, -1, -1);
13429        assert_eq_m128i(r, e);
13430    }
13431
13432    #[simd_test(enable = "avx512bw")]
13433    const fn test_mm512_sub_epi8() {
13434        let a = _mm512_set1_epi8(1);
13435        let b = _mm512_set1_epi8(2);
13436        let r = _mm512_sub_epi8(a, b);
13437        let e = _mm512_set1_epi8(-1);
13438        assert_eq_m512i(r, e);
13439    }
13440
13441    #[simd_test(enable = "avx512bw")]
13442    const fn test_mm512_mask_sub_epi8() {
13443        let a = _mm512_set1_epi8(1);
13444        let b = _mm512_set1_epi8(2);
13445        let r = _mm512_mask_sub_epi8(a, 0, a, b);
13446        assert_eq_m512i(r, a);
13447        let r = _mm512_mask_sub_epi8(
13448            a,
13449            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
13450            a,
13451            b,
13452        );
13453        #[rustfmt::skip]
13454        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
13455                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
13456                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
13457                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
13458        assert_eq_m512i(r, e);
13459    }
13460
13461    #[simd_test(enable = "avx512bw")]
13462    const fn test_mm512_maskz_sub_epi8() {
13463        let a = _mm512_set1_epi8(1);
13464        let b = _mm512_set1_epi8(2);
13465        let r = _mm512_maskz_sub_epi8(0, a, b);
13466        assert_eq_m512i(r, _mm512_setzero_si512());
13467        let r = _mm512_maskz_sub_epi8(
13468            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
13469            a,
13470            b,
13471        );
13472        #[rustfmt::skip]
13473        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
13474                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
13475                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
13476                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
13477        assert_eq_m512i(r, e);
13478    }
13479
13480    #[simd_test(enable = "avx512bw,avx512vl")]
13481    const fn test_mm256_mask_sub_epi8() {
13482        let a = _mm256_set1_epi8(1);
13483        let b = _mm256_set1_epi8(2);
13484        let r = _mm256_mask_sub_epi8(a, 0, a, b);
13485        assert_eq_m256i(r, a);
13486        let r = _mm256_mask_sub_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
13487        #[rustfmt::skip]
13488        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
13489                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
13490        assert_eq_m256i(r, e);
13491    }
13492
13493    #[simd_test(enable = "avx512bw,avx512vl")]
13494    const fn test_mm256_maskz_sub_epi8() {
13495        let a = _mm256_set1_epi8(1);
13496        let b = _mm256_set1_epi8(2);
13497        let r = _mm256_maskz_sub_epi8(0, a, b);
13498        assert_eq_m256i(r, _mm256_setzero_si256());
13499        let r = _mm256_maskz_sub_epi8(0b00000000_11111111_00000000_11111111, a, b);
13500        #[rustfmt::skip]
13501        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
13502                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
13503        assert_eq_m256i(r, e);
13504    }
13505
13506    #[simd_test(enable = "avx512bw,avx512vl")]
13507    const fn test_mm_mask_sub_epi8() {
13508        let a = _mm_set1_epi8(1);
13509        let b = _mm_set1_epi8(2);
13510        let r = _mm_mask_sub_epi8(a, 0, a, b);
13511        assert_eq_m128i(r, a);
13512        let r = _mm_mask_sub_epi8(a, 0b00000000_11111111, a, b);
13513        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
13514        assert_eq_m128i(r, e);
13515    }
13516
13517    #[simd_test(enable = "avx512bw,avx512vl")]
13518    const fn test_mm_maskz_sub_epi8() {
13519        let a = _mm_set1_epi8(1);
13520        let b = _mm_set1_epi8(2);
13521        let r = _mm_maskz_sub_epi8(0, a, b);
13522        assert_eq_m128i(r, _mm_setzero_si128());
13523        let r = _mm_maskz_sub_epi8(0b00000000_11111111, a, b);
13524        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
13525        assert_eq_m128i(r, e);
13526    }
13527
13528    #[simd_test(enable = "avx512bw")]
13529    const fn test_mm512_subs_epu16() {
13530        let a = _mm512_set1_epi16(1);
13531        let b = _mm512_set1_epi16(u16::MAX as i16);
13532        let r = _mm512_subs_epu16(a, b);
13533        let e = _mm512_set1_epi16(0);
13534        assert_eq_m512i(r, e);
13535    }
13536
13537    #[simd_test(enable = "avx512bw")]
13538    const fn test_mm512_mask_subs_epu16() {
13539        let a = _mm512_set1_epi16(1);
13540        let b = _mm512_set1_epi16(u16::MAX as i16);
13541        let r = _mm512_mask_subs_epu16(a, 0, a, b);
13542        assert_eq_m512i(r, a);
13543        let r = _mm512_mask_subs_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
13544        #[rustfmt::skip]
13545        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13546                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
13547        assert_eq_m512i(r, e);
13548    }
13549
13550    #[simd_test(enable = "avx512bw")]
13551    const fn test_mm512_maskz_subs_epu16() {
13552        let a = _mm512_set1_epi16(1);
13553        let b = _mm512_set1_epi16(u16::MAX as i16);
13554        let r = _mm512_maskz_subs_epu16(0, a, b);
13555        assert_eq_m512i(r, _mm512_setzero_si512());
13556        let r = _mm512_maskz_subs_epu16(0b00000000_00000000_00000000_00001111, a, b);
13557        #[rustfmt::skip]
13558        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13559                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
13560        assert_eq_m512i(r, e);
13561    }
13562
13563    #[simd_test(enable = "avx512bw,avx512vl")]
13564    const fn test_mm256_mask_subs_epu16() {
13565        let a = _mm256_set1_epi16(1);
13566        let b = _mm256_set1_epi16(u16::MAX as i16);
13567        let r = _mm256_mask_subs_epu16(a, 0, a, b);
13568        assert_eq_m256i(r, a);
13569        let r = _mm256_mask_subs_epu16(a, 0b00000000_00001111, a, b);
13570        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
13571        assert_eq_m256i(r, e);
13572    }
13573
13574    #[simd_test(enable = "avx512bw,avx512vl")]
13575    const fn test_mm256_maskz_subs_epu16() {
13576        let a = _mm256_set1_epi16(1);
13577        let b = _mm256_set1_epi16(u16::MAX as i16);
13578        let r = _mm256_maskz_subs_epu16(0, a, b);
13579        assert_eq_m256i(r, _mm256_setzero_si256());
13580        let r = _mm256_maskz_subs_epu16(0b00000000_00001111, a, b);
13581        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
13582        assert_eq_m256i(r, e);
13583    }
13584
13585    #[simd_test(enable = "avx512bw,avx512vl")]
13586    const fn test_mm_mask_subs_epu16() {
13587        let a = _mm_set1_epi16(1);
13588        let b = _mm_set1_epi16(u16::MAX as i16);
13589        let r = _mm_mask_subs_epu16(a, 0, a, b);
13590        assert_eq_m128i(r, a);
13591        let r = _mm_mask_subs_epu16(a, 0b00001111, a, b);
13592        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
13593        assert_eq_m128i(r, e);
13594    }
13595
13596    #[simd_test(enable = "avx512bw,avx512vl")]
13597    const fn test_mm_maskz_subs_epu16() {
13598        let a = _mm_set1_epi16(1);
13599        let b = _mm_set1_epi16(u16::MAX as i16);
13600        let r = _mm_maskz_subs_epu16(0, a, b);
13601        assert_eq_m128i(r, _mm_setzero_si128());
13602        let r = _mm_maskz_subs_epu16(0b00001111, a, b);
13603        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
13604        assert_eq_m128i(r, e);
13605    }
13606
13607    #[simd_test(enable = "avx512bw")]
13608    const fn test_mm512_subs_epu8() {
13609        let a = _mm512_set1_epi8(1);
13610        let b = _mm512_set1_epi8(u8::MAX as i8);
13611        let r = _mm512_subs_epu8(a, b);
13612        let e = _mm512_set1_epi8(0);
13613        assert_eq_m512i(r, e);
13614    }
13615
13616    #[simd_test(enable = "avx512bw")]
13617    const fn test_mm512_mask_subs_epu8() {
13618        let a = _mm512_set1_epi8(1);
13619        let b = _mm512_set1_epi8(u8::MAX as i8);
13620        let r = _mm512_mask_subs_epu8(a, 0, a, b);
13621        assert_eq_m512i(r, a);
13622        let r = _mm512_mask_subs_epu8(
13623            a,
13624            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
13625            a,
13626            b,
13627        );
13628        #[rustfmt::skip]
13629        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13630                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13631                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13632                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
13633        assert_eq_m512i(r, e);
13634    }
13635
13636    #[simd_test(enable = "avx512bw")]
13637    const fn test_mm512_maskz_subs_epu8() {
13638        let a = _mm512_set1_epi8(1);
13639        let b = _mm512_set1_epi8(u8::MAX as i8);
13640        let r = _mm512_maskz_subs_epu8(0, a, b);
13641        assert_eq_m512i(r, _mm512_setzero_si512());
13642        let r = _mm512_maskz_subs_epu8(
13643            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
13644            a,
13645            b,
13646        );
13647        #[rustfmt::skip]
13648        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13649                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13650                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13651                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
13652        assert_eq_m512i(r, e);
13653    }
13654
13655    #[simd_test(enable = "avx512bw,avx512vl")]
13656    const fn test_mm256_mask_subs_epu8() {
13657        let a = _mm256_set1_epi8(1);
13658        let b = _mm256_set1_epi8(u8::MAX as i8);
13659        let r = _mm256_mask_subs_epu8(a, 0, a, b);
13660        assert_eq_m256i(r, a);
13661        let r = _mm256_mask_subs_epu8(a, 0b00000000_00000000_00000000_00001111, a, b);
13662        #[rustfmt::skip]
13663        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13664                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
13665        assert_eq_m256i(r, e);
13666    }
13667
13668    #[simd_test(enable = "avx512bw,avx512vl")]
13669    const fn test_mm256_maskz_subs_epu8() {
13670        let a = _mm256_set1_epi8(1);
13671        let b = _mm256_set1_epi8(u8::MAX as i8);
13672        let r = _mm256_maskz_subs_epu8(0, a, b);
13673        assert_eq_m256i(r, _mm256_setzero_si256());
13674        let r = _mm256_maskz_subs_epu8(0b00000000_00000000_00000000_00001111, a, b);
13675        #[rustfmt::skip]
13676        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13677                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
13678        assert_eq_m256i(r, e);
13679    }
13680
13681    #[simd_test(enable = "avx512bw,avx512vl")]
13682    const fn test_mm_mask_subs_epu8() {
13683        let a = _mm_set1_epi8(1);
13684        let b = _mm_set1_epi8(u8::MAX as i8);
13685        let r = _mm_mask_subs_epu8(a, 0, a, b);
13686        assert_eq_m128i(r, a);
13687        let r = _mm_mask_subs_epu8(a, 0b00000000_00001111, a, b);
13688        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
13689        assert_eq_m128i(r, e);
13690    }
13691
13692    #[simd_test(enable = "avx512bw,avx512vl")]
13693    const fn test_mm_maskz_subs_epu8() {
13694        let a = _mm_set1_epi8(1);
13695        let b = _mm_set1_epi8(u8::MAX as i8);
13696        let r = _mm_maskz_subs_epu8(0, a, b);
13697        assert_eq_m128i(r, _mm_setzero_si128());
13698        let r = _mm_maskz_subs_epu8(0b00000000_00001111, a, b);
13699        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
13700        assert_eq_m128i(r, e);
13701    }
13702
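    // The signed variants saturate at the type minimum. With a = -1 and
    // b = i16::MAX (resp. i8::MAX) the exact difference is already i16::MIN
    // (resp. i8::MIN), so the selected lanes are expected to hold MIN.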
13703    #[simd_test(enable = "avx512bw")]
13704    const fn test_mm512_subs_epi16() {
13705        let a = _mm512_set1_epi16(-1);
13706        let b = _mm512_set1_epi16(i16::MAX);
13707        let r = _mm512_subs_epi16(a, b);
13708        let e = _mm512_set1_epi16(i16::MIN);
13709        assert_eq_m512i(r, e);
13710    }
13711
13712    #[simd_test(enable = "avx512bw")]
13713    const fn test_mm512_mask_subs_epi16() {
13714        let a = _mm512_set1_epi16(-1);
13715        let b = _mm512_set1_epi16(i16::MAX);
13716        let r = _mm512_mask_subs_epi16(a, 0, a, b);
13717        assert_eq_m512i(r, a);
13718        let r = _mm512_mask_subs_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
13719        #[rustfmt::skip]
13720        let e = _mm512_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
13721                                 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
13722        assert_eq_m512i(r, e);
13723    }
13724
13725    #[simd_test(enable = "avx512bw")]
13726    const fn test_mm512_maskz_subs_epi16() {
13727        let a = _mm512_set1_epi16(-1);
13728        let b = _mm512_set1_epi16(i16::MAX);
13729        let r = _mm512_maskz_subs_epi16(0, a, b);
13730        assert_eq_m512i(r, _mm512_setzero_si512());
13731        let r = _mm512_maskz_subs_epi16(0b00000000_00000000_00000000_00001111, a, b);
13732        #[rustfmt::skip]
13733        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13734                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
13735        assert_eq_m512i(r, e);
13736    }
13737
13738    #[simd_test(enable = "avx512bw,avx512vl")]
13739    const fn test_mm256_mask_subs_epi16() {
13740        let a = _mm256_set1_epi16(-1);
13741        let b = _mm256_set1_epi16(i16::MAX);
13742        let r = _mm256_mask_subs_epi16(a, 0, a, b);
13743        assert_eq_m256i(r, a);
13744        let r = _mm256_mask_subs_epi16(a, 0b00000000_00001111, a, b);
13745        #[rustfmt::skip]
13746        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
13747        assert_eq_m256i(r, e);
13748    }
13749
13750    #[simd_test(enable = "avx512bw,avx512vl")]
13751    const fn test_mm256_maskz_subs_epi16() {
13752        let a = _mm256_set1_epi16(-1);
13753        let b = _mm256_set1_epi16(i16::MAX);
13754        let r = _mm256_maskz_subs_epi16(0, a, b);
13755        assert_eq_m256i(r, _mm256_setzero_si256());
13756        let r = _mm256_maskz_subs_epi16(0b00000000_00001111, a, b);
13757        #[rustfmt::skip]
13758        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
13759        assert_eq_m256i(r, e);
13760    }
13761
13762    #[simd_test(enable = "avx512bw,avx512vl")]
13763    const fn test_mm_mask_subs_epi16() {
13764        let a = _mm_set1_epi16(-1);
13765        let b = _mm_set1_epi16(i16::MAX);
13766        let r = _mm_mask_subs_epi16(a, 0, a, b);
13767        assert_eq_m128i(r, a);
13768        let r = _mm_mask_subs_epi16(a, 0b00001111, a, b);
13769        let e = _mm_set_epi16(-1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
13770        assert_eq_m128i(r, e);
13771    }
13772
13773    #[simd_test(enable = "avx512bw,avx512vl")]
13774    const fn test_mm_maskz_subs_epi16() {
13775        let a = _mm_set1_epi16(-1);
13776        let b = _mm_set1_epi16(i16::MAX);
13777        let r = _mm_maskz_subs_epi16(0, a, b);
13778        assert_eq_m128i(r, _mm_setzero_si128());
13779        let r = _mm_maskz_subs_epi16(0b00001111, a, b);
13780        let e = _mm_set_epi16(0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
13781        assert_eq_m128i(r, e);
13782    }
13783
13784    #[simd_test(enable = "avx512bw")]
13785    const fn test_mm512_subs_epi8() {
13786        let a = _mm512_set1_epi8(-1);
13787        let b = _mm512_set1_epi8(i8::MAX);
13788        let r = _mm512_subs_epi8(a, b);
13789        let e = _mm512_set1_epi8(i8::MIN);
13790        assert_eq_m512i(r, e);
13791    }
13792
13793    #[simd_test(enable = "avx512bw")]
13794    const fn test_mm512_mask_subs_epi8() {
13795        let a = _mm512_set1_epi8(-1);
13796        let b = _mm512_set1_epi8(i8::MAX);
13797        let r = _mm512_mask_subs_epi8(a, 0, a, b);
13798        assert_eq_m512i(r, a);
13799        let r = _mm512_mask_subs_epi8(
13800            a,
13801            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
13802            a,
13803            b,
13804        );
13805        #[rustfmt::skip]
13806        let e = _mm512_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
13807                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
13808                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
13809                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
13810        assert_eq_m512i(r, e);
13811    }
13812
13813    #[simd_test(enable = "avx512bw")]
13814    const fn test_mm512_maskz_subs_epi8() {
13815        let a = _mm512_set1_epi8(-1);
13816        let b = _mm512_set1_epi8(i8::MAX);
13817        let r = _mm512_maskz_subs_epi8(0, a, b);
13818        assert_eq_m512i(r, _mm512_setzero_si512());
13819        let r = _mm512_maskz_subs_epi8(
13820            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
13821            a,
13822            b,
13823        );
13824        #[rustfmt::skip]
13825        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13826                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13827                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13828                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
13829        assert_eq_m512i(r, e);
13830    }
13831
13832    #[simd_test(enable = "avx512bw,avx512vl")]
13833    const fn test_mm256_mask_subs_epi8() {
13834        let a = _mm256_set1_epi8(-1);
13835        let b = _mm256_set1_epi8(i8::MAX);
13836        let r = _mm256_mask_subs_epi8(a, 0, a, b);
13837        assert_eq_m256i(r, a);
13838        let r = _mm256_mask_subs_epi8(a, 0b00000000_00000000_00000000_00001111, a, b);
13839        #[rustfmt::skip]
13840        let e = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
13841                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
13842        assert_eq_m256i(r, e);
13843    }
13844
13845    #[simd_test(enable = "avx512bw,avx512vl")]
13846    const fn test_mm256_maskz_subs_epi8() {
13847        let a = _mm256_set1_epi8(-1);
13848        let b = _mm256_set1_epi8(i8::MAX);
13849        let r = _mm256_maskz_subs_epi8(0, a, b);
13850        assert_eq_m256i(r, _mm256_setzero_si256());
13851        let r = _mm256_maskz_subs_epi8(0b00000000_00000000_00000000_00001111, a, b);
13852        #[rustfmt::skip]
13853        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13854                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
13855        assert_eq_m256i(r, e);
13856    }
13857
13858    #[simd_test(enable = "avx512bw,avx512vl")]
13859    const fn test_mm_mask_subs_epi8() {
13860        let a = _mm_set1_epi8(-1);
13861        let b = _mm_set1_epi8(i8::MAX);
13862        let r = _mm_mask_subs_epi8(a, 0, a, b);
13863        assert_eq_m128i(r, a);
13864        let r = _mm_mask_subs_epi8(a, 0b00000000_00001111, a, b);
13865        #[rustfmt::skip]
13866        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
13867        assert_eq_m128i(r, e);
13868    }
13869
13870    #[simd_test(enable = "avx512bw,avx512vl")]
13871    const fn test_mm_maskz_subs_epi8() {
13872        let a = _mm_set1_epi8(-1);
13873        let b = _mm_set1_epi8(i8::MAX);
13874        let r = _mm_maskz_subs_epi8(0, a, b);
13875        assert_eq_m128i(r, _mm_setzero_si128());
13876        let r = _mm_maskz_subs_epi8(0b00000000_00001111, a, b);
13877        #[rustfmt::skip]
13878        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
13879        assert_eq_m128i(r, e);
13880    }
13881
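    // `mulhi` stores the high 16 bits of each 32-bit lane product. With
    // a = b = 1 the product is 1, whose high half is 0, so selected lanes are
    // expected to be 0.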
13882    #[simd_test(enable = "avx512bw")]
13883    const fn test_mm512_mulhi_epu16() {
13884        let a = _mm512_set1_epi16(1);
13885        let b = _mm512_set1_epi16(1);
13886        let r = _mm512_mulhi_epu16(a, b);
13887        let e = _mm512_set1_epi16(0);
13888        assert_eq_m512i(r, e);
13889    }
13890
13891    #[simd_test(enable = "avx512bw")]
13892    const fn test_mm512_mask_mulhi_epu16() {
13893        let a = _mm512_set1_epi16(1);
13894        let b = _mm512_set1_epi16(1);
13895        let r = _mm512_mask_mulhi_epu16(a, 0, a, b);
13896        assert_eq_m512i(r, a);
13897        let r = _mm512_mask_mulhi_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
13898        #[rustfmt::skip]
13899        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13900                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
13901        assert_eq_m512i(r, e);
13902    }
13903
13904    #[simd_test(enable = "avx512bw")]
13905    const fn test_mm512_maskz_mulhi_epu16() {
13906        let a = _mm512_set1_epi16(1);
13907        let b = _mm512_set1_epi16(1);
13908        let r = _mm512_maskz_mulhi_epu16(0, a, b);
13909        assert_eq_m512i(r, _mm512_setzero_si512());
13910        let r = _mm512_maskz_mulhi_epu16(0b00000000_00000000_00000000_00001111, a, b);
13911        #[rustfmt::skip]
13912        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13913                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
13914        assert_eq_m512i(r, e);
13915    }
13916
13917    #[simd_test(enable = "avx512bw,avx512vl")]
13918    const fn test_mm256_mask_mulhi_epu16() {
13919        let a = _mm256_set1_epi16(1);
13920        let b = _mm256_set1_epi16(1);
13921        let r = _mm256_mask_mulhi_epu16(a, 0, a, b);
13922        assert_eq_m256i(r, a);
13923        let r = _mm256_mask_mulhi_epu16(a, 0b00000000_00001111, a, b);
13924        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
13925        assert_eq_m256i(r, e);
13926    }
13927
13928    #[simd_test(enable = "avx512bw,avx512vl")]
13929    const fn test_mm256_maskz_mulhi_epu16() {
13930        let a = _mm256_set1_epi16(1);
13931        let b = _mm256_set1_epi16(1);
13932        let r = _mm256_maskz_mulhi_epu16(0, a, b);
13933        assert_eq_m256i(r, _mm256_setzero_si256());
13934        let r = _mm256_maskz_mulhi_epu16(0b00000000_00001111, a, b);
13935        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
13936        assert_eq_m256i(r, e);
13937    }
13938
13939    #[simd_test(enable = "avx512bw,avx512vl")]
13940    const fn test_mm_mask_mulhi_epu16() {
13941        let a = _mm_set1_epi16(1);
13942        let b = _mm_set1_epi16(1);
13943        let r = _mm_mask_mulhi_epu16(a, 0, a, b);
13944        assert_eq_m128i(r, a);
13945        let r = _mm_mask_mulhi_epu16(a, 0b00001111, a, b);
13946        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
13947        assert_eq_m128i(r, e);
13948    }
13949
13950    #[simd_test(enable = "avx512bw,avx512vl")]
13951    const fn test_mm_maskz_mulhi_epu16() {
13952        let a = _mm_set1_epi16(1);
13953        let b = _mm_set1_epi16(1);
13954        let r = _mm_maskz_mulhi_epu16(0, a, b);
13955        assert_eq_m128i(r, _mm_setzero_si128());
13956        let r = _mm_maskz_mulhi_epu16(0b00001111, a, b);
13957        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
13958        assert_eq_m128i(r, e);
13959    }
13960
13961    #[simd_test(enable = "avx512bw")]
13962    const fn test_mm512_mulhi_epi16() {
13963        let a = _mm512_set1_epi16(1);
13964        let b = _mm512_set1_epi16(1);
13965        let r = _mm512_mulhi_epi16(a, b);
13966        let e = _mm512_set1_epi16(0);
13967        assert_eq_m512i(r, e);
13968    }
13969
13970    #[simd_test(enable = "avx512bw")]
13971    const fn test_mm512_mask_mulhi_epi16() {
13972        let a = _mm512_set1_epi16(1);
13973        let b = _mm512_set1_epi16(1);
13974        let r = _mm512_mask_mulhi_epi16(a, 0, a, b);
13975        assert_eq_m512i(r, a);
13976        let r = _mm512_mask_mulhi_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
13977        #[rustfmt::skip]
13978        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13979                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
13980        assert_eq_m512i(r, e);
13981    }
13982
13983    #[simd_test(enable = "avx512bw")]
13984    const fn test_mm512_maskz_mulhi_epi16() {
13985        let a = _mm512_set1_epi16(1);
13986        let b = _mm512_set1_epi16(1);
13987        let r = _mm512_maskz_mulhi_epi16(0, a, b);
13988        assert_eq_m512i(r, _mm512_setzero_si512());
13989        let r = _mm512_maskz_mulhi_epi16(0b00000000_00000000_00000000_00001111, a, b);
13990        #[rustfmt::skip]
13991        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13992                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
13993        assert_eq_m512i(r, e);
13994    }
13995
13996    #[simd_test(enable = "avx512bw,avx512vl")]
13997    const fn test_mm256_mask_mulhi_epi16() {
13998        let a = _mm256_set1_epi16(1);
13999        let b = _mm256_set1_epi16(1);
14000        let r = _mm256_mask_mulhi_epi16(a, 0, a, b);
14001        assert_eq_m256i(r, a);
14002        let r = _mm256_mask_mulhi_epi16(a, 0b00000000_00001111, a, b);
14003        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
14004        assert_eq_m256i(r, e);
14005    }
14006
14007    #[simd_test(enable = "avx512bw,avx512vl")]
14008    const fn test_mm256_maskz_mulhi_epi16() {
14009        let a = _mm256_set1_epi16(1);
14010        let b = _mm256_set1_epi16(1);
14011        let r = _mm256_maskz_mulhi_epi16(0, a, b);
14012        assert_eq_m256i(r, _mm256_setzero_si256());
14013        let r = _mm256_maskz_mulhi_epi16(0b00000000_00001111, a, b);
14014        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
14015        assert_eq_m256i(r, e);
14016    }
14017
14018    #[simd_test(enable = "avx512bw,avx512vl")]
14019    const fn test_mm_mask_mulhi_epi16() {
14020        let a = _mm_set1_epi16(1);
14021        let b = _mm_set1_epi16(1);
14022        let r = _mm_mask_mulhi_epi16(a, 0, a, b);
14023        assert_eq_m128i(r, a);
14024        let r = _mm_mask_mulhi_epi16(a, 0b00001111, a, b);
14025        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
14026        assert_eq_m128i(r, e);
14027    }
14028
14029    #[simd_test(enable = "avx512bw,avx512vl")]
14030    const fn test_mm_maskz_mulhi_epi16() {
14031        let a = _mm_set1_epi16(1);
14032        let b = _mm_set1_epi16(1);
14033        let r = _mm_maskz_mulhi_epi16(0, a, b);
14034        assert_eq_m128i(r, _mm_setzero_si128());
14035        let r = _mm_maskz_mulhi_epi16(0b00001111, a, b);
14036        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
14037        assert_eq_m128i(r, e);
14038    }
14039
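    // `mulhrs` (vpmulhrsw) computes ((a * b) + 0x4000) >> 15 per lane, i.e. a
    // fixed-point multiply that rounds the high half to nearest. For a = b = 1
    // this is (1 + 0x4000) >> 15 = 0. Unlike the neighbouring tests these are
    // plain `fn`s, presumably because the intrinsic is not const-evaluable yet.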
14040    #[simd_test(enable = "avx512bw")]
14041    fn test_mm512_mulhrs_epi16() {
14042        let a = _mm512_set1_epi16(1);
14043        let b = _mm512_set1_epi16(1);
14044        let r = _mm512_mulhrs_epi16(a, b);
14045        let e = _mm512_set1_epi16(0);
14046        assert_eq_m512i(r, e);
14047    }
14048
14049    #[simd_test(enable = "avx512bw")]
14050    fn test_mm512_mask_mulhrs_epi16() {
14051        let a = _mm512_set1_epi16(1);
14052        let b = _mm512_set1_epi16(1);
14053        let r = _mm512_mask_mulhrs_epi16(a, 0, a, b);
14054        assert_eq_m512i(r, a);
14055        let r = _mm512_mask_mulhrs_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
14056        #[rustfmt::skip]
14057        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
14058                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
14059        assert_eq_m512i(r, e);
14060    }
14061
14062    #[simd_test(enable = "avx512bw")]
14063    fn test_mm512_maskz_mulhrs_epi16() {
14064        let a = _mm512_set1_epi16(1);
14065        let b = _mm512_set1_epi16(1);
14066        let r = _mm512_maskz_mulhrs_epi16(0, a, b);
14067        assert_eq_m512i(r, _mm512_setzero_si512());
14068        let r = _mm512_maskz_mulhrs_epi16(0b00000000_00000000_00000000_00001111, a, b);
14069        #[rustfmt::skip]
14070        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
14071                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
14072        assert_eq_m512i(r, e);
14073    }
14074
14075    #[simd_test(enable = "avx512bw,avx512vl")]
14076    fn test_mm256_mask_mulhrs_epi16() {
14077        let a = _mm256_set1_epi16(1);
14078        let b = _mm256_set1_epi16(1);
14079        let r = _mm256_mask_mulhrs_epi16(a, 0, a, b);
14080        assert_eq_m256i(r, a);
14081        let r = _mm256_mask_mulhrs_epi16(a, 0b00000000_00001111, a, b);
14082        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
14083        assert_eq_m256i(r, e);
14084    }
14085
14086    #[simd_test(enable = "avx512bw,avx512vl")]
14087    fn test_mm256_maskz_mulhrs_epi16() {
14088        let a = _mm256_set1_epi16(1);
14089        let b = _mm256_set1_epi16(1);
14090        let r = _mm256_maskz_mulhrs_epi16(0, a, b);
14091        assert_eq_m256i(r, _mm256_setzero_si256());
14092        let r = _mm256_maskz_mulhrs_epi16(0b00000000_00001111, a, b);
14093        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
14094        assert_eq_m256i(r, e);
14095    }
14096
14097    #[simd_test(enable = "avx512bw,avx512vl")]
14098    fn test_mm_mask_mulhrs_epi16() {
14099        let a = _mm_set1_epi16(1);
14100        let b = _mm_set1_epi16(1);
14101        let r = _mm_mask_mulhrs_epi16(a, 0, a, b);
14102        assert_eq_m128i(r, a);
14103        let r = _mm_mask_mulhrs_epi16(a, 0b00001111, a, b);
14104        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
14105        assert_eq_m128i(r, e);
14106    }
14107
14108    #[simd_test(enable = "avx512bw,avx512vl")]
14109    fn test_mm_maskz_mulhrs_epi16() {
14110        let a = _mm_set1_epi16(1);
14111        let b = _mm_set1_epi16(1);
14112        let r = _mm_maskz_mulhrs_epi16(0, a, b);
14113        assert_eq_m128i(r, _mm_setzero_si128());
14114        let r = _mm_maskz_mulhrs_epi16(0b00001111, a, b);
14115        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
14116        assert_eq_m128i(r, e);
14117    }
14118
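    // `mullo` keeps the low 16 bits of each product, so with a = b = 1 the
    // selected lanes are expected to be 1 rather than 0.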
14119    #[simd_test(enable = "avx512bw")]
14120    const fn test_mm512_mullo_epi16() {
14121        let a = _mm512_set1_epi16(1);
14122        let b = _mm512_set1_epi16(1);
14123        let r = _mm512_mullo_epi16(a, b);
14124        let e = _mm512_set1_epi16(1);
14125        assert_eq_m512i(r, e);
14126    }
14127
14128    #[simd_test(enable = "avx512bw")]
14129    const fn test_mm512_mask_mullo_epi16() {
14130        let a = _mm512_set1_epi16(1);
14131        let b = _mm512_set1_epi16(1);
14132        let r = _mm512_mask_mullo_epi16(a, 0, a, b);
14133        assert_eq_m512i(r, a);
14134        let r = _mm512_mask_mullo_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
14135        #[rustfmt::skip]
14136        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
14137                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
14138        assert_eq_m512i(r, e);
14139    }
14140
14141    #[simd_test(enable = "avx512bw")]
14142    const fn test_mm512_maskz_mullo_epi16() {
14143        let a = _mm512_set1_epi16(1);
14144        let b = _mm512_set1_epi16(1);
14145        let r = _mm512_maskz_mullo_epi16(0, a, b);
14146        assert_eq_m512i(r, _mm512_setzero_si512());
14147        let r = _mm512_maskz_mullo_epi16(0b00000000_00000000_00000000_00001111, a, b);
14148        #[rustfmt::skip]
14149        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
14150                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
14151        assert_eq_m512i(r, e);
14152    }
14153
14154    #[simd_test(enable = "avx512bw,avx512vl")]
14155    const fn test_mm256_mask_mullo_epi16() {
14156        let a = _mm256_set1_epi16(1);
14157        let b = _mm256_set1_epi16(1);
14158        let r = _mm256_mask_mullo_epi16(a, 0, a, b);
14159        assert_eq_m256i(r, a);
14160        let r = _mm256_mask_mullo_epi16(a, 0b00000000_00001111, a, b);
14161        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
14162        assert_eq_m256i(r, e);
14163    }
14164
14165    #[simd_test(enable = "avx512bw,avx512vl")]
14166    const fn test_mm256_maskz_mullo_epi16() {
14167        let a = _mm256_set1_epi16(1);
14168        let b = _mm256_set1_epi16(1);
14169        let r = _mm256_maskz_mullo_epi16(0, a, b);
14170        assert_eq_m256i(r, _mm256_setzero_si256());
14171        let r = _mm256_maskz_mullo_epi16(0b00000000_00001111, a, b);
14172        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
14173        assert_eq_m256i(r, e);
14174    }
14175
14176    #[simd_test(enable = "avx512bw,avx512vl")]
14177    const fn test_mm_mask_mullo_epi16() {
14178        let a = _mm_set1_epi16(1);
14179        let b = _mm_set1_epi16(1);
14180        let r = _mm_mask_mullo_epi16(a, 0, a, b);
14181        assert_eq_m128i(r, a);
14182        let r = _mm_mask_mullo_epi16(a, 0b00001111, a, b);
14183        let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1);
14184        assert_eq_m128i(r, e);
14185    }
14186
14187    #[simd_test(enable = "avx512bw,avx512vl")]
14188    const fn test_mm_maskz_mullo_epi16() {
14189        let a = _mm_set1_epi16(1);
14190        let b = _mm_set1_epi16(1);
14191        let r = _mm_maskz_mullo_epi16(0, a, b);
14192        assert_eq_m128i(r, _mm_setzero_si128());
14193        let r = _mm_maskz_mullo_epi16(0b00001111, a, b);
14194        let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
14195        assert_eq_m128i(r, e);
14196    }
14197
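    // The max/min tests pair an ascending vector with a descending one. Note
    // that `_mm512_set_epi16` and friends list lanes from the highest index
    // down to lane 0, while mask bit i selects lane i, so the low mask bits
    // correspond to the rightmost `set` arguments in the expected vectors.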
14198    #[simd_test(enable = "avx512bw")]
14199    const fn test_mm512_max_epu16() {
14200        #[rustfmt::skip]
14201        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14202                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14203        #[rustfmt::skip]
14204        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14205                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14206        let r = _mm512_max_epu16(a, b);
14207        #[rustfmt::skip]
14208        let e = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
14209                                 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
14210        assert_eq_m512i(r, e);
14211    }
14212
14213    #[simd_test(enable = "avx512bw")]
14214    const fn test_mm512_mask_max_epu16() {
14215        #[rustfmt::skip]
14216        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14217                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14218        #[rustfmt::skip]
14219        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14220                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14221        let r = _mm512_mask_max_epu16(a, 0, a, b);
14222        assert_eq_m512i(r, a);
14223        let r = _mm512_mask_max_epu16(a, 0b00000000_11111111_00000000_11111111, a, b);
14224        #[rustfmt::skip]
14225        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14226                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14227        assert_eq_m512i(r, e);
14228    }
14229
14230    #[simd_test(enable = "avx512bw")]
14231    const fn test_mm512_maskz_max_epu16() {
14232        #[rustfmt::skip]
14233        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14234                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14235        #[rustfmt::skip]
14236        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14237                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14238        let r = _mm512_maskz_max_epu16(0, a, b);
14239        assert_eq_m512i(r, _mm512_setzero_si512());
14240        let r = _mm512_maskz_max_epu16(0b00000000_11111111_00000000_11111111, a, b);
14241        #[rustfmt::skip]
14242        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
14243                                 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
14244        assert_eq_m512i(r, e);
14245    }
14246
14247    #[simd_test(enable = "avx512bw,avx512vl")]
14248    const fn test_mm256_mask_max_epu16() {
14249        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14250        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14251        let r = _mm256_mask_max_epu16(a, 0, a, b);
14252        assert_eq_m256i(r, a);
14253        let r = _mm256_mask_max_epu16(a, 0b00000000_11111111, a, b);
14254        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14255        assert_eq_m256i(r, e);
14256    }
14257
14258    #[simd_test(enable = "avx512bw,avx512vl")]
14259    const fn test_mm256_maskz_max_epu16() {
14260        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14261        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14262        let r = _mm256_maskz_max_epu16(0, a, b);
14263        assert_eq_m256i(r, _mm256_setzero_si256());
14264        let r = _mm256_maskz_max_epu16(0b00000000_11111111, a, b);
14265        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
14266        assert_eq_m256i(r, e);
14267    }
14268
14269    #[simd_test(enable = "avx512bw,avx512vl")]
14270    const fn test_mm_mask_max_epu16() {
14271        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14272        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
14273        let r = _mm_mask_max_epu16(a, 0, a, b);
14274        assert_eq_m128i(r, a);
14275        let r = _mm_mask_max_epu16(a, 0b00001111, a, b);
14276        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14277        assert_eq_m128i(r, e);
14278    }
14279
14280    #[simd_test(enable = "avx512bw,avx512vl")]
14281    const fn test_mm_maskz_max_epu16() {
14282        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14283        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
14284        let r = _mm_maskz_max_epu16(0, a, b);
14285        assert_eq_m128i(r, _mm_setzero_si128());
14286        let r = _mm_maskz_max_epu16(0b00001111, a, b);
14287        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
14288        assert_eq_m128i(r, e);
14289    }
14290
14291    #[simd_test(enable = "avx512bw")]
14292    const fn test_mm512_max_epu8() {
14293        #[rustfmt::skip]
14294        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14295                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14296                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14297                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14298        #[rustfmt::skip]
14299        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14300                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14301                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14302                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14303        let r = _mm512_max_epu8(a, b);
14304        #[rustfmt::skip]
14305        let e = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
14306                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
14307                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
14308                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
14309        assert_eq_m512i(r, e);
14310    }
14311
14312    #[simd_test(enable = "avx512bw")]
14313    const fn test_mm512_mask_max_epu8() {
14314        #[rustfmt::skip]
14315        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14316                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14317                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14318                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14319        #[rustfmt::skip]
14320        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14321                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14322                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14323                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14324        let r = _mm512_mask_max_epu8(a, 0, a, b);
14325        assert_eq_m512i(r, a);
14326        let r = _mm512_mask_max_epu8(
14327            a,
14328            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
14329            a,
14330            b,
14331        );
14332        #[rustfmt::skip]
14333        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14334                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14335                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14336                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14337        assert_eq_m512i(r, e);
14338    }
14339
14340    #[simd_test(enable = "avx512bw")]
14341    const fn test_mm512_maskz_max_epu8() {
14342        #[rustfmt::skip]
14343        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14344                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14345                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14346                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14347        #[rustfmt::skip]
14348        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14349                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14350                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14351                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14352        let r = _mm512_maskz_max_epu8(0, a, b);
14353        assert_eq_m512i(r, _mm512_setzero_si512());
14354        let r = _mm512_maskz_max_epu8(
14355            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
14356            a,
14357            b,
14358        );
14359        #[rustfmt::skip]
14360        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
14361                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
14362                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
14363                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
14364        assert_eq_m512i(r, e);
14365    }
14366
14367    #[simd_test(enable = "avx512bw,avx512vl")]
14368    const fn test_mm256_mask_max_epu8() {
14369        #[rustfmt::skip]
14370        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14371                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14372        #[rustfmt::skip]
14373        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14374                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14375        let r = _mm256_mask_max_epu8(a, 0, a, b);
14376        assert_eq_m256i(r, a);
14377        let r = _mm256_mask_max_epu8(a, 0b00000000_11111111_00000000_11111111, a, b);
14378        #[rustfmt::skip]
14379        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14380                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14381        assert_eq_m256i(r, e);
14382    }
14383
14384    #[simd_test(enable = "avx512bw,avx512vl")]
14385    const fn test_mm256_maskz_max_epu8() {
14386        #[rustfmt::skip]
14387        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14388                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14389        #[rustfmt::skip]
14390        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14391                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14392        let r = _mm256_maskz_max_epu8(0, a, b);
14393        assert_eq_m256i(r, _mm256_setzero_si256());
14394        let r = _mm256_maskz_max_epu8(0b00000000_11111111_00000000_11111111, a, b);
14395        #[rustfmt::skip]
14396        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
14397                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
14398        assert_eq_m256i(r, e);
14399    }
14400
14401    #[simd_test(enable = "avx512bw,avx512vl")]
14402    const fn test_mm_mask_max_epu8() {
14403        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14404        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14405        let r = _mm_mask_max_epu8(a, 0, a, b);
14406        assert_eq_m128i(r, a);
14407        let r = _mm_mask_max_epu8(a, 0b00000000_11111111, a, b);
14408        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14409        assert_eq_m128i(r, e);
14410    }
14411
14412    #[simd_test(enable = "avx512bw,avx512vl")]
14413    const fn test_mm_maskz_max_epu8() {
14414        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14415        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14416        let r = _mm_maskz_max_epu8(0, a, b);
14417        assert_eq_m128i(r, _mm_setzero_si128());
14418        let r = _mm_maskz_max_epu8(0b00000000_11111111, a, b);
14419        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
14420        assert_eq_m128i(r, e);
14421    }
14422
14423    #[simd_test(enable = "avx512bw")]
14424    const fn test_mm512_max_epi16() {
14425        #[rustfmt::skip]
14426        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14427                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14428        #[rustfmt::skip]
14429        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14430                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14431        let r = _mm512_max_epi16(a, b);
14432        #[rustfmt::skip]
14433        let e = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
14434                                 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
14435        assert_eq_m512i(r, e);
14436    }
14437
14438    #[simd_test(enable = "avx512bw")]
14439    const fn test_mm512_mask_max_epi16() {
14440        #[rustfmt::skip]
14441        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14442                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14443        #[rustfmt::skip]
14444        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14445                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14446        let r = _mm512_mask_max_epi16(a, 0, a, b);
14447        assert_eq_m512i(r, a);
14448        let r = _mm512_mask_max_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
14449        #[rustfmt::skip]
14450        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14451                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14452        assert_eq_m512i(r, e);
14453    }
14454
14455    #[simd_test(enable = "avx512bw")]
14456    const fn test_mm512_maskz_max_epi16() {
14457        #[rustfmt::skip]
14458        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14459                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14460        #[rustfmt::skip]
14461        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14462                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14463        let r = _mm512_maskz_max_epi16(0, a, b);
14464        assert_eq_m512i(r, _mm512_setzero_si512());
14465        let r = _mm512_maskz_max_epi16(0b00000000_11111111_00000000_11111111, a, b);
14466        #[rustfmt::skip]
14467        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
14468                                 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
14469        assert_eq_m512i(r, e);
14470    }
14471
14472    #[simd_test(enable = "avx512bw,avx512vl")]
14473    const fn test_mm256_mask_max_epi16() {
14474        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14475        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14476        let r = _mm256_mask_max_epi16(a, 0, a, b);
14477        assert_eq_m256i(r, a);
14478        let r = _mm256_mask_max_epi16(a, 0b00000000_11111111, a, b);
14479        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14480        assert_eq_m256i(r, e);
14481    }
14482
14483    #[simd_test(enable = "avx512bw,avx512vl")]
14484    const fn test_mm256_maskz_max_epi16() {
14485        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14486        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14487        let r = _mm256_maskz_max_epi16(0, a, b);
14488        assert_eq_m256i(r, _mm256_setzero_si256());
14489        let r = _mm256_maskz_max_epi16(0b00000000_11111111, a, b);
14490        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
14491        assert_eq_m256i(r, e);
14492    }
14493
14494    #[simd_test(enable = "avx512bw,avx512vl")]
14495    const fn test_mm_mask_max_epi16() {
14496        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14497        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
14498        let r = _mm_mask_max_epi16(a, 0, a, b);
14499        assert_eq_m128i(r, a);
14500        let r = _mm_mask_max_epi16(a, 0b00001111, a, b);
14501        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14502        assert_eq_m128i(r, e);
14503    }
14504
14505    #[simd_test(enable = "avx512bw,avx512vl")]
14506    const fn test_mm_maskz_max_epi16() {
14507        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14508        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
14509        let r = _mm_maskz_max_epi16(0, a, b);
14510        assert_eq_m128i(r, _mm_setzero_si128());
14511        let r = _mm_maskz_max_epi16(0b00001111, a, b);
14512        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
14513        assert_eq_m128i(r, e);
14514    }
14515
14516    #[simd_test(enable = "avx512bw")]
14517    const fn test_mm512_max_epi8() {
14518        #[rustfmt::skip]
14519        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14520                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14521                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14522                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14523        #[rustfmt::skip]
14524        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14525                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14526                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14527                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14528        let r = _mm512_max_epi8(a, b);
14529        #[rustfmt::skip]
14530        let e = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
14531                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
14532                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
14533                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
14534        assert_eq_m512i(r, e);
14535    }
14536
14537    #[simd_test(enable = "avx512bw")]
14538    const fn test_mm512_mask_max_epi8() {
14539        #[rustfmt::skip]
14540        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14541                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14542                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14543                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14544        #[rustfmt::skip]
14545        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14546                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14547                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14548                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14549        let r = _mm512_mask_max_epi8(a, 0, a, b);
14550        assert_eq_m512i(r, a);
14551        let r = _mm512_mask_max_epi8(
14552            a,
14553            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
14554            a,
14555            b,
14556        );
14557        #[rustfmt::skip]
14558        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14559                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14560                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14561                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14562        assert_eq_m512i(r, e);
14563    }
14564
14565    #[simd_test(enable = "avx512bw")]
14566    const fn test_mm512_maskz_max_epi8() {
14567        #[rustfmt::skip]
14568        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14569                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14570                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14571                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14572        #[rustfmt::skip]
14573        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14574                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14575                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14576                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14577        let r = _mm512_maskz_max_epi8(0, a, b);
14578        assert_eq_m512i(r, _mm512_setzero_si512());
14579        let r = _mm512_maskz_max_epi8(
14580            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
14581            a,
14582            b,
14583        );
14584        #[rustfmt::skip]
14585        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
14586                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
14587                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
14588                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
14589        assert_eq_m512i(r, e);
14590    }
14591
14592    #[simd_test(enable = "avx512bw,avx512vl")]
14593    const fn test_mm256_mask_max_epi8() {
14594        #[rustfmt::skip]
14595        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14596                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14597        #[rustfmt::skip]
14598        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14599                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14600        let r = _mm256_mask_max_epi8(a, 0, a, b);
14601        assert_eq_m256i(r, a);
14602        let r = _mm256_mask_max_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
14603        #[rustfmt::skip]
14604        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14605                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14606        assert_eq_m256i(r, e);
14607    }
14608
14609    #[simd_test(enable = "avx512bw,avx512vl")]
14610    const fn test_mm256_maskz_max_epi8() {
14611        #[rustfmt::skip]
14612        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14613                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14614        #[rustfmt::skip]
14615        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14616                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14617        let r = _mm256_maskz_max_epi8(0, a, b);
14618        assert_eq_m256i(r, _mm256_setzero_si256());
14619        let r = _mm256_maskz_max_epi8(0b00000000_11111111_00000000_11111111, a, b);
14620        #[rustfmt::skip]
14621        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
14622                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
14623        assert_eq_m256i(r, e);
14624    }
14625
14626    #[simd_test(enable = "avx512bw,avx512vl")]
14627    const fn test_mm_mask_max_epi8() {
14628        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14629        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14630        let r = _mm_mask_max_epi8(a, 0, a, b);
14631        assert_eq_m128i(r, a);
14632        let r = _mm_mask_max_epi8(a, 0b00000000_11111111, a, b);
14633        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14634        assert_eq_m128i(r, e);
14635    }
14636
14637    #[simd_test(enable = "avx512bw,avx512vl")]
14638    const fn test_mm_maskz_max_epi8() {
14639        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14640        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14641        let r = _mm_maskz_max_epi8(0, a, b);
14642        assert_eq_m128i(r, _mm_setzero_si128());
14643        let r = _mm_maskz_max_epi8(0b00000000_11111111, a, b);
14644        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
14645        assert_eq_m128i(r, e);
14646    }
14647
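    // The min tests mirror the max tests above: with the same ascending and
    // descending inputs each lane keeps the smaller value, so each 16-lane
    // group in the expected vector runs 0..=7 and back down, instead of
    // 15..=8 and back up as in the max tests.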
14648    #[simd_test(enable = "avx512bw")]
14649    const fn test_mm512_min_epu16() {
14650        #[rustfmt::skip]
14651        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14652                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14653        #[rustfmt::skip]
14654        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14655                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14656        let r = _mm512_min_epu16(a, b);
14657        #[rustfmt::skip]
14658        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14659                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14660        assert_eq_m512i(r, e);
14661    }
14662
14663    #[simd_test(enable = "avx512bw")]
14664    const fn test_mm512_mask_min_epu16() {
14665        #[rustfmt::skip]
14666        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14667                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14668        #[rustfmt::skip]
14669        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14670                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14671        let r = _mm512_mask_min_epu16(a, 0, a, b);
14672        assert_eq_m512i(r, a);
14673        let r = _mm512_mask_min_epu16(a, 0b00000000_11111111_00000000_11111111, a, b);
14674        #[rustfmt::skip]
14675        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14676                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14677        assert_eq_m512i(r, e);
14678    }
14679
14680    #[simd_test(enable = "avx512bw")]
14681    const fn test_mm512_maskz_min_epu16() {
14682        #[rustfmt::skip]
14683        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14684                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14685        #[rustfmt::skip]
14686        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14687                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14688        let r = _mm512_maskz_min_epu16(0, a, b);
14689        assert_eq_m512i(r, _mm512_setzero_si512());
14690        let r = _mm512_maskz_min_epu16(0b00000000_11111111_00000000_11111111, a, b);
14691        #[rustfmt::skip]
14692        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
14693                                 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
14694        assert_eq_m512i(r, e);
14695    }
14696
14697    #[simd_test(enable = "avx512bw,avx512vl")]
14698    const fn test_mm256_mask_min_epu16() {
14699        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14700        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14701        let r = _mm256_mask_min_epu16(a, 0, a, b);
14702        assert_eq_m256i(r, a);
14703        let r = _mm256_mask_min_epu16(a, 0b00000000_11111111, a, b);
14704        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14705        assert_eq_m256i(r, e);
14706    }
14707
14708    #[simd_test(enable = "avx512bw,avx512vl")]
14709    const fn test_mm256_maskz_min_epu16() {
14710        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14711        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14712        let r = _mm256_maskz_min_epu16(0, a, b);
14713        assert_eq_m256i(r, _mm256_setzero_si256());
14714        let r = _mm256_maskz_min_epu16(0b00000000_11111111, a, b);
14715        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
14716        assert_eq_m256i(r, e);
14717    }
14718
14719    #[simd_test(enable = "avx512bw,avx512vl")]
14720    const fn test_mm_mask_min_epu16() {
14721        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14722        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
14723        let r = _mm_mask_min_epu16(a, 0, a, b);
14724        assert_eq_m128i(r, a);
14725        let r = _mm_mask_min_epu16(a, 0b00001111, a, b);
14726        let e = _mm_set_epi16(0, 1, 2, 3, 3, 2, 1, 0);
14727        assert_eq_m128i(r, e);
14728    }
14729
14730    #[simd_test(enable = "avx512bw,avx512vl")]
14731    const fn test_mm_maskz_min_epu16() {
14732        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14733        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
14734        let r = _mm_maskz_min_epu16(0, a, b);
14735        assert_eq_m128i(r, _mm_setzero_si128());
14736        let r = _mm_maskz_min_epu16(0b00001111, a, b);
14737        let e = _mm_set_epi16(0, 0, 0, 0, 3, 2, 1, 0);
14738        assert_eq_m128i(r, e);
14739    }
14740
14741    #[simd_test(enable = "avx512bw")]
14742    const fn test_mm512_min_epu8() {
14743        #[rustfmt::skip]
14744        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14745                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14746                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14747                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14748        #[rustfmt::skip]
14749        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14750                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14751                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14752                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14753        let r = _mm512_min_epu8(a, b);
14754        #[rustfmt::skip]
14755        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14756                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14757                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14758                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14759        assert_eq_m512i(r, e);
14760    }
14761
14762    #[simd_test(enable = "avx512bw")]
14763    const fn test_mm512_mask_min_epu8() {
14764        #[rustfmt::skip]
14765        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14766                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14767                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14768                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14769        #[rustfmt::skip]
14770        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14771                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14772                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14773                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14774        let r = _mm512_mask_min_epu8(a, 0, a, b);
14775        assert_eq_m512i(r, a);
14776        let r = _mm512_mask_min_epu8(
14777            a,
14778            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
14779            a,
14780            b,
14781        );
14782        #[rustfmt::skip]
14783        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14784                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14785                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14786                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14787        assert_eq_m512i(r, e);
14788    }
14789
14790    #[simd_test(enable = "avx512bw")]
14791    const fn test_mm512_maskz_min_epu8() {
14792        #[rustfmt::skip]
14793        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14794                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14795                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14796                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14797        #[rustfmt::skip]
14798        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14799                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14800                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14801                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14802        let r = _mm512_maskz_min_epu8(0, a, b);
14803        assert_eq_m512i(r, _mm512_setzero_si512());
14804        let r = _mm512_maskz_min_epu8(
14805            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
14806            a,
14807            b,
14808        );
14809        #[rustfmt::skip]
14810        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
14811                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
14812                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
14813                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
14814        assert_eq_m512i(r, e);
14815    }
14816
14817    #[simd_test(enable = "avx512bw,avx512vl")]
14818    const fn test_mm256_mask_min_epu8() {
14819        #[rustfmt::skip]
14820        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14821                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14822        #[rustfmt::skip]
14823        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14824                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14825        let r = _mm256_mask_min_epu8(a, 0, a, b);
14826        assert_eq_m256i(r, a);
14827        let r = _mm256_mask_min_epu8(a, 0b00000000_11111111_00000000_11111111, a, b);
14828        #[rustfmt::skip]
14829        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14830                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14831        assert_eq_m256i(r, e);
14832    }
14833
14834    #[simd_test(enable = "avx512bw,avx512vl")]
14835    const fn test_mm256_maskz_min_epu8() {
14836        #[rustfmt::skip]
14837        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14838                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14839        #[rustfmt::skip]
14840        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14841                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14842        let r = _mm256_maskz_min_epu8(0, a, b);
14843        assert_eq_m256i(r, _mm256_setzero_si256());
14844        let r = _mm256_maskz_min_epu8(0b00000000_11111111_00000000_11111111, a, b);
14845        #[rustfmt::skip]
14846        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
14847                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
14848        assert_eq_m256i(r, e);
14849    }
14850
14851    #[simd_test(enable = "avx512bw,avx512vl")]
14852    const fn test_mm_mask_min_epu8() {
14853        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14854        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14855        let r = _mm_mask_min_epu8(a, 0, a, b);
14856        assert_eq_m128i(r, a);
14857        let r = _mm_mask_min_epu8(a, 0b00000000_11111111, a, b);
14858        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14859        assert_eq_m128i(r, e);
14860    }
14861
14862    #[simd_test(enable = "avx512bw,avx512vl")]
14863    const fn test_mm_maskz_min_epu8() {
14864        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14865        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14866        let r = _mm_maskz_min_epu8(0, a, b);
14867        assert_eq_m128i(r, _mm_setzero_si128());
14868        let r = _mm_maskz_min_epu8(0b00000000_11111111, a, b);
14869        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
14870        assert_eq_m128i(r, e);
14871    }
14872
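    // `min_epi16` compares lanes as signed integers; with the non-negative
    // inputs used here the expected results match the unsigned min tests above.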
14873    #[simd_test(enable = "avx512bw")]
14874    const fn test_mm512_min_epi16() {
14875        #[rustfmt::skip]
14876        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14877                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14878        #[rustfmt::skip]
14879        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14880                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14881        let r = _mm512_min_epi16(a, b);
14882        #[rustfmt::skip]
14883        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14884                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14885        assert_eq_m512i(r, e);
14886    }
14887
14888    #[simd_test(enable = "avx512bw")]
14889    const fn test_mm512_mask_min_epi16() {
14890        #[rustfmt::skip]
14891        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14892                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14893        #[rustfmt::skip]
14894        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14895                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14896        let r = _mm512_mask_min_epi16(a, 0, a, b);
14897        assert_eq_m512i(r, a);
14898        let r = _mm512_mask_min_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
14899        #[rustfmt::skip]
14900        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14901                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14902        assert_eq_m512i(r, e);
14903    }
14904
14905    #[simd_test(enable = "avx512bw")]
14906    const fn test_mm512_maskz_min_epi16() {
14907        #[rustfmt::skip]
14908        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14909                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14910        #[rustfmt::skip]
14911        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14912                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14913        let r = _mm512_maskz_min_epi16(0, a, b);
14914        assert_eq_m512i(r, _mm512_setzero_si512());
14915        let r = _mm512_maskz_min_epi16(0b00000000_11111111_00000000_11111111, a, b);
14916        #[rustfmt::skip]
14917        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
14918                                 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
14919        assert_eq_m512i(r, e);
14920    }
14921
14922    #[simd_test(enable = "avx512bw,avx512vl")]
14923    const fn test_mm256_mask_min_epi16() {
14924        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14925        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14926        let r = _mm256_mask_min_epi16(a, 0, a, b);
14927        assert_eq_m256i(r, a);
14928        let r = _mm256_mask_min_epi16(a, 0b00000000_11111111, a, b);
14929        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14930        assert_eq_m256i(r, e);
14931    }
14932
14933    #[simd_test(enable = "avx512bw,avx512vl")]
14934    const fn test_mm256_maskz_min_epi16() {
14935        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14936        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14937        let r = _mm256_maskz_min_epi16(0, a, b);
14938        assert_eq_m256i(r, _mm256_setzero_si256());
14939        let r = _mm256_maskz_min_epi16(0b00000000_11111111, a, b);
14940        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
14941        assert_eq_m256i(r, e);
14942    }
14943
14944    #[simd_test(enable = "avx512bw,avx512vl")]
14945    const fn test_mm_mask_min_epi16() {
14946        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14947        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
14948        let r = _mm_mask_min_epi16(a, 0, a, b);
14949        assert_eq_m128i(r, a);
14950        let r = _mm_mask_min_epi16(a, 0b00001111, a, b);
14951        let e = _mm_set_epi16(0, 1, 2, 3, 3, 2, 1, 0);
14952        assert_eq_m128i(r, e);
14953    }
14954
14955    #[simd_test(enable = "avx512bw,avx512vl")]
14956    const fn test_mm_maskz_min_epi16() {
14957        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14958        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
14959        let r = _mm_maskz_min_epi16(0, a, b);
14960        assert_eq_m128i(r, _mm_setzero_si128());
14961        let r = _mm_maskz_min_epi16(0b00001111, a, b);
14962        let e = _mm_set_epi16(0, 0, 0, 0, 3, 2, 1, 0);
14963        assert_eq_m128i(r, e);
14964    }
14965
14966    #[simd_test(enable = "avx512bw")]
14967    const fn test_mm512_min_epi8() {
14968        #[rustfmt::skip]
14969        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14970                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14971                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14972                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14973        #[rustfmt::skip]
14974        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14975                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14976                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14977                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14978        let r = _mm512_min_epi8(a, b);
14979        #[rustfmt::skip]
14980        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14981                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14982                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14983                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14984        assert_eq_m512i(r, e);
14985    }
14986
14987    #[simd_test(enable = "avx512bw")]
14988    const fn test_mm512_mask_min_epi8() {
14989        #[rustfmt::skip]
14990        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14991                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14992                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14993                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14994        #[rustfmt::skip]
14995        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14996                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14997                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14998                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14999        let r = _mm512_mask_min_epi8(a, 0, a, b);
15000        assert_eq_m512i(r, a);
15001        let r = _mm512_mask_min_epi8(
15002            a,
15003            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
15004            a,
15005            b,
15006        );
15007        #[rustfmt::skip]
15008        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
15009                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
15010                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
15011                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
15012        assert_eq_m512i(r, e);
15013    }
15014
15015    #[simd_test(enable = "avx512bw")]
15016    const fn test_mm512_maskz_min_epi8() {
15017        #[rustfmt::skip]
15018        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
15019                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
15020                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
15021                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
15022        #[rustfmt::skip]
15023        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
15024                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
15025                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
15026                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
15027        let r = _mm512_maskz_min_epi8(0, a, b);
15028        assert_eq_m512i(r, _mm512_setzero_si512());
15029        let r = _mm512_maskz_min_epi8(
15030            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
15031            a,
15032            b,
15033        );
15034        #[rustfmt::skip]
15035        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
15036                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
15037                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
15038                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
15039        assert_eq_m512i(r, e);
15040    }
15041
15042    #[simd_test(enable = "avx512bw,avx512vl")]
15043    const fn test_mm256_mask_min_epi8() {
15044        #[rustfmt::skip]
15045        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
15046                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
15047        #[rustfmt::skip]
15048        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
15049                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
15050        let r = _mm256_mask_min_epi8(a, 0, a, b);
15051        assert_eq_m256i(r, a);
15052        let r = _mm256_mask_min_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
15053        #[rustfmt::skip]
15054        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
15055                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
15056        assert_eq_m256i(r, e);
15057    }
15058
15059    #[simd_test(enable = "avx512bw,avx512vl")]
15060    const fn test_mm256_maskz_min_epi8() {
15061        #[rustfmt::skip]
15062        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
15063                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
15064        #[rustfmt::skip]
15065        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
15066                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
15067        let r = _mm256_maskz_min_epi8(0, a, b);
15068        assert_eq_m256i(r, _mm256_setzero_si256());
15069        let r = _mm256_maskz_min_epi8(0b00000000_11111111_00000000_11111111, a, b);
15070        #[rustfmt::skip]
15071        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
15072                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
15073        assert_eq_m256i(r, e);
15074    }
15075
15076    #[simd_test(enable = "avx512bw,avx512vl")]
15077    const fn test_mm_mask_min_epi8() {
15078        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
15079        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
15080        let r = _mm_mask_min_epi8(a, 0, a, b);
15081        assert_eq_m128i(r, a);
15082        let r = _mm_mask_min_epi8(a, 0b00000000_11111111, a, b);
15083        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
15084        assert_eq_m128i(r, e);
15085    }
15086
15087    #[simd_test(enable = "avx512bw,avx512vl")]
15088    const fn test_mm_maskz_min_epi8() {
15089        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
15090        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
15091        let r = _mm_maskz_min_epi8(0, a, b);
15092        assert_eq_m128i(r, _mm_setzero_si128());
15093        let r = _mm_maskz_min_epi8(0b00000000_11111111, a, b);
15094        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
15095        assert_eq_m128i(r, e);
15096    }
15097
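    // The unsigned less-than tests below deliberately use negative `set1`
    // arguments: -2 and -1 are stored as 0xFFFE and 0xFFFF, so every lane of
    // `a` is strictly less than the corresponding lane of `b` when the bits
    // are interpreted as unsigned integers, and the full mask is expected.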
15098    #[simd_test(enable = "avx512bw")]
15099    const fn test_mm512_cmplt_epu16_mask() {
15100        let a = _mm512_set1_epi16(-2);
15101        let b = _mm512_set1_epi16(-1);
15102        let m = _mm512_cmplt_epu16_mask(a, b);
15103        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15104    }
15105
15106    #[simd_test(enable = "avx512bw")]
15107    const fn test_mm512_mask_cmplt_epu16_mask() {
15108        let a = _mm512_set1_epi16(-2);
15109        let b = _mm512_set1_epi16(-1);
15110        let mask = 0b01010101_01010101_01010101_01010101;
15111        let r = _mm512_mask_cmplt_epu16_mask(mask, a, b);
15112        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15113    }
15114
15115    #[simd_test(enable = "avx512bw,avx512vl")]
15116    const fn test_mm256_cmplt_epu16_mask() {
15117        let a = _mm256_set1_epi16(-2);
15118        let b = _mm256_set1_epi16(-1);
15119        let m = _mm256_cmplt_epu16_mask(a, b);
15120        assert_eq!(m, 0b11111111_11111111);
15121    }
15122
15123    #[simd_test(enable = "avx512bw,avx512vl")]
15124    const fn test_mm256_mask_cmplt_epu16_mask() {
15125        let a = _mm256_set1_epi16(-2);
15126        let b = _mm256_set1_epi16(-1);
15127        let mask = 0b01010101_01010101;
15128        let r = _mm256_mask_cmplt_epu16_mask(mask, a, b);
15129        assert_eq!(r, 0b01010101_01010101);
15130    }
15131
15132    #[simd_test(enable = "avx512bw,avx512vl")]
15133    const fn test_mm_cmplt_epu16_mask() {
15134        let a = _mm_set1_epi16(-2);
15135        let b = _mm_set1_epi16(-1);
15136        let m = _mm_cmplt_epu16_mask(a, b);
15137        assert_eq!(m, 0b11111111);
15138    }
15139
15140    #[simd_test(enable = "avx512bw,avx512vl")]
15141    const fn test_mm_mask_cmplt_epu16_mask() {
15142        let a = _mm_set1_epi16(-2);
15143        let b = _mm_set1_epi16(-1);
15144        let mask = 0b01010101;
15145        let r = _mm_mask_cmplt_epu16_mask(mask, a, b);
15146        assert_eq!(r, 0b01010101);
15147    }
15148
15149    #[simd_test(enable = "avx512bw")]
15150    const fn test_mm512_cmplt_epu8_mask() {
15151        let a = _mm512_set1_epi8(-2);
15152        let b = _mm512_set1_epi8(-1);
15153        let m = _mm512_cmplt_epu8_mask(a, b);
15154        assert_eq!(
15155            m,
15156            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
15157        );
15158    }
15159
15160    #[simd_test(enable = "avx512bw")]
15161    const fn test_mm512_mask_cmplt_epu8_mask() {
15162        let a = _mm512_set1_epi8(-2);
15163        let b = _mm512_set1_epi8(-1);
15164        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
15165        let r = _mm512_mask_cmplt_epu8_mask(mask, a, b);
15166        assert_eq!(
15167            r,
15168            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
15169        );
15170    }
15171
15172    #[simd_test(enable = "avx512bw,avx512vl")]
15173    const fn test_mm256_cmplt_epu8_mask() {
15174        let a = _mm256_set1_epi8(-2);
15175        let b = _mm256_set1_epi8(-1);
15176        let m = _mm256_cmplt_epu8_mask(a, b);
15177        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15178    }
15179
15180    #[simd_test(enable = "avx512bw,avx512vl")]
15181    const fn test_mm256_mask_cmplt_epu8_mask() {
15182        let a = _mm256_set1_epi8(-2);
15183        let b = _mm256_set1_epi8(-1);
15184        let mask = 0b01010101_01010101_01010101_01010101;
15185        let r = _mm256_mask_cmplt_epu8_mask(mask, a, b);
15186        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15187    }
15188
15189    #[simd_test(enable = "avx512bw,avx512vl")]
15190    const fn test_mm_cmplt_epu8_mask() {
15191        let a = _mm_set1_epi8(-2);
15192        let b = _mm_set1_epi8(-1);
15193        let m = _mm_cmplt_epu8_mask(a, b);
15194        assert_eq!(m, 0b11111111_11111111);
15195    }
15196
15197    #[simd_test(enable = "avx512bw,avx512vl")]
15198    const fn test_mm_mask_cmplt_epu8_mask() {
15199        let a = _mm_set1_epi8(-2);
15200        let b = _mm_set1_epi8(-1);
15201        let mask = 0b01010101_01010101;
15202        let r = _mm_mask_cmplt_epu8_mask(mask, a, b);
15203        assert_eq!(r, 0b01010101_01010101);
15204    }
15205
15206    #[simd_test(enable = "avx512bw")]
15207    const fn test_mm512_cmplt_epi16_mask() {
15208        let a = _mm512_set1_epi16(-2);
15209        let b = _mm512_set1_epi16(-1);
15210        let m = _mm512_cmplt_epi16_mask(a, b);
15211        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15212    }
15213
15214    #[simd_test(enable = "avx512bw")]
15215    const fn test_mm512_mask_cmplt_epi16_mask() {
15216        let a = _mm512_set1_epi16(-2);
15217        let b = _mm512_set1_epi16(-1);
15218        let mask = 0b01010101_01010101_01010101_01010101;
15219        let r = _mm512_mask_cmplt_epi16_mask(mask, a, b);
15220        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15221    }
15222
15223    #[simd_test(enable = "avx512bw,avx512vl")]
15224    const fn test_mm256_cmplt_epi16_mask() {
15225        let a = _mm256_set1_epi16(-2);
15226        let b = _mm256_set1_epi16(-1);
15227        let m = _mm256_cmplt_epi16_mask(a, b);
15228        assert_eq!(m, 0b11111111_11111111);
15229    }
15230
15231    #[simd_test(enable = "avx512bw,avx512vl")]
15232    const fn test_mm256_mask_cmplt_epi16_mask() {
15233        let a = _mm256_set1_epi16(-2);
15234        let b = _mm256_set1_epi16(-1);
15235        let mask = 0b01010101_01010101;
15236        let r = _mm256_mask_cmplt_epi16_mask(mask, a, b);
15237        assert_eq!(r, 0b01010101_01010101);
15238    }
15239
15240    #[simd_test(enable = "avx512bw,avx512vl")]
15241    const fn test_mm_cmplt_epi16_mask() {
15242        let a = _mm_set1_epi16(-2);
15243        let b = _mm_set1_epi16(-1);
15244        let m = _mm_cmplt_epi16_mask(a, b);
15245        assert_eq!(m, 0b11111111);
15246    }
15247
15248    #[simd_test(enable = "avx512bw,avx512vl")]
15249    const fn test_mm_mask_cmplt_epi16_mask() {
15250        let a = _mm_set1_epi16(-2);
15251        let b = _mm_set1_epi16(-1);
15252        let mask = 0b01010101;
15253        let r = _mm_mask_cmplt_epi16_mask(mask, a, b);
15254        assert_eq!(r, 0b01010101);
15255    }
15256
15257    #[simd_test(enable = "avx512bw")]
15258    const fn test_mm512_cmplt_epi8_mask() {
15259        let a = _mm512_set1_epi8(-2);
15260        let b = _mm512_set1_epi8(-1);
15261        let m = _mm512_cmplt_epi8_mask(a, b);
15262        assert_eq!(
15263            m,
15264            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
15265        );
15266    }
15267
15268    #[simd_test(enable = "avx512bw")]
15269    const fn test_mm512_mask_cmplt_epi8_mask() {
15270        let a = _mm512_set1_epi8(-2);
15271        let b = _mm512_set1_epi8(-1);
15272        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
15273        let r = _mm512_mask_cmplt_epi8_mask(mask, a, b);
15274        assert_eq!(
15275            r,
15276            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
15277        );
15278    }
15279
15280    #[simd_test(enable = "avx512bw,avx512vl")]
15281    const fn test_mm256_cmplt_epi8_mask() {
15282        let a = _mm256_set1_epi8(-2);
15283        let b = _mm256_set1_epi8(-1);
15284        let m = _mm256_cmplt_epi8_mask(a, b);
15285        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15286    }
15287
15288    #[simd_test(enable = "avx512bw,avx512vl")]
15289    const fn test_mm256_mask_cmplt_epi8_mask() {
15290        let a = _mm256_set1_epi8(-2);
15291        let b = _mm256_set1_epi8(-1);
15292        let mask = 0b01010101_01010101_01010101_01010101;
15293        let r = _mm256_mask_cmplt_epi8_mask(mask, a, b);
15294        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15295    }
15296
15297    #[simd_test(enable = "avx512bw,avx512vl")]
15298    const fn test_mm_cmplt_epi8_mask() {
15299        let a = _mm_set1_epi8(-2);
15300        let b = _mm_set1_epi8(-1);
15301        let m = _mm_cmplt_epi8_mask(a, b);
15302        assert_eq!(m, 0b11111111_11111111);
15303    }
15304
15305    #[simd_test(enable = "avx512bw,avx512vl")]
15306    const fn test_mm_mask_cmplt_epi8_mask() {
15307        let a = _mm_set1_epi8(-2);
15308        let b = _mm_set1_epi8(-1);
15309        let mask = 0b01010101_01010101;
15310        let r = _mm_mask_cmplt_epi8_mask(mask, a, b);
15311        assert_eq!(r, 0b01010101_01010101);
15312    }
15313
15314    #[simd_test(enable = "avx512bw")]
15315    const fn test_mm512_cmpgt_epu16_mask() {
15316        let a = _mm512_set1_epi16(2);
15317        let b = _mm512_set1_epi16(1);
15318        let m = _mm512_cmpgt_epu16_mask(a, b);
15319        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15320    }
15321
15322    #[simd_test(enable = "avx512bw")]
15323    const fn test_mm512_mask_cmpgt_epu16_mask() {
15324        let a = _mm512_set1_epi16(2);
15325        let b = _mm512_set1_epi16(1);
15326        let mask = 0b01010101_01010101_01010101_01010101;
15327        let r = _mm512_mask_cmpgt_epu16_mask(mask, a, b);
15328        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15329    }
15330
15331    #[simd_test(enable = "avx512bw,avx512vl")]
15332    const fn test_mm256_cmpgt_epu16_mask() {
15333        let a = _mm256_set1_epi16(2);
15334        let b = _mm256_set1_epi16(1);
15335        let m = _mm256_cmpgt_epu16_mask(a, b);
15336        assert_eq!(m, 0b11111111_11111111);
15337    }
15338
15339    #[simd_test(enable = "avx512bw,avx512vl")]
15340    const fn test_mm256_mask_cmpgt_epu16_mask() {
15341        let a = _mm256_set1_epi16(2);
15342        let b = _mm256_set1_epi16(1);
15343        let mask = 0b01010101_01010101;
15344        let r = _mm256_mask_cmpgt_epu16_mask(mask, a, b);
15345        assert_eq!(r, 0b01010101_01010101);
15346    }
15347
15348    #[simd_test(enable = "avx512bw,avx512vl")]
15349    const fn test_mm_cmpgt_epu16_mask() {
15350        let a = _mm_set1_epi16(2);
15351        let b = _mm_set1_epi16(1);
15352        let m = _mm_cmpgt_epu16_mask(a, b);
15353        assert_eq!(m, 0b11111111);
15354    }
15355
15356    #[simd_test(enable = "avx512bw,avx512vl")]
15357    const fn test_mm_mask_cmpgt_epu16_mask() {
15358        let a = _mm_set1_epi16(2);
15359        let b = _mm_set1_epi16(1);
15360        let mask = 0b01010101;
15361        let r = _mm_mask_cmpgt_epu16_mask(mask, a, b);
15362        assert_eq!(r, 0b01010101);
15363    }
15364
15365    #[simd_test(enable = "avx512bw")]
15366    const fn test_mm512_cmpgt_epu8_mask() {
15367        let a = _mm512_set1_epi8(2);
15368        let b = _mm512_set1_epi8(1);
15369        let m = _mm512_cmpgt_epu8_mask(a, b);
15370        assert_eq!(
15371            m,
15372            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
15373        );
15374    }
15375
15376    #[simd_test(enable = "avx512bw")]
15377    const fn test_mm512_mask_cmpgt_epu8_mask() {
15378        let a = _mm512_set1_epi8(2);
15379        let b = _mm512_set1_epi8(1);
15380        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
15381        let r = _mm512_mask_cmpgt_epu8_mask(mask, a, b);
15382        assert_eq!(
15383            r,
15384            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
15385        );
15386    }
15387
15388    #[simd_test(enable = "avx512bw,avx512vl")]
15389    const fn test_mm256_cmpgt_epu8_mask() {
15390        let a = _mm256_set1_epi8(2);
15391        let b = _mm256_set1_epi8(1);
15392        let m = _mm256_cmpgt_epu8_mask(a, b);
15393        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15394    }
15395
15396    #[simd_test(enable = "avx512bw,avx512vl")]
15397    const fn test_mm256_mask_cmpgt_epu8_mask() {
15398        let a = _mm256_set1_epi8(2);
15399        let b = _mm256_set1_epi8(1);
15400        let mask = 0b01010101_01010101_01010101_01010101;
15401        let r = _mm256_mask_cmpgt_epu8_mask(mask, a, b);
15402        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15403    }
15404
15405    #[simd_test(enable = "avx512bw,avx512vl")]
15406    const fn test_mm_cmpgt_epu8_mask() {
15407        let a = _mm_set1_epi8(2);
15408        let b = _mm_set1_epi8(1);
15409        let m = _mm_cmpgt_epu8_mask(a, b);
15410        assert_eq!(m, 0b11111111_11111111);
15411    }
15412
15413    #[simd_test(enable = "avx512bw,avx512vl")]
15414    const fn test_mm_mask_cmpgt_epu8_mask() {
15415        let a = _mm_set1_epi8(2);
15416        let b = _mm_set1_epi8(1);
15417        let mask = 0b01010101_01010101;
15418        let r = _mm_mask_cmpgt_epu8_mask(mask, a, b);
15419        assert_eq!(r, 0b01010101_01010101);
15420    }
15421
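    // The signed greater-than tests compare 2 against -1; the same bit
    // patterns compared as unsigned (2 vs. 0xFFFF) would give the opposite
    // result, so these cases exercise the sign-aware comparison path.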
15422    #[simd_test(enable = "avx512bw")]
15423    const fn test_mm512_cmpgt_epi16_mask() {
15424        let a = _mm512_set1_epi16(2);
15425        let b = _mm512_set1_epi16(-1);
15426        let m = _mm512_cmpgt_epi16_mask(a, b);
15427        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15428    }
15429
15430    #[simd_test(enable = "avx512bw")]
15431    const fn test_mm512_mask_cmpgt_epi16_mask() {
15432        let a = _mm512_set1_epi16(2);
15433        let b = _mm512_set1_epi16(-1);
15434        let mask = 0b01010101_01010101_01010101_01010101;
15435        let r = _mm512_mask_cmpgt_epi16_mask(mask, a, b);
15436        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15437    }
15438
15439    #[simd_test(enable = "avx512bw,avx512vl")]
15440    const fn test_mm256_cmpgt_epi16_mask() {
15441        let a = _mm256_set1_epi16(2);
15442        let b = _mm256_set1_epi16(-1);
15443        let m = _mm256_cmpgt_epi16_mask(a, b);
15444        assert_eq!(m, 0b11111111_11111111);
15445    }
15446
15447    #[simd_test(enable = "avx512bw,avx512vl")]
15448    const fn test_mm256_mask_cmpgt_epi16_mask() {
15449        let a = _mm256_set1_epi16(2);
15450        let b = _mm256_set1_epi16(-1);
15451        let mask = 0b01010101_01010101;
15452        let r = _mm256_mask_cmpgt_epi16_mask(mask, a, b);
15453        assert_eq!(r, 0b01010101_01010101);
15454    }
15455
15456    #[simd_test(enable = "avx512bw,avx512vl")]
15457    const fn test_mm_cmpgt_epi16_mask() {
15458        let a = _mm_set1_epi16(2);
15459        let b = _mm_set1_epi16(-1);
15460        let m = _mm_cmpgt_epi16_mask(a, b);
15461        assert_eq!(m, 0b11111111);
15462    }
15463
15464    #[simd_test(enable = "avx512bw,avx512vl")]
15465    const fn test_mm_mask_cmpgt_epi16_mask() {
15466        let a = _mm_set1_epi16(2);
15467        let b = _mm_set1_epi16(-1);
15468        let mask = 0b01010101;
15469        let r = _mm_mask_cmpgt_epi16_mask(mask, a, b);
15470        assert_eq!(r, 0b01010101);
15471    }
15472
15473    #[simd_test(enable = "avx512bw")]
15474    const fn test_mm512_cmpgt_epi8_mask() {
15475        let a = _mm512_set1_epi8(2);
15476        let b = _mm512_set1_epi8(-1);
15477        let m = _mm512_cmpgt_epi8_mask(a, b);
15478        assert_eq!(
15479            m,
15480            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
15481        );
15482    }
15483
15484    #[simd_test(enable = "avx512bw")]
15485    const fn test_mm512_mask_cmpgt_epi8_mask() {
15486        let a = _mm512_set1_epi8(2);
15487        let b = _mm512_set1_epi8(-1);
15488        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
15489        let r = _mm512_mask_cmpgt_epi8_mask(mask, a, b);
15490        assert_eq!(
15491            r,
15492            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
15493        );
15494    }
15495
15496    #[simd_test(enable = "avx512bw,avx512vl")]
15497    const fn test_mm256_cmpgt_epi8_mask() {
15498        let a = _mm256_set1_epi8(2);
15499        let b = _mm256_set1_epi8(-1);
15500        let m = _mm256_cmpgt_epi8_mask(a, b);
15501        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15502    }
15503
15504    #[simd_test(enable = "avx512bw,avx512vl")]
15505    const fn test_mm256_mask_cmpgt_epi8_mask() {
15506        let a = _mm256_set1_epi8(2);
15507        let b = _mm256_set1_epi8(-1);
15508        let mask = 0b01010101_01010101_01010101_01010101;
15509        let r = _mm256_mask_cmpgt_epi8_mask(mask, a, b);
15510        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15511    }
15512
15513    #[simd_test(enable = "avx512bw,avx512vl")]
15514    const fn test_mm_cmpgt_epi8_mask() {
15515        let a = _mm_set1_epi8(2);
15516        let b = _mm_set1_epi8(-1);
15517        let m = _mm_cmpgt_epi8_mask(a, b);
15518        assert_eq!(m, 0b11111111_11111111);
15519    }
15520
15521    #[simd_test(enable = "avx512bw,avx512vl")]
15522    const fn test_mm_mask_cmpgt_epi8_mask() {
15523        let a = _mm_set1_epi8(2);
15524        let b = _mm_set1_epi8(-1);
15525        let mask = 0b01010101_01010101;
15526        let r = _mm_mask_cmpgt_epi8_mask(mask, a, b);
15527        assert_eq!(r, 0b01010101_01010101);
15528    }
15529
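    // For the less-than-or-equal tests both operands are identical (-1 in
    // every lane), so the predicate holds for all lanes regardless of
    // signedness and the unmasked comparisons return the all-ones mask.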
15530    #[simd_test(enable = "avx512bw")]
15531    const fn test_mm512_cmple_epu16_mask() {
15532        let a = _mm512_set1_epi16(-1);
15533        let b = _mm512_set1_epi16(-1);
15534        let m = _mm512_cmple_epu16_mask(a, b);
15535        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15536    }
15537
15538    #[simd_test(enable = "avx512bw")]
15539    const fn test_mm512_mask_cmple_epu16_mask() {
15540        let a = _mm512_set1_epi16(-1);
15541        let b = _mm512_set1_epi16(-1);
15542        let mask = 0b01010101_01010101_01010101_01010101;
15543        let r = _mm512_mask_cmple_epu16_mask(mask, a, b);
15544        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15545    }
15546
15547    #[simd_test(enable = "avx512bw,avx512vl")]
15548    const fn test_mm256_cmple_epu16_mask() {
15549        let a = _mm256_set1_epi16(-1);
15550        let b = _mm256_set1_epi16(-1);
15551        let m = _mm256_cmple_epu16_mask(a, b);
15552        assert_eq!(m, 0b11111111_11111111);
15553    }
15554
15555    #[simd_test(enable = "avx512bw,avx512vl")]
15556    const fn test_mm256_mask_cmple_epu16_mask() {
15557        let a = _mm256_set1_epi16(-1);
15558        let b = _mm256_set1_epi16(-1);
15559        let mask = 0b01010101_01010101;
15560        let r = _mm256_mask_cmple_epu16_mask(mask, a, b);
15561        assert_eq!(r, 0b01010101_01010101);
15562    }
15563
15564    #[simd_test(enable = "avx512bw,avx512vl")]
15565    const fn test_mm_cmple_epu16_mask() {
15566        let a = _mm_set1_epi16(-1);
15567        let b = _mm_set1_epi16(-1);
15568        let m = _mm_cmple_epu16_mask(a, b);
15569        assert_eq!(m, 0b11111111);
15570    }
15571
15572    #[simd_test(enable = "avx512bw,avx512vl")]
15573    const fn test_mm_mask_cmple_epu16_mask() {
15574        let a = _mm_set1_epi16(-1);
15575        let b = _mm_set1_epi16(-1);
15576        let mask = 0b01010101;
15577        let r = _mm_mask_cmple_epu16_mask(mask, a, b);
15578        assert_eq!(r, 0b01010101);
15579    }
15580
15581    #[simd_test(enable = "avx512bw")]
15582    const fn test_mm512_cmple_epu8_mask() {
15583        let a = _mm512_set1_epi8(-1);
15584        let b = _mm512_set1_epi8(-1);
15585        let m = _mm512_cmple_epu8_mask(a, b);
15586        assert_eq!(
15587            m,
15588            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
15589        );
15590    }
15591
15592    #[simd_test(enable = "avx512bw")]
15593    const fn test_mm512_mask_cmple_epu8_mask() {
15594        let a = _mm512_set1_epi8(-1);
15595        let b = _mm512_set1_epi8(-1);
15596        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
15597        let r = _mm512_mask_cmple_epu8_mask(mask, a, b);
15598        assert_eq!(
15599            r,
15600            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
15601        );
15602    }
15603
15604    #[simd_test(enable = "avx512bw,avx512vl")]
15605    const fn test_mm256_cmple_epu8_mask() {
15606        let a = _mm256_set1_epi8(-1);
15607        let b = _mm256_set1_epi8(-1);
15608        let m = _mm256_cmple_epu8_mask(a, b);
15609        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15610    }
15611
15612    #[simd_test(enable = "avx512bw,avx512vl")]
15613    const fn test_mm256_mask_cmple_epu8_mask() {
15614        let a = _mm256_set1_epi8(-1);
15615        let b = _mm256_set1_epi8(-1);
15616        let mask = 0b01010101_01010101_01010101_01010101;
15617        let r = _mm256_mask_cmple_epu8_mask(mask, a, b);
15618        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15619    }
15620
15621    #[simd_test(enable = "avx512bw,avx512vl")]
15622    const fn test_mm_cmple_epu8_mask() {
15623        let a = _mm_set1_epi8(-1);
15624        let b = _mm_set1_epi8(-1);
15625        let m = _mm_cmple_epu8_mask(a, b);
15626        assert_eq!(m, 0b11111111_11111111);
15627    }
15628
15629    #[simd_test(enable = "avx512bw,avx512vl")]
15630    const fn test_mm_mask_cmple_epu8_mask() {
15631        let a = _mm_set1_epi8(-1);
15632        let b = _mm_set1_epi8(-1);
15633        let mask = 0b01010101_01010101;
15634        let r = _mm_mask_cmple_epu8_mask(mask, a, b);
15635        assert_eq!(r, 0b01010101_01010101);
15636    }
15637
15638    #[simd_test(enable = "avx512bw")]
15639    const fn test_mm512_cmple_epi16_mask() {
15640        let a = _mm512_set1_epi16(-1);
15641        let b = _mm512_set1_epi16(-1);
15642        let m = _mm512_cmple_epi16_mask(a, b);
15643        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15644    }
15645
15646    #[simd_test(enable = "avx512bw")]
15647    const fn test_mm512_mask_cmple_epi16_mask() {
15648        let a = _mm512_set1_epi16(-1);
15649        let b = _mm512_set1_epi16(-1);
15650        let mask = 0b01010101_01010101_01010101_01010101;
15651        let r = _mm512_mask_cmple_epi16_mask(mask, a, b);
15652        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15653    }
15654
15655    #[simd_test(enable = "avx512bw,avx512vl")]
15656    const fn test_mm256_cmple_epi16_mask() {
15657        let a = _mm256_set1_epi16(-1);
15658        let b = _mm256_set1_epi16(-1);
15659        let m = _mm256_cmple_epi16_mask(a, b);
15660        assert_eq!(m, 0b11111111_11111111);
15661    }
15662
15663    #[simd_test(enable = "avx512bw,avx512vl")]
15664    const fn test_mm256_mask_cmple_epi16_mask() {
15665        let a = _mm256_set1_epi16(-1);
15666        let b = _mm256_set1_epi16(-1);
15667        let mask = 0b01010101_01010101;
15668        let r = _mm256_mask_cmple_epi16_mask(mask, a, b);
15669        assert_eq!(r, 0b01010101_01010101);
15670    }
15671
15672    #[simd_test(enable = "avx512bw,avx512vl")]
15673    const fn test_mm_cmple_epi16_mask() {
15674        let a = _mm_set1_epi16(-1);
15675        let b = _mm_set1_epi16(-1);
15676        let m = _mm_cmple_epi16_mask(a, b);
15677        assert_eq!(m, 0b11111111);
15678    }
15679
15680    #[simd_test(enable = "avx512bw,avx512vl")]
15681    const fn test_mm_mask_cmple_epi16_mask() {
15682        let a = _mm_set1_epi16(-1);
15683        let b = _mm_set1_epi16(-1);
15684        let mask = 0b01010101;
15685        let r = _mm_mask_cmple_epi16_mask(mask, a, b);
15686        assert_eq!(r, 0b01010101);
15687    }
15688
15689    #[simd_test(enable = "avx512bw")]
15690    const fn test_mm512_cmple_epi8_mask() {
15691        let a = _mm512_set1_epi8(-1);
15692        let b = _mm512_set1_epi8(-1);
15693        let m = _mm512_cmple_epi8_mask(a, b);
15694        assert_eq!(
15695            m,
15696            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
15697        );
15698    }
15699
15700    #[simd_test(enable = "avx512bw")]
15701    const fn test_mm512_mask_cmple_epi8_mask() {
15702        let a = _mm512_set1_epi8(-1);
15703        let b = _mm512_set1_epi8(-1);
15704        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
15705        let r = _mm512_mask_cmple_epi8_mask(mask, a, b);
15706        assert_eq!(
15707            r,
15708            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
15709        );
15710    }
15711
15712    #[simd_test(enable = "avx512bw,avx512vl")]
15713    const fn test_mm256_cmple_epi8_mask() {
15714        let a = _mm256_set1_epi8(-1);
15715        let b = _mm256_set1_epi8(-1);
15716        let m = _mm256_cmple_epi8_mask(a, b);
15717        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15718    }
15719
15720    #[simd_test(enable = "avx512bw,avx512vl")]
15721    const fn test_mm256_mask_cmple_epi8_mask() {
15722        let a = _mm256_set1_epi8(-1);
15723        let b = _mm256_set1_epi8(-1);
15724        let mask = 0b01010101_01010101_01010101_01010101;
15725        let r = _mm256_mask_cmple_epi8_mask(mask, a, b);
15726        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15727    }
15728
15729    #[simd_test(enable = "avx512bw,avx512vl")]
15730    const fn test_mm_cmple_epi8_mask() {
15731        let a = _mm_set1_epi8(-1);
15732        let b = _mm_set1_epi8(-1);
15733        let m = _mm_cmple_epi8_mask(a, b);
15734        assert_eq!(m, 0b11111111_11111111);
15735    }
15736
15737    #[simd_test(enable = "avx512bw,avx512vl")]
15738    const fn test_mm_mask_cmple_epi8_mask() {
15739        let a = _mm_set1_epi8(-1);
15740        let b = _mm_set1_epi8(-1);
15741        let mask = 0b01010101_01010101;
15742        let r = _mm_mask_cmple_epi8_mask(mask, a, b);
15743        assert_eq!(r, 0b01010101_01010101);
15744    }
15745
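    // The greater-than-or-equal tests also use identical operands, so every
    // lane satisfies the predicate and the full mask is expected.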
15746    #[simd_test(enable = "avx512bw")]
15747    const fn test_mm512_cmpge_epu16_mask() {
15748        let a = _mm512_set1_epi16(1);
15749        let b = _mm512_set1_epi16(1);
15750        let m = _mm512_cmpge_epu16_mask(a, b);
15751        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15752    }
15753
15754    #[simd_test(enable = "avx512bw")]
15755    const fn test_mm512_mask_cmpge_epu16_mask() {
15756        let a = _mm512_set1_epi16(1);
15757        let b = _mm512_set1_epi16(1);
15758        let mask = 0b01010101_01010101_01010101_01010101;
15759        let r = _mm512_mask_cmpge_epu16_mask(mask, a, b);
15760        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15761    }
15762
15763    #[simd_test(enable = "avx512bw,avx512vl")]
15764    const fn test_mm256_cmpge_epu16_mask() {
15765        let a = _mm256_set1_epi16(1);
15766        let b = _mm256_set1_epi16(1);
15767        let m = _mm256_cmpge_epu16_mask(a, b);
15768        assert_eq!(m, 0b11111111_11111111);
15769    }
15770
15771    #[simd_test(enable = "avx512bw,avx512vl")]
15772    const fn test_mm256_mask_cmpge_epu16_mask() {
15773        let a = _mm256_set1_epi16(1);
15774        let b = _mm256_set1_epi16(1);
15775        let mask = 0b01010101_01010101;
15776        let r = _mm256_mask_cmpge_epu16_mask(mask, a, b);
15777        assert_eq!(r, 0b01010101_01010101);
15778    }
15779
15780    #[simd_test(enable = "avx512bw,avx512vl")]
15781    const fn test_mm_cmpge_epu16_mask() {
15782        let a = _mm_set1_epi16(1);
15783        let b = _mm_set1_epi16(1);
15784        let m = _mm_cmpge_epu16_mask(a, b);
15785        assert_eq!(m, 0b11111111);
15786    }
15787
15788    #[simd_test(enable = "avx512bw,avx512vl")]
15789    const fn test_mm_mask_cmpge_epu16_mask() {
15790        let a = _mm_set1_epi16(1);
15791        let b = _mm_set1_epi16(1);
15792        let mask = 0b01010101;
15793        let r = _mm_mask_cmpge_epu16_mask(mask, a, b);
15794        assert_eq!(r, 0b01010101);
15795    }
15796
15797    #[simd_test(enable = "avx512bw")]
15798    const fn test_mm512_cmpge_epu8_mask() {
15799        let a = _mm512_set1_epi8(1);
15800        let b = _mm512_set1_epi8(1);
15801        let m = _mm512_cmpge_epu8_mask(a, b);
15802        assert_eq!(
15803            m,
15804            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
15805        );
15806    }
15807
15808    #[simd_test(enable = "avx512bw")]
15809    const fn test_mm512_mask_cmpge_epu8_mask() {
15810        let a = _mm512_set1_epi8(1);
15811        let b = _mm512_set1_epi8(1);
15812        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
15813        let r = _mm512_mask_cmpge_epu8_mask(mask, a, b);
15814        assert_eq!(
15815            r,
15816            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
15817        );
15818    }
15819
15820    #[simd_test(enable = "avx512bw,avx512vl")]
15821    const fn test_mm256_cmpge_epu8_mask() {
15822        let a = _mm256_set1_epi8(1);
15823        let b = _mm256_set1_epi8(1);
15824        let m = _mm256_cmpge_epu8_mask(a, b);
15825        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15826    }
15827
15828    #[simd_test(enable = "avx512bw,avx512vl")]
15829    const fn test_mm256_mask_cmpge_epu8_mask() {
15830        let a = _mm256_set1_epi8(1);
15831        let b = _mm256_set1_epi8(1);
15832        let mask = 0b01010101_01010101_01010101_01010101;
15833        let r = _mm256_mask_cmpge_epu8_mask(mask, a, b);
15834        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15835    }
15836
15837    #[simd_test(enable = "avx512bw,avx512vl")]
15838    const fn test_mm_cmpge_epu8_mask() {
15839        let a = _mm_set1_epi8(1);
15840        let b = _mm_set1_epi8(1);
15841        let m = _mm_cmpge_epu8_mask(a, b);
15842        assert_eq!(m, 0b11111111_11111111);
15843    }
15844
15845    #[simd_test(enable = "avx512bw,avx512vl")]
15846    const fn test_mm_mask_cmpge_epu8_mask() {
15847        let a = _mm_set1_epi8(1);
15848        let b = _mm_set1_epi8(1);
15849        let mask = 0b01010101_01010101;
15850        let r = _mm_mask_cmpge_epu8_mask(mask, a, b);
15851        assert_eq!(r, 0b01010101_01010101);
15852    }
15853
15854    #[simd_test(enable = "avx512bw")]
15855    const fn test_mm512_cmpge_epi16_mask() {
15856        let a = _mm512_set1_epi16(-1);
15857        let b = _mm512_set1_epi16(-1);
15858        let m = _mm512_cmpge_epi16_mask(a, b);
15859        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15860    }
15861
15862    #[simd_test(enable = "avx512bw")]
15863    const fn test_mm512_mask_cmpge_epi16_mask() {
15864        let a = _mm512_set1_epi16(-1);
15865        let b = _mm512_set1_epi16(-1);
15866        let mask = 0b01010101_01010101_01010101_01010101;
15867        let r = _mm512_mask_cmpge_epi16_mask(mask, a, b);
15868        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15869    }
15870
15871    #[simd_test(enable = "avx512bw,avx512vl")]
15872    const fn test_mm256_cmpge_epi16_mask() {
15873        let a = _mm256_set1_epi16(-1);
15874        let b = _mm256_set1_epi16(-1);
15875        let m = _mm256_cmpge_epi16_mask(a, b);
15876        assert_eq!(m, 0b11111111_11111111);
15877    }
15878
15879    #[simd_test(enable = "avx512bw,avx512vl")]
15880    const fn test_mm256_mask_cmpge_epi16_mask() {
15881        let a = _mm256_set1_epi16(-1);
15882        let b = _mm256_set1_epi16(-1);
15883        let mask = 0b01010101_01010101;
15884        let r = _mm256_mask_cmpge_epi16_mask(mask, a, b);
15885        assert_eq!(r, 0b01010101_01010101);
15886    }
15887
15888    #[simd_test(enable = "avx512bw,avx512vl")]
15889    const fn test_mm_cmpge_epi16_mask() {
15890        let a = _mm_set1_epi16(-1);
15891        let b = _mm_set1_epi16(-1);
15892        let m = _mm_cmpge_epi16_mask(a, b);
15893        assert_eq!(m, 0b11111111);
15894    }
15895
15896    #[simd_test(enable = "avx512bw,avx512vl")]
15897    const fn test_mm_mask_cmpge_epi16_mask() {
15898        let a = _mm_set1_epi16(-1);
15899        let b = _mm_set1_epi16(-1);
15900        let mask = 0b01010101;
15901        let r = _mm_mask_cmpge_epi16_mask(mask, a, b);
15902        assert_eq!(r, 0b01010101);
15903    }
15904
15905    #[simd_test(enable = "avx512bw")]
15906    const fn test_mm512_cmpge_epi8_mask() {
15907        let a = _mm512_set1_epi8(-1);
15908        let b = _mm512_set1_epi8(-1);
15909        let m = _mm512_cmpge_epi8_mask(a, b);
15910        assert_eq!(
15911            m,
15912            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
15913        );
15914    }
15915
15916    #[simd_test(enable = "avx512bw")]
15917    const fn test_mm512_mask_cmpge_epi8_mask() {
15918        let a = _mm512_set1_epi8(-1);
15919        let b = _mm512_set1_epi8(-1);
15920        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
15921        let r = _mm512_mask_cmpge_epi8_mask(mask, a, b);
15922        assert_eq!(
15923            r,
15924            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
15925        );
15926    }
15927
15928    #[simd_test(enable = "avx512bw,avx512vl")]
15929    const fn test_mm256_cmpge_epi8_mask() {
15930        let a = _mm256_set1_epi8(-1);
15931        let b = _mm256_set1_epi8(-1);
15932        let m = _mm256_cmpge_epi8_mask(a, b);
15933        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15934    }
15935
15936    #[simd_test(enable = "avx512bw,avx512vl")]
15937    const fn test_mm256_mask_cmpge_epi8_mask() {
15938        let a = _mm256_set1_epi8(-1);
15939        let b = _mm256_set1_epi8(-1);
15940        let mask = 0b01010101_01010101_01010101_01010101;
15941        let r = _mm256_mask_cmpge_epi8_mask(mask, a, b);
15942        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15943    }
15944
15945    #[simd_test(enable = "avx512bw,avx512vl")]
15946    const fn test_mm_cmpge_epi8_mask() {
15947        let a = _mm_set1_epi8(-1);
15948        let b = _mm_set1_epi8(-1);
15949        let m = _mm_cmpge_epi8_mask(a, b);
15950        assert_eq!(m, 0b11111111_11111111);
15951    }
15952
15953    #[simd_test(enable = "avx512bw,avx512vl")]
15954    const fn test_mm_mask_cmpge_epi8_mask() {
15955        let a = _mm_set1_epi8(-1);
15956        let b = _mm_set1_epi8(-1);
15957        let mask = 0b01010101_01010101;
15958        let r = _mm_mask_cmpge_epi8_mask(mask, a, b);
15959        assert_eq!(r, 0b01010101_01010101);
15960    }
15961
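    // Equality does not depend on how the lane bits are interpreted, so the
    // `epu` and `epi` cmpeq tests below follow the same pattern and expect
    // identical masks.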
15962    #[simd_test(enable = "avx512bw")]
15963    const fn test_mm512_cmpeq_epu16_mask() {
15964        let a = _mm512_set1_epi16(1);
15965        let b = _mm512_set1_epi16(1);
15966        let m = _mm512_cmpeq_epu16_mask(a, b);
15967        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15968    }
15969
15970    #[simd_test(enable = "avx512bw")]
15971    const fn test_mm512_mask_cmpeq_epu16_mask() {
15972        let a = _mm512_set1_epi16(1);
15973        let b = _mm512_set1_epi16(1);
15974        let mask = 0b01010101_01010101_01010101_01010101;
15975        let r = _mm512_mask_cmpeq_epu16_mask(mask, a, b);
15976        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15977    }
15978
15979    #[simd_test(enable = "avx512bw,avx512vl")]
15980    const fn test_mm256_cmpeq_epu16_mask() {
15981        let a = _mm256_set1_epi16(1);
15982        let b = _mm256_set1_epi16(1);
15983        let m = _mm256_cmpeq_epu16_mask(a, b);
15984        assert_eq!(m, 0b11111111_11111111);
15985    }
15986
15987    #[simd_test(enable = "avx512bw,avx512vl")]
15988    const fn test_mm256_mask_cmpeq_epu16_mask() {
15989        let a = _mm256_set1_epi16(1);
15990        let b = _mm256_set1_epi16(1);
15991        let mask = 0b01010101_01010101;
15992        let r = _mm256_mask_cmpeq_epu16_mask(mask, a, b);
15993        assert_eq!(r, 0b01010101_01010101);
15994    }
15995
15996    #[simd_test(enable = "avx512bw,avx512vl")]
15997    const fn test_mm_cmpeq_epu16_mask() {
15998        let a = _mm_set1_epi16(1);
15999        let b = _mm_set1_epi16(1);
16000        let m = _mm_cmpeq_epu16_mask(a, b);
16001        assert_eq!(m, 0b11111111);
16002    }
16003
16004    #[simd_test(enable = "avx512bw,avx512vl")]
16005    const fn test_mm_mask_cmpeq_epu16_mask() {
16006        let a = _mm_set1_epi16(1);
16007        let b = _mm_set1_epi16(1);
16008        let mask = 0b01010101;
16009        let r = _mm_mask_cmpeq_epu16_mask(mask, a, b);
16010        assert_eq!(r, 0b01010101);
16011    }
16012
16013    #[simd_test(enable = "avx512bw")]
16014    const fn test_mm512_cmpeq_epu8_mask() {
16015        let a = _mm512_set1_epi8(1);
16016        let b = _mm512_set1_epi8(1);
16017        let m = _mm512_cmpeq_epu8_mask(a, b);
16018        assert_eq!(
16019            m,
16020            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
16021        );
16022    }
16023
16024    #[simd_test(enable = "avx512bw")]
16025    const fn test_mm512_mask_cmpeq_epu8_mask() {
16026        let a = _mm512_set1_epi8(1);
16027        let b = _mm512_set1_epi8(1);
16028        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
16029        let r = _mm512_mask_cmpeq_epu8_mask(mask, a, b);
16030        assert_eq!(
16031            r,
16032            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
16033        );
16034    }
16035
16036    #[simd_test(enable = "avx512bw,avx512vl")]
16037    const fn test_mm256_cmpeq_epu8_mask() {
16038        let a = _mm256_set1_epi8(1);
16039        let b = _mm256_set1_epi8(1);
16040        let m = _mm256_cmpeq_epu8_mask(a, b);
16041        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
16042    }
16043
16044    #[simd_test(enable = "avx512bw,avx512vl")]
16045    const fn test_mm256_mask_cmpeq_epu8_mask() {
16046        let a = _mm256_set1_epi8(1);
16047        let b = _mm256_set1_epi8(1);
16048        let mask = 0b01010101_01010101_01010101_01010101;
16049        let r = _mm256_mask_cmpeq_epu8_mask(mask, a, b);
16050        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
16051    }
16052
16053    #[simd_test(enable = "avx512bw,avx512vl")]
16054    const fn test_mm_cmpeq_epu8_mask() {
16055        let a = _mm_set1_epi8(1);
16056        let b = _mm_set1_epi8(1);
16057        let m = _mm_cmpeq_epu8_mask(a, b);
16058        assert_eq!(m, 0b11111111_11111111);
16059    }
16060
16061    #[simd_test(enable = "avx512bw,avx512vl")]
16062    const fn test_mm_mask_cmpeq_epu8_mask() {
16063        let a = _mm_set1_epi8(1);
16064        let b = _mm_set1_epi8(1);
16065        let mask = 0b01010101_01010101;
16066        let r = _mm_mask_cmpeq_epu8_mask(mask, a, b);
16067        assert_eq!(r, 0b01010101_01010101);
16068    }
16069
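    // Signed equality comparisons: both operands hold -1 (all bits set), so every
    // active lane must still report equal when interpreted as signed epi16/epi8.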
16070    #[simd_test(enable = "avx512bw")]
16071    const fn test_mm512_cmpeq_epi16_mask() {
16072        let a = _mm512_set1_epi16(-1);
16073        let b = _mm512_set1_epi16(-1);
16074        let m = _mm512_cmpeq_epi16_mask(a, b);
16075        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
16076    }
16077
16078    #[simd_test(enable = "avx512bw")]
16079    const fn test_mm512_mask_cmpeq_epi16_mask() {
16080        let a = _mm512_set1_epi16(-1);
16081        let b = _mm512_set1_epi16(-1);
16082        let mask = 0b01010101_01010101_01010101_01010101;
16083        let r = _mm512_mask_cmpeq_epi16_mask(mask, a, b);
16084        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
16085    }
16086
16087    #[simd_test(enable = "avx512bw,avx512vl")]
16088    const fn test_mm256_cmpeq_epi16_mask() {
16089        let a = _mm256_set1_epi16(-1);
16090        let b = _mm256_set1_epi16(-1);
16091        let m = _mm256_cmpeq_epi16_mask(a, b);
16092        assert_eq!(m, 0b11111111_11111111);
16093    }
16094
16095    #[simd_test(enable = "avx512bw,avx512vl")]
16096    const fn test_mm256_mask_cmpeq_epi16_mask() {
16097        let a = _mm256_set1_epi16(-1);
16098        let b = _mm256_set1_epi16(-1);
16099        let mask = 0b01010101_01010101;
16100        let r = _mm256_mask_cmpeq_epi16_mask(mask, a, b);
16101        assert_eq!(r, 0b01010101_01010101);
16102    }
16103
16104    #[simd_test(enable = "avx512bw,avx512vl")]
16105    const fn test_mm_cmpeq_epi16_mask() {
16106        let a = _mm_set1_epi16(-1);
16107        let b = _mm_set1_epi16(-1);
16108        let m = _mm_cmpeq_epi16_mask(a, b);
16109        assert_eq!(m, 0b11111111);
16110    }
16111
16112    #[simd_test(enable = "avx512bw,avx512vl")]
16113    const fn test_mm_mask_cmpeq_epi16_mask() {
16114        let a = _mm_set1_epi16(-1);
16115        let b = _mm_set1_epi16(-1);
16116        let mask = 0b01010101;
16117        let r = _mm_mask_cmpeq_epi16_mask(mask, a, b);
16118        assert_eq!(r, 0b01010101);
16119    }
16120
16121    #[simd_test(enable = "avx512bw")]
16122    const fn test_mm512_cmpeq_epi8_mask() {
16123        let a = _mm512_set1_epi8(-1);
16124        let b = _mm512_set1_epi8(-1);
16125        let m = _mm512_cmpeq_epi8_mask(a, b);
16126        assert_eq!(
16127            m,
16128            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
16129        );
16130    }
16131
16132    #[simd_test(enable = "avx512bw")]
16133    const fn test_mm512_mask_cmpeq_epi8_mask() {
16134        let a = _mm512_set1_epi8(-1);
16135        let b = _mm512_set1_epi8(-1);
16136        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
16137        let r = _mm512_mask_cmpeq_epi8_mask(mask, a, b);
16138        assert_eq!(
16139            r,
16140            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
16141        );
16142    }
16143
16144    #[simd_test(enable = "avx512bw,avx512vl")]
16145    const fn test_mm256_cmpeq_epi8_mask() {
16146        let a = _mm256_set1_epi8(-1);
16147        let b = _mm256_set1_epi8(-1);
16148        let m = _mm256_cmpeq_epi8_mask(a, b);
16149        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
16150    }
16151
16152    #[simd_test(enable = "avx512bw,avx512vl")]
16153    const fn test_mm256_mask_cmpeq_epi8_mask() {
16154        let a = _mm256_set1_epi8(-1);
16155        let b = _mm256_set1_epi8(-1);
16156        let mask = 0b01010101_01010101_01010101_01010101;
16157        let r = _mm256_mask_cmpeq_epi8_mask(mask, a, b);
16158        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
16159    }
16160
16161    #[simd_test(enable = "avx512bw,avx512vl")]
16162    const fn test_mm_cmpeq_epi8_mask() {
16163        let a = _mm_set1_epi8(-1);
16164        let b = _mm_set1_epi8(-1);
16165        let m = _mm_cmpeq_epi8_mask(a, b);
16166        assert_eq!(m, 0b11111111_11111111);
16167    }
16168
16169    #[simd_test(enable = "avx512bw,avx512vl")]
16170    const fn test_mm_mask_cmpeq_epi8_mask() {
16171        let a = _mm_set1_epi8(-1);
16172        let b = _mm_set1_epi8(-1);
16173        let mask = 0b01010101_01010101;
16174        let r = _mm_mask_cmpeq_epi8_mask(mask, a, b);
16175        assert_eq!(r, 0b01010101_01010101);
16176    }
16177
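    // Not-equal comparisons: the operands differ in every lane (2 vs 1), so the
    // unmasked result is all ones and the masked result mirrors the input mask.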
16178    #[simd_test(enable = "avx512bw")]
16179    const fn test_mm512_cmpneq_epu16_mask() {
16180        let a = _mm512_set1_epi16(2);
16181        let b = _mm512_set1_epi16(1);
16182        let m = _mm512_cmpneq_epu16_mask(a, b);
16183        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
16184    }
16185
16186    #[simd_test(enable = "avx512bw")]
16187    const fn test_mm512_mask_cmpneq_epu16_mask() {
16188        let a = _mm512_set1_epi16(2);
16189        let b = _mm512_set1_epi16(1);
16190        let mask = 0b01010101_01010101_01010101_01010101;
16191        let r = _mm512_mask_cmpneq_epu16_mask(mask, a, b);
16192        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
16193    }
16194
16195    #[simd_test(enable = "avx512bw,avx512vl")]
16196    const fn test_mm256_cmpneq_epu16_mask() {
16197        let a = _mm256_set1_epi16(2);
16198        let b = _mm256_set1_epi16(1);
16199        let m = _mm256_cmpneq_epu16_mask(a, b);
16200        assert_eq!(m, 0b11111111_11111111);
16201    }
16202
16203    #[simd_test(enable = "avx512bw,avx512vl")]
16204    const fn test_mm256_mask_cmpneq_epu16_mask() {
16205        let a = _mm256_set1_epi16(2);
16206        let b = _mm256_set1_epi16(1);
16207        let mask = 0b01010101_01010101;
16208        let r = _mm256_mask_cmpneq_epu16_mask(mask, a, b);
16209        assert_eq!(r, 0b01010101_01010101);
16210    }
16211
16212    #[simd_test(enable = "avx512bw,avx512vl")]
16213    const fn test_mm_cmpneq_epu16_mask() {
16214        let a = _mm_set1_epi16(2);
16215        let b = _mm_set1_epi16(1);
16216        let m = _mm_cmpneq_epu16_mask(a, b);
16217        assert_eq!(m, 0b11111111);
16218    }
16219
16220    #[simd_test(enable = "avx512bw,avx512vl")]
16221    const fn test_mm_mask_cmpneq_epu16_mask() {
16222        let a = _mm_set1_epi16(2);
16223        let b = _mm_set1_epi16(1);
16224        let mask = 0b01010101;
16225        let r = _mm_mask_cmpneq_epu16_mask(mask, a, b);
16226        assert_eq!(r, 0b01010101);
16227    }
16228
16229    #[simd_test(enable = "avx512bw")]
16230    const fn test_mm512_cmpneq_epu8_mask() {
16231        let a = _mm512_set1_epi8(2);
16232        let b = _mm512_set1_epi8(1);
16233        let m = _mm512_cmpneq_epu8_mask(a, b);
16234        assert_eq!(
16235            m,
16236            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
16237        );
16238    }
16239
16240    #[simd_test(enable = "avx512bw")]
16241    const fn test_mm512_mask_cmpneq_epu8_mask() {
16242        let a = _mm512_set1_epi8(2);
16243        let b = _mm512_set1_epi8(1);
16244        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
16245        let r = _mm512_mask_cmpneq_epu8_mask(mask, a, b);
16246        assert_eq!(
16247            r,
16248            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
16249        );
16250    }
16251
16252    #[simd_test(enable = "avx512bw,avx512vl")]
16253    const fn test_mm256_cmpneq_epu8_mask() {
16254        let a = _mm256_set1_epi8(2);
16255        let b = _mm256_set1_epi8(1);
16256        let m = _mm256_cmpneq_epu8_mask(a, b);
16257        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
16258    }
16259
16260    #[simd_test(enable = "avx512bw,avx512vl")]
16261    const fn test_mm256_mask_cmpneq_epu8_mask() {
16262        let a = _mm256_set1_epi8(2);
16263        let b = _mm256_set1_epi8(1);
16264        let mask = 0b01010101_01010101_01010101_01010101;
16265        let r = _mm256_mask_cmpneq_epu8_mask(mask, a, b);
16266        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
16267    }
16268
16269    #[simd_test(enable = "avx512bw,avx512vl")]
16270    const fn test_mm_cmpneq_epu8_mask() {
16271        let a = _mm_set1_epi8(2);
16272        let b = _mm_set1_epi8(1);
16273        let m = _mm_cmpneq_epu8_mask(a, b);
16274        assert_eq!(m, 0b11111111_11111111);
16275    }
16276
16277    #[simd_test(enable = "avx512bw,avx512vl")]
16278    const fn test_mm_mask_cmpneq_epu8_mask() {
16279        let a = _mm_set1_epi8(2);
16280        let b = _mm_set1_epi8(1);
16281        let mask = 0b01010101_01010101;
16282        let r = _mm_mask_cmpneq_epu8_mask(mask, a, b);
16283        assert_eq!(r, 0b01010101_01010101);
16284    }
16285
16286    #[simd_test(enable = "avx512bw")]
16287    const fn test_mm512_cmpneq_epi16_mask() {
16288        let a = _mm512_set1_epi16(1);
16289        let b = _mm512_set1_epi16(-1);
16290        let m = _mm512_cmpneq_epi16_mask(a, b);
16291        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
16292    }
16293
16294    #[simd_test(enable = "avx512bw")]
16295    const fn test_mm512_mask_cmpneq_epi16_mask() {
16296        let a = _mm512_set1_epi16(1);
16297        let b = _mm512_set1_epi16(-1);
16298        let mask = 0b01010101_01010101_01010101_01010101;
16299        let r = _mm512_mask_cmpneq_epi16_mask(mask, a, b);
16300        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
16301    }
16302
16303    #[simd_test(enable = "avx512bw,avx512vl")]
16304    const fn test_mm256_cmpneq_epi16_mask() {
16305        let a = _mm256_set1_epi16(1);
16306        let b = _mm256_set1_epi16(-1);
16307        let m = _mm256_cmpneq_epi16_mask(a, b);
16308        assert_eq!(m, 0b11111111_11111111);
16309    }
16310
16311    #[simd_test(enable = "avx512bw,avx512vl")]
16312    const fn test_mm256_mask_cmpneq_epi16_mask() {
16313        let a = _mm256_set1_epi16(1);
16314        let b = _mm256_set1_epi16(-1);
16315        let mask = 0b01010101_01010101;
16316        let r = _mm256_mask_cmpneq_epi16_mask(mask, a, b);
16317        assert_eq!(r, 0b01010101_01010101);
16318    }
16319
16320    #[simd_test(enable = "avx512bw,avx512vl")]
16321    const fn test_mm_cmpneq_epi16_mask() {
16322        let a = _mm_set1_epi16(1);
16323        let b = _mm_set1_epi16(-1);
16324        let m = _mm_cmpneq_epi16_mask(a, b);
16325        assert_eq!(m, 0b11111111);
16326    }
16327
16328    #[simd_test(enable = "avx512bw,avx512vl")]
16329    const fn test_mm_mask_cmpneq_epi16_mask() {
16330        let a = _mm_set1_epi16(1);
16331        let b = _mm_set1_epi16(-1);
16332        let mask = 0b01010101;
16333        let r = _mm_mask_cmpneq_epi16_mask(mask, a, b);
16334        assert_eq!(r, 0b01010101);
16335    }
16336
16337    #[simd_test(enable = "avx512bw")]
16338    const fn test_mm512_cmpneq_epi8_mask() {
16339        let a = _mm512_set1_epi8(1);
16340        let b = _mm512_set1_epi8(-1);
16341        let m = _mm512_cmpneq_epi8_mask(a, b);
16342        assert_eq!(
16343            m,
16344            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
16345        );
16346    }
16347
16348    #[simd_test(enable = "avx512bw")]
16349    const fn test_mm512_mask_cmpneq_epi8_mask() {
16350        let a = _mm512_set1_epi8(1);
16351        let b = _mm512_set1_epi8(-1);
16352        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
16353        let r = _mm512_mask_cmpneq_epi8_mask(mask, a, b);
16354        assert_eq!(
16355            r,
16356            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
16357        );
16358    }
16359
16360    #[simd_test(enable = "avx512bw,avx512vl")]
16361    const fn test_mm256_cmpneq_epi8_mask() {
16362        let a = _mm256_set1_epi8(1);
16363        let b = _mm256_set1_epi8(-1);
16364        let m = _mm256_cmpneq_epi8_mask(a, b);
16365        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
16366    }
16367
16368    #[simd_test(enable = "avx512bw,avx512vl")]
16369    const fn test_mm256_mask_cmpneq_epi8_mask() {
16370        let a = _mm256_set1_epi8(1);
16371        let b = _mm256_set1_epi8(-1);
16372        let mask = 0b01010101_01010101_01010101_01010101;
16373        let r = _mm256_mask_cmpneq_epi8_mask(mask, a, b);
16374        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
16375    }
16376
16377    #[simd_test(enable = "avx512bw,avx512vl")]
16378    const fn test_mm_cmpneq_epi8_mask() {
16379        let a = _mm_set1_epi8(1);
16380        let b = _mm_set1_epi8(-1);
16381        let m = _mm_cmpneq_epi8_mask(a, b);
16382        assert_eq!(m, 0b11111111_11111111);
16383    }
16384
16385    #[simd_test(enable = "avx512bw,avx512vl")]
16386    const fn test_mm_mask_cmpneq_epi8_mask() {
16387        let a = _mm_set1_epi8(1);
16388        let b = _mm_set1_epi8(-1);
16389        let mask = 0b01010101_01010101;
16390        let r = _mm_mask_cmpneq_epi8_mask(mask, a, b);
16391        assert_eq!(r, 0b01010101_01010101);
16392    }
16393
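    // Generic compare intrinsics: the predicate is a const generic, here
    // _MM_CMPINT_LT (less-than). With a = 0 and b = 1 every lane satisfies the
    // predicate, so the unmasked result is all ones.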
16394    #[simd_test(enable = "avx512bw")]
16395    const fn test_mm512_cmp_epu16_mask() {
16396        let a = _mm512_set1_epi16(0);
16397        let b = _mm512_set1_epi16(1);
16398        let m = _mm512_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
16399        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
16400    }
16401
16402    #[simd_test(enable = "avx512bw")]
16403    const fn test_mm512_mask_cmp_epu16_mask() {
16404        let a = _mm512_set1_epi16(0);
16405        let b = _mm512_set1_epi16(1);
16406        let mask = 0b01010101_01010101_01010101_01010101;
16407        let r = _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
16408        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
16409    }
16410
16411    #[simd_test(enable = "avx512bw,avx512vl")]
16412    const fn test_mm256_cmp_epu16_mask() {
16413        let a = _mm256_set1_epi16(0);
16414        let b = _mm256_set1_epi16(1);
16415        let m = _mm256_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
16416        assert_eq!(m, 0b11111111_11111111);
16417    }
16418
16419    #[simd_test(enable = "avx512bw,avx512vl")]
16420    const fn test_mm256_mask_cmp_epu16_mask() {
16421        let a = _mm256_set1_epi16(0);
16422        let b = _mm256_set1_epi16(1);
16423        let mask = 0b01010101_01010101;
16424        let r = _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
16425        assert_eq!(r, 0b01010101_01010101);
16426    }
16427
16428    #[simd_test(enable = "avx512bw,avx512vl")]
16429    const fn test_mm_cmp_epu16_mask() {
16430        let a = _mm_set1_epi16(0);
16431        let b = _mm_set1_epi16(1);
16432        let m = _mm_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
16433        assert_eq!(m, 0b11111111);
16434    }
16435
16436    #[simd_test(enable = "avx512bw,avx512vl")]
16437    const fn test_mm_mask_cmp_epu16_mask() {
16438        let a = _mm_set1_epi16(0);
16439        let b = _mm_set1_epi16(1);
16440        let mask = 0b01010101;
16441        let r = _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
16442        assert_eq!(r, 0b01010101);
16443    }
16444
16445    #[simd_test(enable = "avx512bw")]
16446    const fn test_mm512_cmp_epu8_mask() {
16447        let a = _mm512_set1_epi8(0);
16448        let b = _mm512_set1_epi8(1);
16449        let m = _mm512_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
16450        assert_eq!(
16451            m,
16452            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
16453        );
16454    }
16455
16456    #[simd_test(enable = "avx512bw")]
16457    const fn test_mm512_mask_cmp_epu8_mask() {
16458        let a = _mm512_set1_epi8(0);
16459        let b = _mm512_set1_epi8(1);
16460        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
16461        let r = _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
16462        assert_eq!(
16463            r,
16464            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
16465        );
16466    }
16467
16468    #[simd_test(enable = "avx512bw,avx512vl")]
16469    const fn test_mm256_cmp_epu8_mask() {
16470        let a = _mm256_set1_epi8(0);
16471        let b = _mm256_set1_epi8(1);
16472        let m = _mm256_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
16473        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
16474    }
16475
16476    #[simd_test(enable = "avx512bw,avx512vl")]
16477    const fn test_mm256_mask_cmp_epu8_mask() {
16478        let a = _mm256_set1_epi8(0);
16479        let b = _mm256_set1_epi8(1);
16480        let mask = 0b01010101_01010101_01010101_01010101;
16481        let r = _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
16482        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
16483    }
16484
16485    #[simd_test(enable = "avx512bw,avx512vl")]
16486    const fn test_mm_cmp_epu8_mask() {
16487        let a = _mm_set1_epi8(0);
16488        let b = _mm_set1_epi8(1);
16489        let m = _mm_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
16490        assert_eq!(m, 0b11111111_11111111);
16491    }
16492
16493    #[simd_test(enable = "avx512bw,avx512vl")]
16494    const fn test_mm_mask_cmp_epu8_mask() {
16495        let a = _mm_set1_epi8(0);
16496        let b = _mm_set1_epi8(1);
16497        let mask = 0b01010101_01010101;
16498        let r = _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
16499        assert_eq!(r, 0b01010101_01010101);
16500    }
16501
16502    #[simd_test(enable = "avx512bw")]
16503    const fn test_mm512_cmp_epi16_mask() {
16504        let a = _mm512_set1_epi16(0);
16505        let b = _mm512_set1_epi16(1);
16506        let m = _mm512_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
16507        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
16508    }
16509
16510    #[simd_test(enable = "avx512bw")]
16511    const fn test_mm512_mask_cmp_epi16_mask() {
16512        let a = _mm512_set1_epi16(0);
16513        let b = _mm512_set1_epi16(1);
16514        let mask = 0b01010101_01010101_01010101_01010101;
16515        let r = _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
16516        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
16517    }
16518
16519    #[simd_test(enable = "avx512bw,avx512vl")]
16520    const fn test_mm256_cmp_epi16_mask() {
16521        let a = _mm256_set1_epi16(0);
16522        let b = _mm256_set1_epi16(1);
16523        let m = _mm256_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
16524        assert_eq!(m, 0b11111111_11111111);
16525    }
16526
16527    #[simd_test(enable = "avx512bw,avx512vl")]
16528    const fn test_mm256_mask_cmp_epi16_mask() {
16529        let a = _mm256_set1_epi16(0);
16530        let b = _mm256_set1_epi16(1);
16531        let mask = 0b01010101_01010101;
16532        let r = _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
16533        assert_eq!(r, 0b01010101_01010101);
16534    }
16535
16536    #[simd_test(enable = "avx512bw,avx512vl")]
16537    const fn test_mm_cmp_epi16_mask() {
16538        let a = _mm_set1_epi16(0);
16539        let b = _mm_set1_epi16(1);
16540        let m = _mm_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
16541        assert_eq!(m, 0b11111111);
16542    }
16543
16544    #[simd_test(enable = "avx512bw,avx512vl")]
16545    const fn test_mm_mask_cmp_epi16_mask() {
16546        let a = _mm_set1_epi16(0);
16547        let b = _mm_set1_epi16(1);
16548        let mask = 0b01010101;
16549        let r = _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
16550        assert_eq!(r, 0b01010101);
16551    }
16552
16553    #[simd_test(enable = "avx512bw")]
16554    const fn test_mm512_cmp_epi8_mask() {
16555        let a = _mm512_set1_epi8(0);
16556        let b = _mm512_set1_epi8(1);
16557        let m = _mm512_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
16558        assert_eq!(
16559            m,
16560            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
16561        );
16562    }
16563
16564    #[simd_test(enable = "avx512bw")]
16565    const fn test_mm512_mask_cmp_epi8_mask() {
16566        let a = _mm512_set1_epi8(0);
16567        let b = _mm512_set1_epi8(1);
16568        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
16569        let r = _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
16570        assert_eq!(
16571            r,
16572            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
16573        );
16574    }
16575
16576    #[simd_test(enable = "avx512bw,avx512vl")]
16577    const fn test_mm256_cmp_epi8_mask() {
16578        let a = _mm256_set1_epi8(0);
16579        let b = _mm256_set1_epi8(1);
16580        let m = _mm256_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
16581        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
16582    }
16583
16584    #[simd_test(enable = "avx512bw,avx512vl")]
16585    const fn test_mm256_mask_cmp_epi8_mask() {
16586        let a = _mm256_set1_epi8(0);
16587        let b = _mm256_set1_epi8(1);
16588        let mask = 0b01010101_01010101_01010101_01010101;
16589        let r = _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
16590        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
16591    }
16592
16593    #[simd_test(enable = "avx512bw,avx512vl")]
16594    const fn test_mm_cmp_epi8_mask() {
16595        let a = _mm_set1_epi8(0);
16596        let b = _mm_set1_epi8(1);
16597        let m = _mm_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
16598        assert_eq!(m, 0b11111111_11111111);
16599    }
16600
16601    #[simd_test(enable = "avx512bw,avx512vl")]
16602    const fn test_mm_mask_cmp_epi8_mask() {
16603        let a = _mm_set1_epi8(0);
16604        let b = _mm_set1_epi8(1);
16605        let mask = 0b01010101_01010101;
16606        let r = _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
16607        assert_eq!(r, 0b01010101_01010101);
16608    }
16609
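    // Horizontal reductions. For the add reductions every lane holds 1, so the
    // expected value is the number of lanes considered; the masked variants sum only
    // the lanes whose mask bit is set (e.g. 0b11110000 selects lanes 4..=7).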
16610    #[simd_test(enable = "avx512bw,avx512vl")]
16611    const fn test_mm256_reduce_add_epi16() {
16612        let a = _mm256_set1_epi16(1);
16613        let e = _mm256_reduce_add_epi16(a);
16614        assert_eq!(16, e);
16615    }
16616
16617    #[simd_test(enable = "avx512bw,avx512vl")]
16618    const fn test_mm256_mask_reduce_add_epi16() {
16619        let a = _mm256_set1_epi16(1);
16620        let e = _mm256_mask_reduce_add_epi16(0b11111111_00000000, a);
16621        assert_eq!(8, e);
16622    }
16623
16624    #[simd_test(enable = "avx512bw,avx512vl")]
16625    const fn test_mm_reduce_add_epi16() {
16626        let a = _mm_set1_epi16(1);
16627        let e = _mm_reduce_add_epi16(a);
16628        assert_eq!(8, e);
16629    }
16630
16631    #[simd_test(enable = "avx512bw,avx512vl")]
16632    const fn test_mm_mask_reduce_add_epi16() {
16633        let a = _mm_set1_epi16(1);
16634        let e = _mm_mask_reduce_add_epi16(0b11110000, a);
16635        assert_eq!(4, e);
16636    }
16637
16638    #[simd_test(enable = "avx512bw,avx512vl")]
16639    const fn test_mm256_reduce_add_epi8() {
16640        let a = _mm256_set1_epi8(1);
16641        let e = _mm256_reduce_add_epi8(a);
16642        assert_eq!(32, e);
16643    }
16644
16645    #[simd_test(enable = "avx512bw,avx512vl")]
16646    const fn test_mm256_mask_reduce_add_epi8() {
16647        let a = _mm256_set1_epi8(1);
16648        let e = _mm256_mask_reduce_add_epi8(0b11111111_00000000_11111111_00000000, a);
16649        assert_eq!(16, e);
16650    }
16651
16652    #[simd_test(enable = "avx512bw,avx512vl")]
16653    const fn test_mm_reduce_add_epi8() {
16654        let a = _mm_set1_epi8(1);
16655        let e = _mm_reduce_add_epi8(a);
16656        assert_eq!(16, e);
16657    }
16658
16659    #[simd_test(enable = "avx512bw,avx512vl")]
16660    const fn test_mm_mask_reduce_add_epi8() {
16661        let a = _mm_set1_epi8(1);
16662        let e = _mm_mask_reduce_add_epi8(0b11111111_00000000, a);
16663        assert_eq!(8, e);
16664    }
16665
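    // AND reductions: half the lanes hold 1 and half hold 2, so the full reduction is
    // 1 & 2 == 0, while the masked reductions cover only the lanes holding 1 and
    // therefore yield 1.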
16666    #[simd_test(enable = "avx512bw,avx512vl")]
16667    const fn test_mm256_reduce_and_epi16() {
16668        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
16669        let e = _mm256_reduce_and_epi16(a);
16670        assert_eq!(0, e);
16671    }
16672
16673    #[simd_test(enable = "avx512bw,avx512vl")]
16674    const fn test_mm256_mask_reduce_and_epi16() {
16675        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
16676        let e = _mm256_mask_reduce_and_epi16(0b11111111_00000000, a);
16677        assert_eq!(1, e);
16678    }
16679
16680    #[simd_test(enable = "avx512bw,avx512vl")]
16681    const fn test_mm_reduce_and_epi16() {
16682        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
16683        let e = _mm_reduce_and_epi16(a);
16684        assert_eq!(0, e);
16685    }
16686
16687    #[simd_test(enable = "avx512bw,avx512vl")]
16688    const fn test_mm_mask_reduce_and_epi16() {
16689        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
16690        let e = _mm_mask_reduce_and_epi16(0b11110000, a);
16691        assert_eq!(1, e);
16692    }
16693
16694    #[simd_test(enable = "avx512bw,avx512vl")]
16695    const fn test_mm256_reduce_and_epi8() {
16696        let a = _mm256_set_epi8(
16697            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
16698            2, 2, 2,
16699        );
16700        let e = _mm256_reduce_and_epi8(a);
16701        assert_eq!(0, e);
16702    }
16703
16704    #[simd_test(enable = "avx512bw,avx512vl")]
16705    const fn test_mm256_mask_reduce_and_epi8() {
16706        let a = _mm256_set_epi8(
16707            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
16708            2, 2, 2,
16709        );
16710        let e = _mm256_mask_reduce_and_epi8(0b11111111_00000000_11111111_00000000, a);
16711        assert_eq!(1, e);
16712    }
16713
16714    #[simd_test(enable = "avx512bw,avx512vl")]
16715    const fn test_mm_reduce_and_epi8() {
16716        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
16717        let e = _mm_reduce_and_epi8(a);
16718        assert_eq!(0, e);
16719    }
16720
16721    #[simd_test(enable = "avx512bw,avx512vl")]
16722    const fn test_mm_mask_reduce_and_epi8() {
16723        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
16724        let e = _mm_mask_reduce_and_epi8(0b11111111_00000000, a);
16725        assert_eq!(1, e);
16726    }
16727
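    // MUL reductions: the full reductions evaluate to 2 raised to the number of lanes
    // holding 2, while the masked reductions select only lanes holding the
    // multiplicative identity 1 and therefore yield 1.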
16728    #[simd_test(enable = "avx512bw,avx512vl")]
16729    const fn test_mm256_reduce_mul_epi16() {
16730        let a = _mm256_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
16731        let e = _mm256_reduce_mul_epi16(a);
16732        assert_eq!(256, e);
16733    }
16734
16735    #[simd_test(enable = "avx512bw,avx512vl")]
16736    const fn test_mm256_mask_reduce_mul_epi16() {
16737        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
16738        let e = _mm256_mask_reduce_mul_epi16(0b11111111_00000000, a);
16739        assert_eq!(1, e);
16740    }
16741
16742    #[simd_test(enable = "avx512bw,avx512vl")]
16743    const fn test_mm_reduce_mul_epi16() {
16744        let a = _mm_set_epi16(2, 2, 2, 2, 1, 1, 1, 1);
16745        let e = _mm_reduce_mul_epi16(a);
16746        assert_eq!(16, e);
16747    }
16748
16749    #[simd_test(enable = "avx512bw,avx512vl")]
16750    const fn test_mm_mask_reduce_mul_epi16() {
16751        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
16752        let e = _mm_mask_reduce_mul_epi16(0b11110000, a);
16753        assert_eq!(1, e);
16754    }
16755
16756    #[simd_test(enable = "avx512bw,avx512vl")]
16757    const fn test_mm256_reduce_mul_epi8() {
16758        let a = _mm256_set_epi8(
16759            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
16760            2, 2, 2,
16761        );
16762        let e = _mm256_reduce_mul_epi8(a);
16763        assert_eq!(64, e);
16764    }
16765
16766    #[simd_test(enable = "avx512bw,avx512vl")]
16767    const fn test_mm256_mask_reduce_mul_epi8() {
16768        let a = _mm256_set_epi8(
16769            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
16770            2, 2, 2,
16771        );
16772        let e = _mm256_mask_reduce_mul_epi8(0b11111111_00000000_11111111_00000000, a);
16773        assert_eq!(1, e);
16774    }
16775
16776    #[simd_test(enable = "avx512bw,avx512vl")]
16777    const fn test_mm_reduce_mul_epi8() {
16778        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2);
16779        let e = _mm_reduce_mul_epi8(a);
16780        assert_eq!(8, e);
16781    }
16782
16783    #[simd_test(enable = "avx512bw,avx512vl")]
16784    const fn test_mm_mask_reduce_mul_epi8() {
16785        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2);
16786        let e = _mm_mask_reduce_mul_epi8(0b11111111_00000000, a);
16787        assert_eq!(1, e);
16788    }
16789
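    // MAX reductions. The _mm*_set_epi* constructors list arguments from the highest
    // lane down to lane 0, so with the ascending sequence 0, 1, ..., N-1 the mask
    // 0b11111111_00000000 selects the lanes holding the smaller half of the values,
    // which is why the masked maxima drop to 7 (or 15 in the 32-lane cases).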
16790    #[simd_test(enable = "avx512bw,avx512vl")]
16791    const fn test_mm256_reduce_max_epi16() {
16792        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16793        let e: i16 = _mm256_reduce_max_epi16(a);
16794        assert_eq!(15, e);
16795    }
16796
16797    #[simd_test(enable = "avx512bw,avx512vl")]
16798    const fn test_mm256_mask_reduce_max_epi16() {
16799        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16800        let e: i16 = _mm256_mask_reduce_max_epi16(0b11111111_00000000, a);
16801        assert_eq!(7, e);
16802    }
16803
16804    #[simd_test(enable = "avx512bw,avx512vl")]
16805    const fn test_mm_reduce_max_epi16() {
16806        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
16807        let e: i16 = _mm_reduce_max_epi16(a);
16808        assert_eq!(7, e);
16809    }
16810
16811    #[simd_test(enable = "avx512bw,avx512vl")]
16812    const fn test_mm_mask_reduce_max_epi16() {
16813        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
16814        let e: i16 = _mm_mask_reduce_max_epi16(0b11110000, a);
16815        assert_eq!(3, e);
16816    }
16817
16818    #[simd_test(enable = "avx512bw,avx512vl")]
16819    const fn test_mm256_reduce_max_epi8() {
16820        let a = _mm256_set_epi8(
16821            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
16822            24, 25, 26, 27, 28, 29, 30, 31,
16823        );
16824        let e: i8 = _mm256_reduce_max_epi8(a);
16825        assert_eq!(31, e);
16826    }
16827
16828    #[simd_test(enable = "avx512bw,avx512vl")]
16829    const fn test_mm256_mask_reduce_max_epi8() {
16830        let a = _mm256_set_epi8(
16831            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
16832            24, 25, 26, 27, 28, 29, 30, 31,
16833        );
16834        let e: i8 = _mm256_mask_reduce_max_epi8(0b11111111_11111111_00000000_00000000, a);
16835        assert_eq!(15, e);
16836    }
16837
16838    #[simd_test(enable = "avx512bw,avx512vl")]
16839    const fn test_mm_reduce_max_epi8() {
16840        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16841        let e: i8 = _mm_reduce_max_epi8(a);
16842        assert_eq!(15, e);
16843    }
16844
16845    #[simd_test(enable = "avx512bw,avx512vl")]
16846    const fn test_mm_mask_reduce_max_epi8() {
16847        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16848        let e: i8 = _mm_mask_reduce_max_epi8(0b11111111_00000000, a);
16849        assert_eq!(7, e);
16850    }
16851
16852    #[simd_test(enable = "avx512bw,avx512vl")]
16853    const fn test_mm256_reduce_max_epu16() {
16854        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16855        let e: u16 = _mm256_reduce_max_epu16(a);
16856        assert_eq!(15, e);
16857    }
16858
16859    #[simd_test(enable = "avx512bw,avx512vl")]
16860    const fn test_mm256_mask_reduce_max_epu16() {
16861        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16862        let e: u16 = _mm256_mask_reduce_max_epu16(0b11111111_00000000, a);
16863        assert_eq!(7, e);
16864    }
16865
16866    #[simd_test(enable = "avx512bw,avx512vl")]
16867    const fn test_mm_reduce_max_epu16() {
16868        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
16869        let e: u16 = _mm_reduce_max_epu16(a);
16870        assert_eq!(7, e);
16871    }
16872
16873    #[simd_test(enable = "avx512bw,avx512vl")]
16874    const fn test_mm_mask_reduce_max_epu16() {
16875        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
16876        let e: u16 = _mm_mask_reduce_max_epu16(0b11110000, a);
16877        assert_eq!(3, e);
16878    }
16879
16880    #[simd_test(enable = "avx512bw,avx512vl")]
16881    const fn test_mm256_reduce_max_epu8() {
16882        let a = _mm256_set_epi8(
16883            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
16884            24, 25, 26, 27, 28, 29, 30, 31,
16885        );
16886        let e: u8 = _mm256_reduce_max_epu8(a);
16887        assert_eq!(31, e);
16888    }
16889
16890    #[simd_test(enable = "avx512bw,avx512vl")]
16891    const fn test_mm256_mask_reduce_max_epu8() {
16892        let a = _mm256_set_epi8(
16893            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
16894            24, 25, 26, 27, 28, 29, 30, 31,
16895        );
16896        let e: u8 = _mm256_mask_reduce_max_epu8(0b11111111_11111111_00000000_00000000, a);
16897        assert_eq!(15, e);
16898    }
16899
16900    #[simd_test(enable = "avx512bw,avx512vl")]
16901    const fn test_mm_reduce_max_epu8() {
16902        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16903        let e: u8 = _mm_reduce_max_epu8(a);
16904        assert_eq!(15, e);
16905    }
16906
16907    #[simd_test(enable = "avx512bw,avx512vl")]
16908    const fn test_mm_mask_reduce_max_epu8() {
16909        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16910        let e: u8 = _mm_mask_reduce_max_epu8(0b11111111_00000000, a);
16911        assert_eq!(7, e);
16912    }
16913
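    // MIN reductions: with the same ascending sequences the minimum is 0 with or
    // without a mask, since the selected halves always include the lane holding 0.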
16914    #[simd_test(enable = "avx512bw,avx512vl")]
16915    const fn test_mm256_reduce_min_epi16() {
16916        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16917        let e: i16 = _mm256_reduce_min_epi16(a);
16918        assert_eq!(0, e);
16919    }
16920
16921    #[simd_test(enable = "avx512bw,avx512vl")]
16922    const fn test_mm256_mask_reduce_min_epi16() {
16923        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16924        let e: i16 = _mm256_mask_reduce_min_epi16(0b11111111_00000000, a);
16925        assert_eq!(0, e);
16926    }
16927
16928    #[simd_test(enable = "avx512bw,avx512vl")]
16929    const fn test_mm_reduce_min_epi16() {
16930        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
16931        let e: i16 = _mm_reduce_min_epi16(a);
16932        assert_eq!(0, e);
16933    }
16934
16935    #[simd_test(enable = "avx512bw,avx512vl")]
16936    const fn test_mm_mask_reduce_min_epi16() {
16937        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
16938        let e: i16 = _mm_mask_reduce_min_epi16(0b11110000, a);
16939        assert_eq!(0, e);
16940    }
16941
16942    #[simd_test(enable = "avx512bw,avx512vl")]
16943    const fn test_mm256_reduce_min_epi8() {
16944        let a = _mm256_set_epi8(
16945            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
16946            24, 25, 26, 27, 28, 29, 30, 31,
16947        );
16948        let e: i8 = _mm256_reduce_min_epi8(a);
16949        assert_eq!(0, e);
16950    }
16951
16952    #[simd_test(enable = "avx512bw,avx512vl")]
16953    const fn test_mm256_mask_reduce_min_epi8() {
16954        let a = _mm256_set_epi8(
16955            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
16956            24, 25, 26, 27, 28, 29, 30, 31,
16957        );
16958        let e: i8 = _mm256_mask_reduce_min_epi8(0b11111111_11111111_00000000_00000000, a);
16959        assert_eq!(0, e);
16960    }
16961
16962    #[simd_test(enable = "avx512bw,avx512vl")]
16963    const fn test_mm_reduce_min_epi8() {
16964        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16965        let e: i8 = _mm_reduce_min_epi8(a);
16966        assert_eq!(0, e);
16967    }
16968
16969    #[simd_test(enable = "avx512bw,avx512vl")]
16970    const fn test_mm_mask_reduce_min_epi8() {
16971        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16972        let e: i8 = _mm_mask_reduce_min_epi8(0b11111111_00000000, a);
16973        assert_eq!(0, e);
16974    }
16975
16976    #[simd_test(enable = "avx512bw,avx512vl")]
16977    const fn test_mm256_reduce_min_epu16() {
16978        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16979        let e: u16 = _mm256_reduce_min_epu16(a);
16980        assert_eq!(0, e);
16981    }
16982
16983    #[simd_test(enable = "avx512bw,avx512vl")]
16984    const fn test_mm256_mask_reduce_min_epu16() {
16985        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16986        let e: u16 = _mm256_mask_reduce_min_epu16(0b11111111_00000000, a);
16987        assert_eq!(0, e);
16988    }
16989
16990    #[simd_test(enable = "avx512bw,avx512vl")]
16991    const fn test_mm_reduce_min_epu16() {
16992        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
16993        let e: u16 = _mm_reduce_min_epu16(a);
16994        assert_eq!(0, e);
16995    }
16996
16997    #[simd_test(enable = "avx512bw,avx512vl")]
16998    const fn test_mm_mask_reduce_min_epu16() {
16999        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
17000        let e: u16 = _mm_mask_reduce_min_epu16(0b11110000, a);
17001        assert_eq!(0, e);
17002    }
17003
17004    #[simd_test(enable = "avx512bw,avx512vl")]
17005    const fn test_mm256_reduce_min_epu8() {
17006        let a = _mm256_set_epi8(
17007            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
17008            24, 25, 26, 27, 28, 29, 30, 31,
17009        );
17010        let e: u8 = _mm256_reduce_min_epu8(a);
17011        assert_eq!(0, e);
17012    }
17013
17014    #[simd_test(enable = "avx512bw,avx512vl")]
17015    const fn test_mm256_mask_reduce_min_epu8() {
17016        let a = _mm256_set_epi8(
17017            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
17018            24, 25, 26, 27, 28, 29, 30, 31,
17019        );
17020        let e: u8 = _mm256_mask_reduce_min_epu8(0b11111111_11111111_00000000_00000000, a);
17021        assert_eq!(0, e);
17022    }
17023
17024    #[simd_test(enable = "avx512bw,avx512vl")]
17025    const fn test_mm_reduce_min_epu8() {
17026        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
17027        let e: u8 = _mm_reduce_min_epu8(a);
17028        assert_eq!(0, e);
17029    }
17030
17031    #[simd_test(enable = "avx512bw,avx512vl")]
17032    const fn test_mm_mask_reduce_min_epu8() {
17033        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
17034        let e: u8 = _mm_mask_reduce_min_epu8(0b11111111_00000000, a);
17035        assert_eq!(0, e);
17036    }
17037
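    // OR reductions: 1 | 2 == 3 for the full vector; the masked reductions cover only
    // the lanes holding 1 and therefore yield 1.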
17038    #[simd_test(enable = "avx512bw,avx512vl")]
17039    const fn test_mm256_reduce_or_epi16() {
17040        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
17041        let e = _mm256_reduce_or_epi16(a);
17042        assert_eq!(3, e);
17043    }
17044
17045    #[simd_test(enable = "avx512bw,avx512vl")]
17046    const fn test_mm256_mask_reduce_or_epi16() {
17047        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
17048        let e = _mm256_mask_reduce_or_epi16(0b11111111_00000000, a);
17049        assert_eq!(1, e);
17050    }
17051
17052    #[simd_test(enable = "avx512bw,avx512vl")]
17053    const fn test_mm_reduce_or_epi16() {
17054        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
17055        let e = _mm_reduce_or_epi16(a);
17056        assert_eq!(3, e);
17057    }
17058
17059    #[simd_test(enable = "avx512bw,avx512vl")]
17060    const fn test_mm_mask_reduce_or_epi16() {
17061        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
17062        let e = _mm_mask_reduce_or_epi16(0b11110000, a);
17063        assert_eq!(1, e);
17064    }
17065
17066    #[simd_test(enable = "avx512bw,avx512vl")]
17067    const fn test_mm256_reduce_or_epi8() {
17068        let a = _mm256_set_epi8(
17069            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
17070            2, 2, 2,
17071        );
17072        let e = _mm256_reduce_or_epi8(a);
17073        assert_eq!(3, e);
17074    }
17075
17076    #[simd_test(enable = "avx512bw,avx512vl")]
17077    const fn test_mm256_mask_reduce_or_epi8() {
17078        let a = _mm256_set_epi8(
17079            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
17080            2, 2, 2,
17081        );
17082        let e = _mm256_mask_reduce_or_epi8(0b11111111_00000000_11111111_00000000, a);
17083        assert_eq!(1, e);
17084    }
17085
17086    #[simd_test(enable = "avx512bw,avx512vl")]
17087    const fn test_mm_reduce_or_epi8() {
17088        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
17089        let e = _mm_reduce_or_epi8(a);
17090        assert_eq!(3, e);
17091    }
17092
17093    #[simd_test(enable = "avx512bw,avx512vl")]
17094    const fn test_mm_mask_reduce_or_epi8() {
17095        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
17096        let e = _mm_mask_reduce_or_epi8(0b11111111_00000000, a);
17097        assert_eq!(1, e);
17098    }
17099
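    // Unaligned load/store round-trips. The loads and stores go through raw pointers
    // and therefore sit in `unsafe` blocks. The source arrays are in memory order,
    // while the _mm*_set_epi* constructors take the highest lane first, which is why
    // the expected vectors list the same values in reverse.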
17100    #[simd_test(enable = "avx512bw")]
17101    const fn test_mm512_loadu_epi16() {
17102        #[rustfmt::skip]
17103        let a: [i16; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
17104        let r = unsafe { _mm512_loadu_epi16(&a[0]) };
17105        #[rustfmt::skip]
17106        let e = _mm512_set_epi16(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
17107        assert_eq_m512i(r, e);
17108    }
17109
17110    #[simd_test(enable = "avx512bw,avx512vl")]
17111    const fn test_mm256_loadu_epi16() {
17112        let a: [i16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
17113        let r = unsafe { _mm256_loadu_epi16(&a[0]) };
17114        let e = _mm256_set_epi16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
17115        assert_eq_m256i(r, e);
17116    }
17117
17118    #[simd_test(enable = "avx512bw,avx512vl")]
17119    const fn test_mm_loadu_epi16() {
17120        let a: [i16; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
17121        let r = unsafe { _mm_loadu_epi16(&a[0]) };
17122        let e = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1);
17123        assert_eq_m128i(r, e);
17124    }
17125
17126    #[simd_test(enable = "avx512bw")]
17127    const fn test_mm512_loadu_epi8() {
17128        #[rustfmt::skip]
17129        let a: [i8; 64] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
17130                           1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
17131        let r = unsafe { _mm512_loadu_epi8(&a[0]) };
17132        #[rustfmt::skip]
17133        let e = _mm512_set_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
17134                                32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
17135        assert_eq_m512i(r, e);
17136    }
17137
17138    #[simd_test(enable = "avx512bw,avx512vl")]
17139    const fn test_mm256_loadu_epi8() {
17140        #[rustfmt::skip]
17141        let a: [i8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
17142        let r = unsafe { _mm256_loadu_epi8(&a[0]) };
17143        #[rustfmt::skip]
17144        let e = _mm256_set_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
17145        assert_eq_m256i(r, e);
17146    }
17147
17148    #[simd_test(enable = "avx512bw,avx512vl")]
17149    const fn test_mm_loadu_epi8() {
17150        let a: [i8; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
17151        let r = unsafe { _mm_loadu_epi8(&a[0]) };
17152        let e = _mm_set_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
17153        assert_eq_m128i(r, e);
17154    }
17155
17156    #[simd_test(enable = "avx512bw")]
17157    const fn test_mm512_storeu_epi16() {
17158        let a = _mm512_set1_epi16(9);
17159        let mut r = _mm512_undefined_epi32();
17160        unsafe {
17161            _mm512_storeu_epi16(&mut r as *mut _ as *mut i16, a);
17162        }
17163        assert_eq_m512i(r, a);
17164    }
17165
17166    #[simd_test(enable = "avx512bw,avx512vl")]
17167    const fn test_mm256_storeu_epi16() {
17168        let a = _mm256_set1_epi16(9);
17169        let mut r = _mm256_set1_epi32(0);
17170        unsafe {
17171            _mm256_storeu_epi16(&mut r as *mut _ as *mut i16, a);
17172        }
17173        assert_eq_m256i(r, a);
17174    }
17175
17176    #[simd_test(enable = "avx512bw,avx512vl")]
17177    const fn test_mm_storeu_epi16() {
17178        let a = _mm_set1_epi16(9);
17179        let mut r = _mm_set1_epi32(0);
17180        unsafe {
17181            _mm_storeu_epi16(&mut r as *mut _ as *mut i16, a);
17182        }
17183        assert_eq_m128i(r, a);
17184    }
17185
17186    #[simd_test(enable = "avx512bw")]
17187    const fn test_mm512_storeu_epi8() {
17188        let a = _mm512_set1_epi8(9);
17189        let mut r = _mm512_undefined_epi32();
17190        unsafe {
17191            _mm512_storeu_epi8(&mut r as *mut _ as *mut i8, a);
17192        }
17193        assert_eq_m512i(r, a);
17194    }
17195
17196    #[simd_test(enable = "avx512bw,avx512vl")]
17197    const fn test_mm256_storeu_epi8() {
17198        let a = _mm256_set1_epi8(9);
17199        let mut r = _mm256_set1_epi32(0);
17200        unsafe {
17201            _mm256_storeu_epi8(&mut r as *mut _ as *mut i8, a);
17202        }
17203        assert_eq_m256i(r, a);
17204    }
17205
17206    #[simd_test(enable = "avx512bw,avx512vl")]
17207    const fn test_mm_storeu_epi8() {
17208        let a = _mm_set1_epi8(9);
17209        let mut r = _mm_set1_epi32(0);
17210        unsafe {
17211            _mm_storeu_epi8(&mut r as *mut _ as *mut i8, a);
17212        }
17213        assert_eq_m128i(r, a);
17214    }
17215
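    // Masked unaligned loads and stores: bit i of the mask controls element i. For
    // mask_loadu the inactive elements come from `src`, for maskz_loadu they are
    // zeroed, and for mask_storeu the corresponding memory locations are left
    // untouched (hence the destination buffers pre-filled with 42).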
17216    #[simd_test(enable = "avx512bw")]
17217    const fn test_mm512_mask_loadu_epi16() {
17218        let src = _mm512_set1_epi16(42);
17219        let a = &[
17220            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
17221            24, 25, 26, 27, 28, 29, 30, 31, 32,
17222        ];
17223        let p = a.as_ptr();
17224        let m = 0b10101010_11001100_11101000_11001010;
17225        let r = unsafe { _mm512_mask_loadu_epi16(src, m, black_box(p)) };
17226        let e = &[
17227            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
17228            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
17229        ];
17230        let e = unsafe { _mm512_loadu_epi16(e.as_ptr()) };
17231        assert_eq_m512i(r, e);
17232    }
17233
17234    #[simd_test(enable = "avx512bw")]
17235    const fn test_mm512_maskz_loadu_epi16() {
17236        let a = &[
17237            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
17238            24, 25, 26, 27, 28, 29, 30, 31, 32,
17239        ];
17240        let p = a.as_ptr();
17241        let m = 0b10101010_11001100_11101000_11001010;
17242        let r = unsafe { _mm512_maskz_loadu_epi16(m, black_box(p)) };
17243        let e = &[
17244            0_i16, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
17245            26, 0, 28, 0, 30, 0, 32,
17246        ];
17247        let e = unsafe { _mm512_loadu_epi16(e.as_ptr()) };
17248        assert_eq_m512i(r, e);
17249    }
17250
17251    #[simd_test(enable = "avx512bw")]
17252    const fn test_mm512_mask_storeu_epi16() {
17253        let mut r = [42_i16; 32];
17254        let a = &[
17255            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
17256            24, 25, 26, 27, 28, 29, 30, 31, 32,
17257        ];
17258        let a = unsafe { _mm512_loadu_epi16(a.as_ptr()) };
17259        let m = 0b10101010_11001100_11101000_11001010;
17260        unsafe {
17261            _mm512_mask_storeu_epi16(r.as_mut_ptr(), m, a);
17262        }
17263        let e = &[
17264            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
17265            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
17266        ];
17267        let e = unsafe { _mm512_loadu_epi16(e.as_ptr()) };
17268        assert_eq_m512i(unsafe { _mm512_loadu_epi16(r.as_ptr()) }, e);
17269    }
17270
17271    #[simd_test(enable = "avx512bw")]
17272    const fn test_mm512_mask_loadu_epi8() {
17273        let src = _mm512_set1_epi8(42);
17274        let a = &[
17275            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
17276            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
17277            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
17278        ];
17279        let p = a.as_ptr();
17280        let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
17281        let r = unsafe { _mm512_mask_loadu_epi8(src, m, black_box(p)) };
17282        let e = &[
17283            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
17284            23, 24, 42, 26, 42, 28, 42, 30, 42, 32, 42, 42, 42, 42, 42, 42, 42, 42, 41, 42, 43, 44,
17285            45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 42, 42, 42, 42, 42, 42, 42, 42,
17286        ];
17287        let e = unsafe { _mm512_loadu_epi8(e.as_ptr()) };
17288        assert_eq_m512i(r, e);
17289    }
17290
17291    #[simd_test(enable = "avx512bw")]
17292    const fn test_mm512_maskz_loadu_epi8() {
17293        let a = &[
17294            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
17295            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
17296            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
17297        ];
17298        let p = a.as_ptr();
17299        let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
17300        let r = unsafe { _mm512_maskz_loadu_epi8(m, black_box(p)) };
17301        let e = &[
17302            0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
17303            26, 0, 28, 0, 30, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 41, 42, 43, 44, 45, 46, 47, 48, 49,
17304            50, 51, 52, 53, 54, 55, 56, 0, 0, 0, 0, 0, 0, 0, 0,
17305        ];
17306        let e = unsafe { _mm512_loadu_epi8(e.as_ptr()) };
17307        assert_eq_m512i(r, e);
17308    }
17309
17310    #[simd_test(enable = "avx512bw")]
17311    const fn test_mm512_mask_storeu_epi8() {
17312        let mut r = [42_i8; 64];
17313        let a = &[
17314            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
17315            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
17316            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
17317        ];
17318        let a = unsafe { _mm512_loadu_epi8(a.as_ptr()) };
17319        let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
17320        unsafe {
17321            _mm512_mask_storeu_epi8(r.as_mut_ptr(), m, a);
17322        }
17323        let e = &[
17324            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
17325            23, 24, 42, 26, 42, 28, 42, 30, 42, 32, 42, 42, 42, 42, 42, 42, 42, 42, 41, 42, 43, 44,
17326            45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 42, 42, 42, 42, 42, 42, 42, 42,
17327        ];
17328        let e = unsafe { _mm512_loadu_epi8(e.as_ptr()) };
17329        assert_eq_m512i(unsafe { _mm512_loadu_epi8(r.as_ptr()) }, e);
17330    }
17331
17332    #[simd_test(enable = "avx512bw,avx512vl")]
17333    const fn test_mm256_mask_loadu_epi16() {
17334        let src = _mm256_set1_epi16(42);
17335        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
17336        let p = a.as_ptr();
17337        let m = 0b11101000_11001010;
17338        let r = unsafe { _mm256_mask_loadu_epi16(src, m, black_box(p)) };
17339        let e = &[
17340            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
17341        ];
17342        let e = unsafe { _mm256_loadu_epi16(e.as_ptr()) };
17343        assert_eq_m256i(r, e);
17344    }
17345
17346    #[simd_test(enable = "avx512bw,avx512vl")]
17347    const fn test_mm256_maskz_loadu_epi16() {
17348        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
17349        let p = a.as_ptr();
17350        let m = 0b11101000_11001010;
17351        let r = unsafe { _mm256_maskz_loadu_epi16(m, black_box(p)) };
17352        let e = &[0_i16, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16];
17353        let e = unsafe { _mm256_loadu_epi16(e.as_ptr()) };
17354        assert_eq_m256i(r, e);
17355    }
17356
17357    #[simd_test(enable = "avx512bw,avx512vl")]
17358    const fn test_mm256_mask_storeu_epi16() {
17359        let mut r = [42_i16; 16];
17360        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
17361        let a = unsafe { _mm256_loadu_epi16(a.as_ptr()) };
17362        let m = 0b11101000_11001010;
17363        unsafe {
17364            _mm256_mask_storeu_epi16(r.as_mut_ptr(), m, a);
17365        }
17366        let e = &[
17367            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
17368        ];
17369        let e = unsafe { _mm256_loadu_epi16(e.as_ptr()) };
17370        assert_eq_m256i(unsafe { _mm256_loadu_epi16(r.as_ptr()) }, e);
17371    }
17372
17373    #[simd_test(enable = "avx512bw,avx512vl")]
17374    const fn test_mm256_mask_loadu_epi8() {
17375        let src = _mm256_set1_epi8(42);
17376        let a = &[
17377            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
17378            24, 25, 26, 27, 28, 29, 30, 31, 32,
17379        ];
17380        let p = a.as_ptr();
17381        let m = 0b10101010_11001100_11101000_11001010;
17382        let r = unsafe { _mm256_mask_loadu_epi8(src, m, black_box(p)) };
17383        let e = &[
17384            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
17385            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
17386        ];
17387        let e = unsafe { _mm256_loadu_epi8(e.as_ptr()) };
17388        assert_eq_m256i(r, e);
17389    }
17390
17391    #[simd_test(enable = "avx512bw,avx512vl")]
17392    const fn test_mm256_maskz_loadu_epi8() {
17393        let a = &[
17394            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
17395            24, 25, 26, 27, 28, 29, 30, 31, 32,
17396        ];
17397        let p = a.as_ptr();
17398        let m = 0b10101010_11001100_11101000_11001010;
17399        let r = unsafe { _mm256_maskz_loadu_epi8(m, black_box(p)) };
17400        let e = &[
17401            0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
17402            26, 0, 28, 0, 30, 0, 32,
17403        ];
17404        let e = unsafe { _mm256_loadu_epi8(e.as_ptr()) };
17405        assert_eq_m256i(r, e);
17406    }
17407
17408    #[simd_test(enable = "avx512bw,avx512vl")]
17409    const fn test_mm256_mask_storeu_epi8() {
17410        let mut r = [42_i8; 32];
17411        let a = &[
17412            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
17413            24, 25, 26, 27, 28, 29, 30, 31, 32,
17414        ];
17415        let a = unsafe { _mm256_loadu_epi8(a.as_ptr()) };
17416        let m = 0b10101010_11001100_11101000_11001010;
17417        unsafe {
17418            _mm256_mask_storeu_epi8(r.as_mut_ptr(), m, a);
17419        }
17420        let e = &[
17421            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
17422            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
17423        ];
17424        let e = unsafe { _mm256_loadu_epi8(e.as_ptr()) };
17425        assert_eq_m256i(unsafe { _mm256_loadu_epi8(r.as_ptr()) }, e);
17426    }
17427
17428    #[simd_test(enable = "avx512bw,avx512vl")]
17429    const fn test_mm_mask_loadu_epi16() {
17430        let src = _mm_set1_epi16(42);
17431        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
17432        let p = a.as_ptr();
17433        let m = 0b11001010;
17434        let r = unsafe { _mm_mask_loadu_epi16(src, m, black_box(p)) };
17435        let e = &[42_i16, 2, 42, 4, 42, 42, 7, 8];
17436        let e = unsafe { _mm_loadu_epi16(e.as_ptr()) };
17437        assert_eq_m128i(r, e);
17438    }
17439
17440    #[simd_test(enable = "avx512bw,avx512vl")]
17441    const fn test_mm_maskz_loadu_epi16() {
17442        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
17443        let p = a.as_ptr();
17444        let m = 0b11001010;
17445        let r = unsafe { _mm_maskz_loadu_epi16(m, black_box(p)) };
17446        let e = &[0_i16, 2, 0, 4, 0, 0, 7, 8];
17447        let e = unsafe { _mm_loadu_epi16(e.as_ptr()) };
17448        assert_eq_m128i(r, e);
17449    }
17450
17451    #[simd_test(enable = "avx512bw,avx512vl")]
17452    const fn test_mm_mask_storeu_epi16() {
17453        let mut r = [42_i16; 8];
17454        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
17455        let a = unsafe { _mm_loadu_epi16(a.as_ptr()) };
17456        let m = 0b11001010;
17457        unsafe { _mm_mask_storeu_epi16(r.as_mut_ptr(), m, a) };
17458        let e = &[42_i16, 2, 42, 4, 42, 42, 7, 8];
17459        let e = unsafe { _mm_loadu_epi16(e.as_ptr()) };
17460        assert_eq_m128i(unsafe { _mm_loadu_epi16(r.as_ptr()) }, e);
17461    }
17462
17463    #[simd_test(enable = "avx512bw,avx512vl")]
17464    const fn test_mm_mask_loadu_epi8() {
17465        let src = _mm_set1_epi8(42);
17466        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
17467        let p = a.as_ptr();
17468        let m = 0b11101000_11001010;
17469        let r = unsafe { _mm_mask_loadu_epi8(src, m, black_box(p)) };
17470        let e = &[
17471            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
17472        ];
17473        let e = unsafe { _mm_loadu_epi8(e.as_ptr()) };
17474        assert_eq_m128i(r, e);
17475    }
17476
17477    #[simd_test(enable = "avx512bw,avx512vl")]
17478    const fn test_mm_maskz_loadu_epi8() {
17479        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
17480        let p = a.as_ptr();
17481        let m = 0b11101000_11001010;
17482        let r = unsafe { _mm_maskz_loadu_epi8(m, black_box(p)) };
17483        let e = &[0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16];
17484        let e = unsafe { _mm_loadu_epi8(e.as_ptr()) };
17485        assert_eq_m128i(r, e);
17486    }
17487
17488    #[simd_test(enable = "avx512bw,avx512vl")]
17489    const fn test_mm_mask_storeu_epi8() {
17490        let mut r = [42_i8; 16];
17491        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
17492        let a = unsafe { _mm_loadu_epi8(a.as_ptr()) };
17493        let m = 0b11101000_11001010;
17494        unsafe { _mm_mask_storeu_epi8(r.as_mut_ptr(), m, a) };
17495        let e = &[
17496            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
17497        ];
17498        let e = unsafe { _mm_loadu_epi8(e.as_ptr()) };
17499        assert_eq_m128i(unsafe { _mm_loadu_epi8(r.as_ptr()) }, e);
17500    }
17501
17502    #[simd_test(enable = "avx512bw")]
17503    fn test_mm512_madd_epi16() {
17504        let a = _mm512_set1_epi16(1);
17505        let b = _mm512_set1_epi16(1);
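        // vpmaddwd multiplies corresponding signed 16-bit lanes and horizontally adds
        // adjacent pairs into 32-bit lanes, so all-ones inputs give 1 * 1 + 1 * 1 = 2.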
17506        let r = _mm512_madd_epi16(a, b);
17507        let e = _mm512_set1_epi32(2);
17508        assert_eq_m512i(r, e);
17509    }
17510
17511    #[simd_test(enable = "avx512bw")]
17512    fn test_mm512_mask_madd_epi16() {
17513        let a = _mm512_set1_epi16(1);
17514        let b = _mm512_set1_epi16(1);
17515        let r = _mm512_mask_madd_epi16(a, 0, a, b);
17516        assert_eq_m512i(r, a);
17517        let r = _mm512_mask_madd_epi16(a, 0b00000000_00001111, a, b);
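        // Lanes not selected by the mask keep `a` (the src argument); two 1_i16 halves
        // reinterpreted as one i32 read as 1 << 16 | 1.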
17518        let e = _mm512_set_epi32(
17519            1 << 16 | 1,
17520            1 << 16 | 1,
17521            1 << 16 | 1,
17522            1 << 16 | 1,
17523            1 << 16 | 1,
17524            1 << 16 | 1,
17525            1 << 16 | 1,
17526            1 << 16 | 1,
17527            1 << 16 | 1,
17528            1 << 16 | 1,
17529            1 << 16 | 1,
17530            1 << 16 | 1,
17531            2,
17532            2,
17533            2,
17534            2,
17535        );
17536        assert_eq_m512i(r, e);
17537    }
17538
17539    #[simd_test(enable = "avx512bw")]
17540    fn test_mm512_maskz_madd_epi16() {
17541        let a = _mm512_set1_epi16(1);
17542        let b = _mm512_set1_epi16(1);
17543        let r = _mm512_maskz_madd_epi16(0, a, b);
17544        assert_eq_m512i(r, _mm512_setzero_si512());
17545        let r = _mm512_maskz_madd_epi16(0b00000000_00001111, a, b);
17546        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2);
17547        assert_eq_m512i(r, e);
17548    }
17549
17550    #[simd_test(enable = "avx512bw,avx512vl")]
17551    fn test_mm256_mask_madd_epi16() {
17552        let a = _mm256_set1_epi16(1);
17553        let b = _mm256_set1_epi16(1);
17554        let r = _mm256_mask_madd_epi16(a, 0, a, b);
17555        assert_eq_m256i(r, a);
17556        let r = _mm256_mask_madd_epi16(a, 0b00001111, a, b);
17557        let e = _mm256_set_epi32(
17558            1 << 16 | 1,
17559            1 << 16 | 1,
17560            1 << 16 | 1,
17561            1 << 16 | 1,
17562            2,
17563            2,
17564            2,
17565            2,
17566        );
17567        assert_eq_m256i(r, e);
17568    }
17569
17570    #[simd_test(enable = "avx512bw,avx512vl")]
17571    fn test_mm256_maskz_madd_epi16() {
17572        let a = _mm256_set1_epi16(1);
17573        let b = _mm256_set1_epi16(1);
17574        let r = _mm256_maskz_madd_epi16(0, a, b);
17575        assert_eq_m256i(r, _mm256_setzero_si256());
17576        let r = _mm256_maskz_madd_epi16(0b00001111, a, b);
17577        let e = _mm256_set_epi32(0, 0, 0, 0, 2, 2, 2, 2);
17578        assert_eq_m256i(r, e);
17579    }
17580
17581    #[simd_test(enable = "avx512bw,avx512vl")]
17582    fn test_mm_mask_madd_epi16() {
17583        let a = _mm_set1_epi16(1);
17584        let b = _mm_set1_epi16(1);
17585        let r = _mm_mask_madd_epi16(a, 0, a, b);
17586        assert_eq_m128i(r, a);
17587        let r = _mm_mask_madd_epi16(a, 0b00001111, a, b);
17588        let e = _mm_set_epi32(2, 2, 2, 2);
17589        assert_eq_m128i(r, e);
17590    }
17591
17592    #[simd_test(enable = "avx512bw,avx512vl")]
17593    fn test_mm_maskz_madd_epi16() {
17594        let a = _mm_set1_epi16(1);
17595        let b = _mm_set1_epi16(1);
17596        let r = _mm_maskz_madd_epi16(0, a, b);
17597        assert_eq_m128i(r, _mm_setzero_si128());
17598        let r = _mm_maskz_madd_epi16(0b00001111, a, b);
17599        let e = _mm_set_epi32(2, 2, 2, 2);
17600        assert_eq_m128i(r, e);
17601    }
17602
17603    #[simd_test(enable = "avx512bw")]
17604    fn test_mm512_maddubs_epi16() {
17605        let a = _mm512_set1_epi8(1);
17606        let b = _mm512_set1_epi8(1);
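        // vpmaddubsw multiplies unsigned bytes of a by signed bytes of b and adds adjacent
        // pairs with signed saturation into 16-bit lanes: 1 * 1 + 1 * 1 = 2.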
17607        let r = _mm512_maddubs_epi16(a, b);
17608        let e = _mm512_set1_epi16(2);
17609        assert_eq_m512i(r, e);
17610    }
17611
17612    #[simd_test(enable = "avx512bw")]
17613    fn test_mm512_mask_maddubs_epi16() {
17614        let a = _mm512_set1_epi8(1);
17615        let b = _mm512_set1_epi8(1);
17616        let src = _mm512_set1_epi16(1);
17617        let r = _mm512_mask_maddubs_epi16(src, 0, a, b);
17618        assert_eq_m512i(r, src);
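        // Only element 0 is selected by the mask: it receives the maddubs result (2),
        // while the remaining elements keep src (1).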
17619        let r = _mm512_mask_maddubs_epi16(src, 0b00000000_00000000_00000000_00000001, a, b);
17620        #[rustfmt::skip]
17621        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
17622                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2);
17623        assert_eq_m512i(r, e);
17624    }
17625
17626    #[simd_test(enable = "avx512bw")]
17627    fn test_mm512_maskz_maddubs_epi16() {
17628        let a = _mm512_set1_epi8(1);
17629        let b = _mm512_set1_epi8(1);
17630        let r = _mm512_maskz_maddubs_epi16(0, a, b);
17631        assert_eq_m512i(r, _mm512_setzero_si512());
17632        let r = _mm512_maskz_maddubs_epi16(0b00000000_11111111_00000000_11111111, a, b);
17633        #[rustfmt::skip]
17634        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2,
17635                                 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
17636        assert_eq_m512i(r, e);
17637    }
17638
17639    #[simd_test(enable = "avx512bw,avx512vl")]
17640    fn test_mm256_mask_maddubs_epi16() {
17641        let a = _mm256_set1_epi8(1);
17642        let b = _mm256_set1_epi8(1);
17643        let src = _mm256_set1_epi16(1);
17644        let r = _mm256_mask_maddubs_epi16(src, 0, a, b);
17645        assert_eq_m256i(r, src);
17646        let r = _mm256_mask_maddubs_epi16(src, 0b00000000_00000001, a, b);
17647        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2);
17648        assert_eq_m256i(r, e);
17649    }
17650
17651    #[simd_test(enable = "avx512bw,avx512vl")]
17652    fn test_mm256_maskz_maddubs_epi16() {
17653        let a = _mm256_set1_epi8(1);
17654        let b = _mm256_set1_epi8(1);
17655        let r = _mm256_maskz_maddubs_epi16(0, a, b);
17656        assert_eq_m256i(r, _mm256_setzero_si256());
17657        let r = _mm256_maskz_maddubs_epi16(0b00000000_11111111, a, b);
17658        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
17659        assert_eq_m256i(r, e);
17660    }
17661
17662    #[simd_test(enable = "avx512bw,avx512vl")]
17663    fn test_mm_mask_maddubs_epi16() {
17664        let a = _mm_set1_epi8(1);
17665        let b = _mm_set1_epi8(1);
17666        let src = _mm_set1_epi16(1);
17667        let r = _mm_mask_maddubs_epi16(src, 0, a, b);
17668        assert_eq_m128i(r, src);
17669        let r = _mm_mask_maddubs_epi16(src, 0b00000001, a, b);
17670        let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 2);
17671        assert_eq_m128i(r, e);
17672    }
17673
17674    #[simd_test(enable = "avx512bw,avx512vl")]
17675    fn test_mm_maskz_maddubs_epi16() {
17676        let a = _mm_set1_epi8(1);
17677        let b = _mm_set1_epi8(1);
17678        let r = _mm_maskz_maddubs_epi16(0, a, b);
17679        assert_eq_m128i(r, _mm_setzero_si128());
17680        let r = _mm_maskz_maddubs_epi16(0b00001111, a, b);
17681        let e = _mm_set_epi16(0, 0, 0, 0, 2, 2, 2, 2);
17682        assert_eq_m128i(r, e);
17683    }
17684
17685    #[simd_test(enable = "avx512bw")]
17686    fn test_mm512_packs_epi32() {
17687        let a = _mm512_set1_epi32(i32::MAX);
17688        let b = _mm512_set1_epi32(1);
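        // vpackssdw narrows signed 32-bit lanes to signed 16-bit with saturation, taking
        // four values from a and then four from b within each 128-bit lane; i32::MAX
        // saturates to i16::MAX.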
17689        let r = _mm512_packs_epi32(a, b);
17690        #[rustfmt::skip]
17691        let e = _mm512_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX,
17692                                 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
17693        assert_eq_m512i(r, e);
17694    }
17695
17696    #[simd_test(enable = "avx512bw")]
17697    fn test_mm512_mask_packs_epi32() {
17698        let a = _mm512_set1_epi32(i32::MAX);
17699        let b = _mm512_set1_epi32(1 << 16 | 1);
17700        let r = _mm512_mask_packs_epi32(a, 0, a, b);
17701        assert_eq_m512i(r, a);
17702        let r = _mm512_mask_packs_epi32(b, 0b00000000_00000000_00000000_00001111, a, b);
17703        #[rustfmt::skip]
17704        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
17705                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
17706        assert_eq_m512i(r, e);
17707    }
17708
17709    #[simd_test(enable = "avx512bw")]
17710    fn test_mm512_maskz_packs_epi32() {
17711        let a = _mm512_set1_epi32(i32::MAX);
17712        let b = _mm512_set1_epi32(1);
17713        let r = _mm512_maskz_packs_epi32(0, a, b);
17714        assert_eq_m512i(r, _mm512_setzero_si512());
17715        let r = _mm512_maskz_packs_epi32(0b00000000_00000000_00000000_00001111, a, b);
17716        #[rustfmt::skip]
17717        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17718                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
17719        assert_eq_m512i(r, e);
17720    }
17721
17722    #[simd_test(enable = "avx512bw,avx512vl")]
17723    fn test_mm256_mask_packs_epi32() {
17724        let a = _mm256_set1_epi32(i32::MAX);
17725        let b = _mm256_set1_epi32(1 << 16 | 1);
17726        let r = _mm256_mask_packs_epi32(a, 0, a, b);
17727        assert_eq_m256i(r, a);
17728        let r = _mm256_mask_packs_epi32(b, 0b00000000_00001111, a, b);
17729        #[rustfmt::skip]
17730        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
17731        assert_eq_m256i(r, e);
17732    }
17733
17734    #[simd_test(enable = "avx512bw,avx512vl")]
17735    fn test_mm256_maskz_packs_epi32() {
17736        let a = _mm256_set1_epi32(i32::MAX);
17737        let b = _mm256_set1_epi32(1);
17738        let r = _mm256_maskz_packs_epi32(0, a, b);
17739        assert_eq_m256i(r, _mm256_setzero_si256());
17740        let r = _mm256_maskz_packs_epi32(0b00000000_00001111, a, b);
17741        #[rustfmt::skip]
17742        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
17743        assert_eq_m256i(r, e);
17744    }
17745
17746    #[simd_test(enable = "avx512bw,avx512vl")]
17747    fn test_mm_mask_packs_epi32() {
17748        let a = _mm_set1_epi32(i32::MAX);
17749        let b = _mm_set1_epi32(1 << 16 | 1);
17750        let r = _mm_mask_packs_epi32(a, 0, a, b);
17751        assert_eq_m128i(r, a);
17752        let r = _mm_mask_packs_epi32(b, 0b00001111, a, b);
17753        let e = _mm_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
17754        assert_eq_m128i(r, e);
17755    }
17756
17757    #[simd_test(enable = "avx512bw,avx512vl")]
17758    fn test_mm_maskz_packs_epi32() {
17759        let a = _mm_set1_epi32(i32::MAX);
17760        let b = _mm_set1_epi32(1);
17761        let r = _mm_maskz_packs_epi32(0, a, b);
17762        assert_eq_m128i(r, _mm_setzero_si128());
17763        let r = _mm_maskz_packs_epi32(0b00001111, a, b);
17764        let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
17765        assert_eq_m128i(r, e);
17766    }
17767
17768    #[simd_test(enable = "avx512bw")]
17769    fn test_mm512_packs_epi16() {
17770        let a = _mm512_set1_epi16(i16::MAX);
17771        let b = _mm512_set1_epi16(1);
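        // vpacksswb narrows signed 16-bit lanes to signed 8-bit with saturation, eight
        // from a then eight from b per 128-bit lane; i16::MAX saturates to i8::MAX.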
17772        let r = _mm512_packs_epi16(a, b);
17773        #[rustfmt::skip]
17774        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
17775                                1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
17776                                1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
17777                                1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
17778        assert_eq_m512i(r, e);
17779    }
17780
17781    #[simd_test(enable = "avx512bw")]
17782    fn test_mm512_mask_packs_epi16() {
17783        let a = _mm512_set1_epi16(i16::MAX);
17784        let b = _mm512_set1_epi16(1 << 8 | 1);
17785        let r = _mm512_mask_packs_epi16(a, 0, a, b);
17786        assert_eq_m512i(r, a);
17787        let r = _mm512_mask_packs_epi16(
17788            b,
17789            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
17790            a,
17791            b,
17792        );
17793        #[rustfmt::skip]
17794        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
17795                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
17796                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
17797                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
17798        assert_eq_m512i(r, e);
17799    }
17800
17801    #[simd_test(enable = "avx512bw")]
17802    fn test_mm512_maskz_packs_epi16() {
17803        let a = _mm512_set1_epi16(i16::MAX);
17804        let b = _mm512_set1_epi16(1);
17805        let r = _mm512_maskz_packs_epi16(0, a, b);
17806        assert_eq_m512i(r, _mm512_setzero_si512());
17807        let r = _mm512_maskz_packs_epi16(
17808            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
17809            a,
17810            b,
17811        );
17812        #[rustfmt::skip]
17813        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17814                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17815                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17816                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
17817        assert_eq_m512i(r, e);
17818    }
17819
17820    #[simd_test(enable = "avx512bw,avx512vl")]
17821    fn test_mm256_mask_packs_epi16() {
17822        let a = _mm256_set1_epi16(i16::MAX);
17823        let b = _mm256_set1_epi16(1 << 8 | 1);
17824        let r = _mm256_mask_packs_epi16(a, 0, a, b);
17825        assert_eq_m256i(r, a);
17826        let r = _mm256_mask_packs_epi16(b, 0b00000000_00000000_00000000_00001111, a, b);
17827        #[rustfmt::skip]
17828        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
17829                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
17830        assert_eq_m256i(r, e);
17831    }
17832
17833    #[simd_test(enable = "avx512bw,avx512vl")]
17834    fn test_mm256_maskz_packs_epi16() {
17835        let a = _mm256_set1_epi16(i16::MAX);
17836        let b = _mm256_set1_epi16(1);
17837        let r = _mm256_maskz_packs_epi16(0, a, b);
17838        assert_eq_m256i(r, _mm256_setzero_si256());
17839        let r = _mm256_maskz_packs_epi16(0b00000000_00000000_00000000_00001111, a, b);
17840        #[rustfmt::skip]
17841        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17842                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
17843        assert_eq_m256i(r, e);
17844    }
17845
17846    #[simd_test(enable = "avx512bw,avx512vl")]
17847    fn test_mm_mask_packs_epi16() {
17848        let a = _mm_set1_epi16(i16::MAX);
17849        let b = _mm_set1_epi16(1 << 8 | 1);
17850        let r = _mm_mask_packs_epi16(a, 0, a, b);
17851        assert_eq_m128i(r, a);
17852        let r = _mm_mask_packs_epi16(b, 0b00000000_00001111, a, b);
17853        #[rustfmt::skip]
17854        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
17855        assert_eq_m128i(r, e);
17856    }
17857
17858    #[simd_test(enable = "avx512bw,avx512vl")]
17859    fn test_mm_maskz_packs_epi16() {
17860        let a = _mm_set1_epi16(i16::MAX);
17861        let b = _mm_set1_epi16(1);
17862        let r = _mm_maskz_packs_epi16(0, a, b);
17863        assert_eq_m128i(r, _mm_setzero_si128());
17864        let r = _mm_maskz_packs_epi16(0b00000000_00001111, a, b);
17865        #[rustfmt::skip]
17866        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
17867        assert_eq_m128i(r, e);
17868    }
17869
17870    #[simd_test(enable = "avx512bw")]
17871    fn test_mm512_packus_epi32() {
17872        let a = _mm512_set1_epi32(-1);
17873        let b = _mm512_set1_epi32(1);
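        // vpackusdw narrows signed 32-bit lanes to unsigned 16-bit with saturation,
        // so the -1 values from a clamp to 0.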
17874        let r = _mm512_packus_epi32(a, b);
17875        #[rustfmt::skip]
17876        let e = _mm512_set_epi16(1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
17877                                 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0);
17878        assert_eq_m512i(r, e);
17879    }
17880
17881    #[simd_test(enable = "avx512bw")]
17882    fn test_mm512_mask_packus_epi32() {
17883        let a = _mm512_set1_epi32(-1);
17884        let b = _mm512_set1_epi32(1 << 16 | 1);
17885        let r = _mm512_mask_packus_epi32(a, 0, a, b);
17886        assert_eq_m512i(r, a);
17887        let r = _mm512_mask_packus_epi32(b, 0b00000000_00000000_00000000_00001111, a, b);
17888        #[rustfmt::skip]
17889        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
17890                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
17891        assert_eq_m512i(r, e);
17892    }
17893
17894    #[simd_test(enable = "avx512bw")]
17895    fn test_mm512_maskz_packus_epi32() {
17896        let a = _mm512_set1_epi32(-1);
17897        let b = _mm512_set1_epi32(1);
17898        let r = _mm512_maskz_packus_epi32(0, a, b);
17899        assert_eq_m512i(r, _mm512_setzero_si512());
17900        let r = _mm512_maskz_packus_epi32(0b00000000_00000000_00000000_00001111, a, b);
17901        #[rustfmt::skip]
17902        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17903                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
17904        assert_eq_m512i(r, e);
17905    }
17906
17907    #[simd_test(enable = "avx512bw,avx512vl")]
17908    fn test_mm256_mask_packus_epi32() {
17909        let a = _mm256_set1_epi32(-1);
17910        let b = _mm256_set1_epi32(1 << 16 | 1);
17911        let r = _mm256_mask_packus_epi32(a, 0, a, b);
17912        assert_eq_m256i(r, a);
17913        let r = _mm256_mask_packus_epi32(b, 0b00000000_00001111, a, b);
17914        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
17915        assert_eq_m256i(r, e);
17916    }
17917
17918    #[simd_test(enable = "avx512bw,avx512vl")]
17919    fn test_mm256_maskz_packus_epi32() {
17920        let a = _mm256_set1_epi32(-1);
17921        let b = _mm256_set1_epi32(1);
17922        let r = _mm256_maskz_packus_epi32(0, a, b);
17923        assert_eq_m256i(r, _mm256_setzero_si256());
17924        let r = _mm256_maskz_packus_epi32(0b00000000_00001111, a, b);
17925        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
17926        assert_eq_m256i(r, e);
17927    }
17928
17929    #[simd_test(enable = "avx512bw,avx512vl")]
17930    fn test_mm_mask_packus_epi32() {
17931        let a = _mm_set1_epi32(-1);
17932        let b = _mm_set1_epi32(1 << 16 | 1);
17933        let r = _mm_mask_packus_epi32(a, 0, a, b);
17934        assert_eq_m128i(r, a);
17935        let r = _mm_mask_packus_epi32(b, 0b00001111, a, b);
17936        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
17937        assert_eq_m128i(r, e);
17938    }
17939
17940    #[simd_test(enable = "avx512bw,avx512vl")]
17941    fn test_mm_maskz_packus_epi32() {
17942        let a = _mm_set1_epi32(-1);
17943        let b = _mm_set1_epi32(1);
17944        let r = _mm_maskz_packus_epi32(0, a, b);
17945        assert_eq_m128i(r, _mm_setzero_si128());
17946        let r = _mm_maskz_packus_epi32(0b00001111, a, b);
17947        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
17948        assert_eq_m128i(r, e);
17949    }
17950
17951    #[simd_test(enable = "avx512bw")]
17952    fn test_mm512_packus_epi16() {
17953        let a = _mm512_set1_epi16(-1);
17954        let b = _mm512_set1_epi16(1);
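        // vpackuswb narrows signed 16-bit lanes to unsigned 8-bit with saturation;
        // -1 clamps to 0.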
17955        let r = _mm512_packus_epi16(a, b);
17956        #[rustfmt::skip]
17957        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
17958                                1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
17959                                1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
17960                                1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0);
17961        assert_eq_m512i(r, e);
17962    }
17963
17964    #[simd_test(enable = "avx512bw")]
17965    fn test_mm512_mask_packus_epi16() {
17966        let a = _mm512_set1_epi16(-1);
17967        let b = _mm512_set1_epi16(1 << 8 | 1);
17968        let r = _mm512_mask_packus_epi16(a, 0, a, b);
17969        assert_eq_m512i(r, a);
17970        let r = _mm512_mask_packus_epi16(
17971            b,
17972            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
17973            a,
17974            b,
17975        );
17976        #[rustfmt::skip]
17977        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
17978                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
17979                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
17980                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
17981        assert_eq_m512i(r, e);
17982    }
17983
17984    #[simd_test(enable = "avx512bw")]
17985    fn test_mm512_maskz_packus_epi16() {
17986        let a = _mm512_set1_epi16(-1);
17987        let b = _mm512_set1_epi16(1);
17988        let r = _mm512_maskz_packus_epi16(0, a, b);
17989        assert_eq_m512i(r, _mm512_setzero_si512());
17990        let r = _mm512_maskz_packus_epi16(
17991            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
17992            a,
17993            b,
17994        );
17995        #[rustfmt::skip]
17996        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17997                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17998                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17999                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
18000        assert_eq_m512i(r, e);
18001    }
18002
18003    #[simd_test(enable = "avx512bw,avx512vl")]
18004    fn test_mm256_mask_packus_epi16() {
18005        let a = _mm256_set1_epi16(-1);
18006        let b = _mm256_set1_epi16(1 << 8 | 1);
18007        let r = _mm256_mask_packus_epi16(a, 0, a, b);
18008        assert_eq_m256i(r, a);
18009        let r = _mm256_mask_packus_epi16(b, 0b00000000_00000000_00000000_00001111, a, b);
18010        #[rustfmt::skip]
18011        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
18012                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
18013        assert_eq_m256i(r, e);
18014    }
18015
18016    #[simd_test(enable = "avx512bw,avx512vl")]
18017    fn test_mm256_maskz_packus_epi16() {
18018        let a = _mm256_set1_epi16(-1);
18019        let b = _mm256_set1_epi16(1);
18020        let r = _mm256_maskz_packus_epi16(0, a, b);
18021        assert_eq_m256i(r, _mm256_setzero_si256());
18022        let r = _mm256_maskz_packus_epi16(0b00000000_00000000_00000000_00001111, a, b);
18023        #[rustfmt::skip]
18024        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
18025                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
18026        assert_eq_m256i(r, e);
18027    }
18028
18029    #[simd_test(enable = "avx512bw,avx512vl")]
18030    fn test_mm_mask_packus_epi16() {
18031        let a = _mm_set1_epi16(-1);
18032        let b = _mm_set1_epi16(1 << 8 | 1);
18033        let r = _mm_mask_packus_epi16(a, 0, a, b);
18034        assert_eq_m128i(r, a);
18035        let r = _mm_mask_packus_epi16(b, 0b00000000_00001111, a, b);
18036        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
18037        assert_eq_m128i(r, e);
18038    }
18039
18040    #[simd_test(enable = "avx512bw,avx512vl")]
18041    fn test_mm_maskz_packus_epi16() {
18042        let a = _mm_set1_epi16(-1);
18043        let b = _mm_set1_epi16(1);
18044        let r = _mm_maskz_packus_epi16(0, a, b);
18045        assert_eq_m128i(r, _mm_setzero_si128());
18046        let r = _mm_maskz_packus_epi16(0b00000000_00001111, a, b);
18047        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
18048        assert_eq_m128i(r, e);
18049    }
18050
18051    #[simd_test(enable = "avx512bw")]
18052    const fn test_mm512_avg_epu16() {
18053        let a = _mm512_set1_epi16(1);
18054        let b = _mm512_set1_epi16(1);
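        // vpavgw computes the unsigned rounding average (a + b + 1) >> 1, so avg(1, 1) == 1.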
18055        let r = _mm512_avg_epu16(a, b);
18056        let e = _mm512_set1_epi16(1);
18057        assert_eq_m512i(r, e);
18058    }
18059
18060    #[simd_test(enable = "avx512bw")]
18061    const fn test_mm512_mask_avg_epu16() {
18062        let a = _mm512_set1_epi16(1);
18063        let b = _mm512_set1_epi16(1);
18064        let r = _mm512_mask_avg_epu16(a, 0, a, b);
18065        assert_eq_m512i(r, a);
18066        let r = _mm512_mask_avg_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
18067        #[rustfmt::skip]
18068        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
18069                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
18070        assert_eq_m512i(r, e);
18071    }
18072
18073    #[simd_test(enable = "avx512bw")]
18074    const fn test_mm512_maskz_avg_epu16() {
18075        let a = _mm512_set1_epi16(1);
18076        let b = _mm512_set1_epi16(1);
18077        let r = _mm512_maskz_avg_epu16(0, a, b);
18078        assert_eq_m512i(r, _mm512_setzero_si512());
18079        let r = _mm512_maskz_avg_epu16(0b00000000_00000000_00000000_00001111, a, b);
18080        #[rustfmt::skip]
18081        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
18082                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
18083        assert_eq_m512i(r, e);
18084    }
18085
18086    #[simd_test(enable = "avx512bw,avx512vl")]
18087    const fn test_mm256_mask_avg_epu16() {
18088        let a = _mm256_set1_epi16(1);
18089        let b = _mm256_set1_epi16(1);
18090        let r = _mm256_mask_avg_epu16(a, 0, a, b);
18091        assert_eq_m256i(r, a);
18092        let r = _mm256_mask_avg_epu16(a, 0b00000000_00001111, a, b);
18093        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
18094        assert_eq_m256i(r, e);
18095    }
18096
18097    #[simd_test(enable = "avx512bw,avx512vl")]
18098    const fn test_mm256_maskz_avg_epu16() {
18099        let a = _mm256_set1_epi16(1);
18100        let b = _mm256_set1_epi16(1);
18101        let r = _mm256_maskz_avg_epu16(0, a, b);
18102        assert_eq_m256i(r, _mm256_setzero_si256());
18103        let r = _mm256_maskz_avg_epu16(0b00000000_00001111, a, b);
18104        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
18105        assert_eq_m256i(r, e);
18106    }
18107
18108    #[simd_test(enable = "avx512bw,avx512vl")]
18109    const fn test_mm_mask_avg_epu16() {
18110        let a = _mm_set1_epi16(1);
18111        let b = _mm_set1_epi16(1);
18112        let r = _mm_mask_avg_epu16(a, 0, a, b);
18113        assert_eq_m128i(r, a);
18114        let r = _mm_mask_avg_epu16(a, 0b00001111, a, b);
18115        let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1);
18116        assert_eq_m128i(r, e);
18117    }
18118
18119    #[simd_test(enable = "avx512bw,avx512vl")]
18120    const fn test_mm_maskz_avg_epu16() {
18121        let a = _mm_set1_epi16(1);
18122        let b = _mm_set1_epi16(1);
18123        let r = _mm_maskz_avg_epu16(0, a, b);
18124        assert_eq_m128i(r, _mm_setzero_si128());
18125        let r = _mm_maskz_avg_epu16(0b00001111, a, b);
18126        let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
18127        assert_eq_m128i(r, e);
18128    }
18129
18130    #[simd_test(enable = "avx512bw")]
18131    const fn test_mm512_avg_epu8() {
18132        let a = _mm512_set1_epi8(1);
18133        let b = _mm512_set1_epi8(1);
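        // vpavgb is the byte-wise unsigned rounding average (a + b + 1) >> 1.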
18134        let r = _mm512_avg_epu8(a, b);
18135        let e = _mm512_set1_epi8(1);
18136        assert_eq_m512i(r, e);
18137    }
18138
18139    #[simd_test(enable = "avx512bw")]
18140    const fn test_mm512_mask_avg_epu8() {
18141        let a = _mm512_set1_epi8(1);
18142        let b = _mm512_set1_epi8(1);
18143        let r = _mm512_mask_avg_epu8(a, 0, a, b);
18144        assert_eq_m512i(r, a);
18145        let r = _mm512_mask_avg_epu8(
18146            a,
18147            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
18148            a,
18149            b,
18150        );
18151        #[rustfmt::skip]
18152        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
18153                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
18154                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
18155                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
18156        assert_eq_m512i(r, e);
18157    }
18158
18159    #[simd_test(enable = "avx512bw")]
18160    const fn test_mm512_maskz_avg_epu8() {
18161        let a = _mm512_set1_epi8(1);
18162        let b = _mm512_set1_epi8(1);
18163        let r = _mm512_maskz_avg_epu8(0, a, b);
18164        assert_eq_m512i(r, _mm512_setzero_si512());
18165        let r = _mm512_maskz_avg_epu8(
18166            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
18167            a,
18168            b,
18169        );
18170        #[rustfmt::skip]
18171        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
18172                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
18173                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
18174                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
18175        assert_eq_m512i(r, e);
18176    }
18177
18178    #[simd_test(enable = "avx512bw,avx512vl")]
18179    const fn test_mm256_mask_avg_epu8() {
18180        let a = _mm256_set1_epi8(1);
18181        let b = _mm256_set1_epi8(1);
18182        let r = _mm256_mask_avg_epu8(a, 0, a, b);
18183        assert_eq_m256i(r, a);
18184        let r = _mm256_mask_avg_epu8(a, 0b00000000_00000000_00000000_00001111, a, b);
18185        #[rustfmt::skip]
18186        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
18187                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
18188        assert_eq_m256i(r, e);
18189    }
18190
18191    #[simd_test(enable = "avx512bw,avx512vl")]
18192    const fn test_mm256_maskz_avg_epu8() {
18193        let a = _mm256_set1_epi8(1);
18194        let b = _mm256_set1_epi8(1);
18195        let r = _mm256_maskz_avg_epu8(0, a, b);
18196        assert_eq_m256i(r, _mm256_setzero_si256());
18197        let r = _mm256_maskz_avg_epu8(0b00000000_00000000_00000000_00001111, a, b);
18198        #[rustfmt::skip]
18199        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
18200                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
18201        assert_eq_m256i(r, e);
18202    }
18203
18204    #[simd_test(enable = "avx512bw,avx512vl")]
18205    const fn test_mm_mask_avg_epu8() {
18206        let a = _mm_set1_epi8(1);
18207        let b = _mm_set1_epi8(1);
18208        let r = _mm_mask_avg_epu8(a, 0, a, b);
18209        assert_eq_m128i(r, a);
18210        let r = _mm_mask_avg_epu8(a, 0b00000000_00001111, a, b);
18211        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
18212        assert_eq_m128i(r, e);
18213    }
18214
18215    #[simd_test(enable = "avx512bw,avx512vl")]
18216    const fn test_mm_maskz_avg_epu8() {
18217        let a = _mm_set1_epi8(1);
18218        let b = _mm_set1_epi8(1);
18219        let r = _mm_maskz_avg_epu8(0, a, b);
18220        assert_eq_m128i(r, _mm_setzero_si128());
18221        let r = _mm_maskz_avg_epu8(0b00000000_00001111, a, b);
18222        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
18223        assert_eq_m128i(r, e);
18224    }
18225
18226    #[simd_test(enable = "avx512bw")]
18227    fn test_mm512_sll_epi16() {
18228        let a = _mm512_set1_epi16(1 << 15);
18229        let count = _mm_set1_epi16(2);
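        // The shift count is the value of the low 64 bits of `count`; _mm_set1_epi16(2)
        // makes that far larger than 15, so every lane is zeroed (1 << 15 shifted left by
        // 2 would also overflow to 0).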
18230        let r = _mm512_sll_epi16(a, count);
18231        let e = _mm512_set1_epi16(0);
18232        assert_eq_m512i(r, e);
18233    }
18234
18235    #[simd_test(enable = "avx512bw")]
18236    fn test_mm512_mask_sll_epi16() {
18237        let a = _mm512_set1_epi16(1 << 15);
18238        let count = _mm_set1_epi16(2);
18239        let r = _mm512_mask_sll_epi16(a, 0, a, count);
18240        assert_eq_m512i(r, a);
18241        let r = _mm512_mask_sll_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
18242        let e = _mm512_set1_epi16(0);
18243        assert_eq_m512i(r, e);
18244    }
18245
18246    #[simd_test(enable = "avx512bw")]
18247    fn test_mm512_maskz_sll_epi16() {
18248        let a = _mm512_set1_epi16(1 << 15);
18249        let count = _mm_set1_epi16(2);
18250        let r = _mm512_maskz_sll_epi16(0, a, count);
18251        assert_eq_m512i(r, _mm512_setzero_si512());
18252        let r = _mm512_maskz_sll_epi16(0b11111111_11111111_11111111_11111111, a, count);
18253        let e = _mm512_set1_epi16(0);
18254        assert_eq_m512i(r, e);
18255    }
18256
18257    #[simd_test(enable = "avx512bw,avx512vl")]
18258    fn test_mm256_mask_sll_epi16() {
18259        let a = _mm256_set1_epi16(1 << 15);
18260        let count = _mm_set1_epi16(2);
18261        let r = _mm256_mask_sll_epi16(a, 0, a, count);
18262        assert_eq_m256i(r, a);
18263        let r = _mm256_mask_sll_epi16(a, 0b11111111_11111111, a, count);
18264        let e = _mm256_set1_epi16(0);
18265        assert_eq_m256i(r, e);
18266    }
18267
18268    #[simd_test(enable = "avx512bw,avx512vl")]
18269    fn test_mm256_maskz_sll_epi16() {
18270        let a = _mm256_set1_epi16(1 << 15);
18271        let count = _mm_set1_epi16(2);
18272        let r = _mm256_maskz_sll_epi16(0, a, count);
18273        assert_eq_m256i(r, _mm256_setzero_si256());
18274        let r = _mm256_maskz_sll_epi16(0b11111111_11111111, a, count);
18275        let e = _mm256_set1_epi16(0);
18276        assert_eq_m256i(r, e);
18277    }
18278
18279    #[simd_test(enable = "avx512bw,avx512vl")]
18280    fn test_mm_mask_sll_epi16() {
18281        let a = _mm_set1_epi16(1 << 15);
18282        let count = _mm_set1_epi16(2);
18283        let r = _mm_mask_sll_epi16(a, 0, a, count);
18284        assert_eq_m128i(r, a);
18285        let r = _mm_mask_sll_epi16(a, 0b11111111, a, count);
18286        let e = _mm_set1_epi16(0);
18287        assert_eq_m128i(r, e);
18288    }
18289
18290    #[simd_test(enable = "avx512bw,avx512vl")]
18291    fn test_mm_maskz_sll_epi16() {
18292        let a = _mm_set1_epi16(1 << 15);
18293        let count = _mm_set1_epi16(2);
18294        let r = _mm_maskz_sll_epi16(0, a, count);
18295        assert_eq_m128i(r, _mm_setzero_si128());
18296        let r = _mm_maskz_sll_epi16(0b11111111, a, count);
18297        let e = _mm_set1_epi16(0);
18298        assert_eq_m128i(r, e);
18299    }
18300
18301    #[simd_test(enable = "avx512bw")]
18302    const fn test_mm512_slli_epi16() {
18303        let a = _mm512_set1_epi16(1 << 15);
18304        let r = _mm512_slli_epi16::<1>(a);
18305        let e = _mm512_set1_epi16(0);
18306        assert_eq_m512i(r, e);
18307    }
18308
18309    #[simd_test(enable = "avx512bw")]
18310    const fn test_mm512_mask_slli_epi16() {
18311        let a = _mm512_set1_epi16(1 << 15);
18312        let r = _mm512_mask_slli_epi16::<1>(a, 0, a);
18313        assert_eq_m512i(r, a);
18314        let r = _mm512_mask_slli_epi16::<1>(a, 0b11111111_11111111_11111111_11111111, a);
18315        let e = _mm512_set1_epi16(0);
18316        assert_eq_m512i(r, e);
18317    }
18318
18319    #[simd_test(enable = "avx512bw")]
18320    const fn test_mm512_maskz_slli_epi16() {
18321        let a = _mm512_set1_epi16(1 << 15);
18322        let r = _mm512_maskz_slli_epi16::<1>(0, a);
18323        assert_eq_m512i(r, _mm512_setzero_si512());
18324        let r = _mm512_maskz_slli_epi16::<1>(0b11111111_11111111_11111111_11111111, a);
18325        let e = _mm512_set1_epi16(0);
18326        assert_eq_m512i(r, e);
18327    }
18328
18329    #[simd_test(enable = "avx512bw,avx512vl")]
18330    const fn test_mm256_mask_slli_epi16() {
18331        let a = _mm256_set1_epi16(1 << 15);
18332        let r = _mm256_mask_slli_epi16::<1>(a, 0, a);
18333        assert_eq_m256i(r, a);
18334        let r = _mm256_mask_slli_epi16::<1>(a, 0b11111111_11111111, a);
18335        let e = _mm256_set1_epi16(0);
18336        assert_eq_m256i(r, e);
18337    }
18338
18339    #[simd_test(enable = "avx512bw,avx512vl")]
18340    const fn test_mm256_maskz_slli_epi16() {
18341        let a = _mm256_set1_epi16(1 << 15);
18342        let r = _mm256_maskz_slli_epi16::<1>(0, a);
18343        assert_eq_m256i(r, _mm256_setzero_si256());
18344        let r = _mm256_maskz_slli_epi16::<1>(0b11111111_11111111, a);
18345        let e = _mm256_set1_epi16(0);
18346        assert_eq_m256i(r, e);
18347    }
18348
18349    #[simd_test(enable = "avx512bw,avx512vl")]
18350    const fn test_mm_mask_slli_epi16() {
18351        let a = _mm_set1_epi16(1 << 15);
18352        let r = _mm_mask_slli_epi16::<1>(a, 0, a);
18353        assert_eq_m128i(r, a);
18354        let r = _mm_mask_slli_epi16::<1>(a, 0b11111111, a);
18355        let e = _mm_set1_epi16(0);
18356        assert_eq_m128i(r, e);
18357    }
18358
18359    #[simd_test(enable = "avx512bw,avx512vl")]
18360    const fn test_mm_maskz_slli_epi16() {
18361        let a = _mm_set1_epi16(1 << 15);
18362        let r = _mm_maskz_slli_epi16::<1>(0, a);
18363        assert_eq_m128i(r, _mm_setzero_si128());
18364        let r = _mm_maskz_slli_epi16::<1>(0b11111111, a);
18365        let e = _mm_set1_epi16(0);
18366        assert_eq_m128i(r, e);
18367    }
18368
18369    #[simd_test(enable = "avx512bw")]
18370    const fn test_mm512_sllv_epi16() {
18371        let a = _mm512_set1_epi16(1 << 15);
18372        let count = _mm512_set1_epi16(2);
18373        let r = _mm512_sllv_epi16(a, count);
18374        let e = _mm512_set1_epi16(0);
18375        assert_eq_m512i(r, e);
18376    }
18377
18378    #[simd_test(enable = "avx512bw")]
18379    const fn test_mm512_mask_sllv_epi16() {
18380        let a = _mm512_set1_epi16(1 << 15);
18381        let count = _mm512_set1_epi16(2);
18382        let r = _mm512_mask_sllv_epi16(a, 0, a, count);
18383        assert_eq_m512i(r, a);
18384        let r = _mm512_mask_sllv_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
18385        let e = _mm512_set1_epi16(0);
18386        assert_eq_m512i(r, e);
18387    }
18388
18389    #[simd_test(enable = "avx512bw")]
18390    const fn test_mm512_maskz_sllv_epi16() {
18391        let a = _mm512_set1_epi16(1 << 15);
18392        let count = _mm512_set1_epi16(2);
18393        let r = _mm512_maskz_sllv_epi16(0, a, count);
18394        assert_eq_m512i(r, _mm512_setzero_si512());
18395        let r = _mm512_maskz_sllv_epi16(0b11111111_11111111_11111111_11111111, a, count);
18396        let e = _mm512_set1_epi16(0);
18397        assert_eq_m512i(r, e);
18398    }
18399
18400    #[simd_test(enable = "avx512bw,avx512vl")]
18401    const fn test_mm256_sllv_epi16() {
18402        let a = _mm256_set1_epi16(1 << 15);
18403        let count = _mm256_set1_epi16(2);
18404        let r = _mm256_sllv_epi16(a, count);
18405        let e = _mm256_set1_epi16(0);
18406        assert_eq_m256i(r, e);
18407    }
18408
18409    #[simd_test(enable = "avx512bw,avx512vl")]
18410    const fn test_mm256_mask_sllv_epi16() {
18411        let a = _mm256_set1_epi16(1 << 15);
18412        let count = _mm256_set1_epi16(2);
18413        let r = _mm256_mask_sllv_epi16(a, 0, a, count);
18414        assert_eq_m256i(r, a);
18415        let r = _mm256_mask_sllv_epi16(a, 0b11111111_11111111, a, count);
18416        let e = _mm256_set1_epi16(0);
18417        assert_eq_m256i(r, e);
18418    }
18419
18420    #[simd_test(enable = "avx512bw,avx512vl")]
18421    const fn test_mm256_maskz_sllv_epi16() {
18422        let a = _mm256_set1_epi16(1 << 15);
18423        let count = _mm256_set1_epi16(2);
18424        let r = _mm256_maskz_sllv_epi16(0, a, count);
18425        assert_eq_m256i(r, _mm256_setzero_si256());
18426        let r = _mm256_maskz_sllv_epi16(0b11111111_11111111, a, count);
18427        let e = _mm256_set1_epi16(0);
18428        assert_eq_m256i(r, e);
18429    }
18430
18431    #[simd_test(enable = "avx512bw,avx512vl")]
18432    const fn test_mm_sllv_epi16() {
18433        let a = _mm_set1_epi16(1 << 15);
18434        let count = _mm_set1_epi16(2);
18435        let r = _mm_sllv_epi16(a, count);
18436        let e = _mm_set1_epi16(0);
18437        assert_eq_m128i(r, e);
18438    }
18439
18440    #[simd_test(enable = "avx512bw,avx512vl")]
18441    const fn test_mm_mask_sllv_epi16() {
18442        let a = _mm_set1_epi16(1 << 15);
18443        let count = _mm_set1_epi16(2);
18444        let r = _mm_mask_sllv_epi16(a, 0, a, count);
18445        assert_eq_m128i(r, a);
18446        let r = _mm_mask_sllv_epi16(a, 0b11111111, a, count);
18447        let e = _mm_set1_epi16(0);
18448        assert_eq_m128i(r, e);
18449    }
18450
18451    #[simd_test(enable = "avx512bw,avx512vl")]
18452    const fn test_mm_maskz_sllv_epi16() {
18453        let a = _mm_set1_epi16(1 << 15);
18454        let count = _mm_set1_epi16(2);
18455        let r = _mm_maskz_sllv_epi16(0, a, count);
18456        assert_eq_m128i(r, _mm_setzero_si128());
18457        let r = _mm_maskz_sllv_epi16(0b11111111, a, count);
18458        let e = _mm_set1_epi16(0);
18459        assert_eq_m128i(r, e);
18460    }
18461
18462    #[simd_test(enable = "avx512bw")]
18463    fn test_mm512_srl_epi16() {
18464        let a = _mm512_set1_epi16(1 << 1);
18465        let count = _mm_set1_epi16(2);
18466        let r = _mm512_srl_epi16(a, count);
18467        let e = _mm512_set1_epi16(0);
18468        assert_eq_m512i(r, e);
18469    }
18470
18471    #[simd_test(enable = "avx512bw")]
18472    fn test_mm512_mask_srl_epi16() {
18473        let a = _mm512_set1_epi16(1 << 1);
18474        let count = _mm_set1_epi16(2);
18475        let r = _mm512_mask_srl_epi16(a, 0, a, count);
18476        assert_eq_m512i(r, a);
18477        let r = _mm512_mask_srl_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
18478        let e = _mm512_set1_epi16(0);
18479        assert_eq_m512i(r, e);
18480    }
18481
18482    #[simd_test(enable = "avx512bw")]
18483    fn test_mm512_maskz_srl_epi16() {
18484        let a = _mm512_set1_epi16(1 << 1);
18485        let count = _mm_set1_epi16(2);
18486        let r = _mm512_maskz_srl_epi16(0, a, count);
18487        assert_eq_m512i(r, _mm512_setzero_si512());
18488        let r = _mm512_maskz_srl_epi16(0b11111111_11111111_11111111_11111111, a, count);
18489        let e = _mm512_set1_epi16(0);
18490        assert_eq_m512i(r, e);
18491    }
18492
18493    #[simd_test(enable = "avx512bw,avx512vl")]
18494    fn test_mm256_mask_srl_epi16() {
18495        let a = _mm256_set1_epi16(1 << 1);
18496        let count = _mm_set1_epi16(2);
18497        let r = _mm256_mask_srl_epi16(a, 0, a, count);
18498        assert_eq_m256i(r, a);
18499        let r = _mm256_mask_srl_epi16(a, 0b11111111_11111111, a, count);
18500        let e = _mm256_set1_epi16(0);
18501        assert_eq_m256i(r, e);
18502    }
18503
18504    #[simd_test(enable = "avx512bw,avx512vl")]
18505    fn test_mm256_maskz_srl_epi16() {
18506        let a = _mm256_set1_epi16(1 << 1);
18507        let count = _mm_set1_epi16(2);
18508        let r = _mm256_maskz_srl_epi16(0, a, count);
18509        assert_eq_m256i(r, _mm256_setzero_si256());
18510        let r = _mm256_maskz_srl_epi16(0b11111111_11111111, a, count);
18511        let e = _mm256_set1_epi16(0);
18512        assert_eq_m256i(r, e);
18513    }
18514
18515    #[simd_test(enable = "avx512bw,avx512vl")]
18516    fn test_mm_mask_srl_epi16() {
18517        let a = _mm_set1_epi16(1 << 1);
18518        let count = _mm_set1_epi16(2);
18519        let r = _mm_mask_srl_epi16(a, 0, a, count);
18520        assert_eq_m128i(r, a);
18521        let r = _mm_mask_srl_epi16(a, 0b11111111, a, count);
18522        let e = _mm_set1_epi16(0);
18523        assert_eq_m128i(r, e);
18524    }
18525
18526    #[simd_test(enable = "avx512bw,avx512vl")]
18527    fn test_mm_maskz_srl_epi16() {
18528        let a = _mm_set1_epi16(1 << 1);
18529        let count = _mm_set1_epi16(2);
18530        let r = _mm_maskz_srl_epi16(0, a, count);
18531        assert_eq_m128i(r, _mm_setzero_si128());
18532        let r = _mm_maskz_srl_epi16(0b11111111, a, count);
18533        let e = _mm_set1_epi16(0);
18534        assert_eq_m128i(r, e);
18535    }
18536
18537    #[simd_test(enable = "avx512bw")]
18538    const fn test_mm512_srli_epi16() {
18539        let a = _mm512_set1_epi16(1 << 1);
18540        let r = _mm512_srli_epi16::<2>(a);
18541        let e = _mm512_set1_epi16(0);
18542        assert_eq_m512i(r, e);
18543    }
18544
18545    #[simd_test(enable = "avx512bw")]
18546    const fn test_mm512_mask_srli_epi16() {
18547        let a = _mm512_set1_epi16(1 << 1);
18548        let r = _mm512_mask_srli_epi16::<2>(a, 0, a);
18549        assert_eq_m512i(r, a);
18550        let r = _mm512_mask_srli_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a);
18551        let e = _mm512_set1_epi16(0);
18552        assert_eq_m512i(r, e);
18553    }
18554
18555    #[simd_test(enable = "avx512bw")]
18556    const fn test_mm512_maskz_srli_epi16() {
18557        let a = _mm512_set1_epi16(1 << 1);
18558        let r = _mm512_maskz_srli_epi16::<2>(0, a);
18559        assert_eq_m512i(r, _mm512_setzero_si512());
18560        let r = _mm512_maskz_srli_epi16::<2>(0b11111111_11111111_11111111_11111111, a);
18561        let e = _mm512_set1_epi16(0);
18562        assert_eq_m512i(r, e);
18563    }
18564
18565    #[simd_test(enable = "avx512bw,avx512vl")]
18566    const fn test_mm256_mask_srli_epi16() {
18567        let a = _mm256_set1_epi16(1 << 1);
18568        let r = _mm256_mask_srli_epi16::<2>(a, 0, a);
18569        assert_eq_m256i(r, a);
18570        let r = _mm256_mask_srli_epi16::<2>(a, 0b11111111_11111111, a);
18571        let e = _mm256_set1_epi16(0);
18572        assert_eq_m256i(r, e);
18573    }
18574
18575    #[simd_test(enable = "avx512bw,avx512vl")]
18576    const fn test_mm256_maskz_srli_epi16() {
18577        let a = _mm256_set1_epi16(1 << 1);
18578        let r = _mm256_maskz_srli_epi16::<2>(0, a);
18579        assert_eq_m256i(r, _mm256_setzero_si256());
18580        let r = _mm256_maskz_srli_epi16::<2>(0b11111111_11111111, a);
18581        let e = _mm256_set1_epi16(0);
18582        assert_eq_m256i(r, e);
18583    }
18584
18585    #[simd_test(enable = "avx512bw,avx512vl")]
18586    const fn test_mm_mask_srli_epi16() {
18587        let a = _mm_set1_epi16(1 << 1);
18588        let r = _mm_mask_srli_epi16::<2>(a, 0, a);
18589        assert_eq_m128i(r, a);
18590        let r = _mm_mask_srli_epi16::<2>(a, 0b11111111, a);
18591        let e = _mm_set1_epi16(0);
18592        assert_eq_m128i(r, e);
18593    }
18594
18595    #[simd_test(enable = "avx512bw,avx512vl")]
18596    const fn test_mm_maskz_srli_epi16() {
18597        let a = _mm_set1_epi16(1 << 1);
18598        let r = _mm_maskz_srli_epi16::<2>(0, a);
18599        assert_eq_m128i(r, _mm_setzero_si128());
18600        let r = _mm_maskz_srli_epi16::<2>(0b11111111, a);
18601        let e = _mm_set1_epi16(0);
18602        assert_eq_m128i(r, e);
18603    }
18604
18605    #[simd_test(enable = "avx512bw")]
18606    const fn test_mm512_srlv_epi16() {
18607        let a = _mm512_set1_epi16(1 << 1);
18608        let count = _mm512_set1_epi16(2);
18609        let r = _mm512_srlv_epi16(a, count);
18610        let e = _mm512_set1_epi16(0);
18611        assert_eq_m512i(r, e);
18612    }
18613
18614    #[simd_test(enable = "avx512bw")]
18615    const fn test_mm512_mask_srlv_epi16() {
18616        let a = _mm512_set1_epi16(1 << 1);
18617        let count = _mm512_set1_epi16(2);
18618        let r = _mm512_mask_srlv_epi16(a, 0, a, count);
18619        assert_eq_m512i(r, a);
18620        let r = _mm512_mask_srlv_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
18621        let e = _mm512_set1_epi16(0);
18622        assert_eq_m512i(r, e);
18623    }
18624
18625    #[simd_test(enable = "avx512bw")]
18626    const fn test_mm512_maskz_srlv_epi16() {
18627        let a = _mm512_set1_epi16(1 << 1);
18628        let count = _mm512_set1_epi16(2);
18629        let r = _mm512_maskz_srlv_epi16(0, a, count);
18630        assert_eq_m512i(r, _mm512_setzero_si512());
18631        let r = _mm512_maskz_srlv_epi16(0b11111111_11111111_11111111_11111111, a, count);
18632        let e = _mm512_set1_epi16(0);
18633        assert_eq_m512i(r, e);
18634    }
18635
18636    #[simd_test(enable = "avx512bw,avx512vl")]
18637    const fn test_mm256_srlv_epi16() {
18638        let a = _mm256_set1_epi16(1 << 1);
18639        let count = _mm256_set1_epi16(2);
18640        let r = _mm256_srlv_epi16(a, count);
18641        let e = _mm256_set1_epi16(0);
18642        assert_eq_m256i(r, e);
18643    }
18644
18645    #[simd_test(enable = "avx512bw,avx512vl")]
18646    const fn test_mm256_mask_srlv_epi16() {
18647        let a = _mm256_set1_epi16(1 << 1);
18648        let count = _mm256_set1_epi16(2);
18649        let r = _mm256_mask_srlv_epi16(a, 0, a, count);
18650        assert_eq_m256i(r, a);
18651        let r = _mm256_mask_srlv_epi16(a, 0b11111111_11111111, a, count);
18652        let e = _mm256_set1_epi16(0);
18653        assert_eq_m256i(r, e);
18654    }
18655
18656    #[simd_test(enable = "avx512bw,avx512vl")]
18657    const fn test_mm256_maskz_srlv_epi16() {
18658        let a = _mm256_set1_epi16(1 << 1);
18659        let count = _mm256_set1_epi16(2);
18660        let r = _mm256_maskz_srlv_epi16(0, a, count);
18661        assert_eq_m256i(r, _mm256_setzero_si256());
18662        let r = _mm256_maskz_srlv_epi16(0b11111111_11111111, a, count);
18663        let e = _mm256_set1_epi16(0);
18664        assert_eq_m256i(r, e);
18665    }
18666
18667    #[simd_test(enable = "avx512bw,avx512vl")]
18668    const fn test_mm_srlv_epi16() {
18669        let a = _mm_set1_epi16(1 << 1);
18670        let count = _mm_set1_epi16(2);
18671        let r = _mm_srlv_epi16(a, count);
18672        let e = _mm_set1_epi16(0);
18673        assert_eq_m128i(r, e);
18674    }
18675
18676    #[simd_test(enable = "avx512bw,avx512vl")]
18677    const fn test_mm_mask_srlv_epi16() {
18678        let a = _mm_set1_epi16(1 << 1);
18679        let count = _mm_set1_epi16(2);
18680        let r = _mm_mask_srlv_epi16(a, 0, a, count);
18681        assert_eq_m128i(r, a);
18682        let r = _mm_mask_srlv_epi16(a, 0b11111111, a, count);
18683        let e = _mm_set1_epi16(0);
18684        assert_eq_m128i(r, e);
18685    }
18686
18687    #[simd_test(enable = "avx512bw,avx512vl")]
18688    const fn test_mm_maskz_srlv_epi16() {
18689        let a = _mm_set1_epi16(1 << 1);
18690        let count = _mm_set1_epi16(2);
18691        let r = _mm_maskz_srlv_epi16(0, a, count);
18692        assert_eq_m128i(r, _mm_setzero_si128());
18693        let r = _mm_maskz_srlv_epi16(0b11111111, a, count);
18694        let e = _mm_set1_epi16(0);
18695        assert_eq_m128i(r, e);
18696    }
18697
18698    #[simd_test(enable = "avx512bw")]
18699    fn test_mm512_sra_epi16() {
18700        let a = _mm512_set1_epi16(8);
18701        let count = _mm_set1_epi16(1);
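        // vpsraw reads its shift count from the low 64 bits of `count`; a count above 15
        // fills each lane with the sign bit, so the positive 8 becomes 0 rather than 8 >> 1.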
18702        let r = _mm512_sra_epi16(a, count);
18703        let e = _mm512_set1_epi16(0);
18704        assert_eq_m512i(r, e);
18705    }
18706
18707    #[simd_test(enable = "avx512bw")]
18708    fn test_mm512_mask_sra_epi16() {
18709        let a = _mm512_set1_epi16(8);
18710        let count = _mm_set1_epi16(1);
18711        let r = _mm512_mask_sra_epi16(a, 0, a, count);
18712        assert_eq_m512i(r, a);
18713        let r = _mm512_mask_sra_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
18714        let e = _mm512_set1_epi16(0);
18715        assert_eq_m512i(r, e);
18716    }
18717
18718    #[simd_test(enable = "avx512bw")]
18719    fn test_mm512_maskz_sra_epi16() {
18720        let a = _mm512_set1_epi16(8);
18721        let count = _mm_set1_epi16(1);
18722        let r = _mm512_maskz_sra_epi16(0, a, count);
18723        assert_eq_m512i(r, _mm512_setzero_si512());
18724        let r = _mm512_maskz_sra_epi16(0b11111111_11111111_11111111_11111111, a, count);
18725        let e = _mm512_set1_epi16(0);
18726        assert_eq_m512i(r, e);
18727    }
18728
18729    #[simd_test(enable = "avx512bw,avx512vl")]
18730    fn test_mm256_mask_sra_epi16() {
18731        let a = _mm256_set1_epi16(8);
18732        let count = _mm_set1_epi16(1);
18733        let r = _mm256_mask_sra_epi16(a, 0, a, count);
18734        assert_eq_m256i(r, a);
18735        let r = _mm256_mask_sra_epi16(a, 0b11111111_11111111, a, count);
18736        let e = _mm256_set1_epi16(0);
18737        assert_eq_m256i(r, e);
18738    }
18739
18740    #[simd_test(enable = "avx512bw,avx512vl")]
18741    fn test_mm256_maskz_sra_epi16() {
18742        let a = _mm256_set1_epi16(8);
18743        let count = _mm_set1_epi16(1);
18744        let r = _mm256_maskz_sra_epi16(0, a, count);
18745        assert_eq_m256i(r, _mm256_setzero_si256());
18746        let r = _mm256_maskz_sra_epi16(0b11111111_11111111, a, count);
18747        let e = _mm256_set1_epi16(0);
18748        assert_eq_m256i(r, e);
18749    }
18750
18751    #[simd_test(enable = "avx512bw,avx512vl")]
18752    fn test_mm_mask_sra_epi16() {
18753        let a = _mm_set1_epi16(8);
18754        let count = _mm_set1_epi16(1);
18755        let r = _mm_mask_sra_epi16(a, 0, a, count);
18756        assert_eq_m128i(r, a);
18757        let r = _mm_mask_sra_epi16(a, 0b11111111, a, count);
18758        let e = _mm_set1_epi16(0);
18759        assert_eq_m128i(r, e);
18760    }
18761
18762    #[simd_test(enable = "avx512bw,avx512vl")]
18763    fn test_mm_maskz_sra_epi16() {
18764        let a = _mm_set1_epi16(8);
18765        let count = _mm_set1_epi16(1);
18766        let r = _mm_maskz_sra_epi16(0, a, count);
18767        assert_eq_m128i(r, _mm_setzero_si128());
18768        let r = _mm_maskz_sra_epi16(0b11111111, a, count);
18769        let e = _mm_set1_epi16(0);
18770        assert_eq_m128i(r, e);
18771    }
18772
18773    #[simd_test(enable = "avx512bw")]
18774    const fn test_mm512_srai_epi16() {
18775        let a = _mm512_set1_epi16(8);
18776        let r = _mm512_srai_epi16::<2>(a);
18777        let e = _mm512_set1_epi16(2);
18778        assert_eq_m512i(r, e);
18779    }
18780
18781    #[simd_test(enable = "avx512bw")]
18782    const fn test_mm512_mask_srai_epi16() {
18783        let a = _mm512_set1_epi16(8);
18784        let r = _mm512_mask_srai_epi16::<2>(a, 0, a);
18785        assert_eq_m512i(r, a);
18786        let r = _mm512_mask_srai_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a);
18787        let e = _mm512_set1_epi16(2);
18788        assert_eq_m512i(r, e);
18789    }
18790
18791    #[simd_test(enable = "avx512bw")]
18792    const fn test_mm512_maskz_srai_epi16() {
18793        let a = _mm512_set1_epi16(8);
18794        let r = _mm512_maskz_srai_epi16::<2>(0, a);
18795        assert_eq_m512i(r, _mm512_setzero_si512());
18796        let r = _mm512_maskz_srai_epi16::<2>(0b11111111_11111111_11111111_11111111, a);
18797        let e = _mm512_set1_epi16(2);
18798        assert_eq_m512i(r, e);
18799    }
18800
18801    #[simd_test(enable = "avx512bw,avx512vl")]
18802    const fn test_mm256_mask_srai_epi16() {
18803        let a = _mm256_set1_epi16(8);
18804        let r = _mm256_mask_srai_epi16::<2>(a, 0, a);
18805        assert_eq_m256i(r, a);
18806        let r = _mm256_mask_srai_epi16::<2>(a, 0b11111111_11111111, a);
18807        let e = _mm256_set1_epi16(2);
18808        assert_eq_m256i(r, e);
18809    }
18810
18811    #[simd_test(enable = "avx512bw,avx512vl")]
18812    const fn test_mm256_maskz_srai_epi16() {
18813        let a = _mm256_set1_epi16(8);
18814        let r = _mm256_maskz_srai_epi16::<2>(0, a);
18815        assert_eq_m256i(r, _mm256_setzero_si256());
18816        let r = _mm256_maskz_srai_epi16::<2>(0b11111111_11111111, a);
18817        let e = _mm256_set1_epi16(2);
18818        assert_eq_m256i(r, e);
18819    }
18820
18821    #[simd_test(enable = "avx512bw,avx512vl")]
18822    const fn test_mm_mask_srai_epi16() {
18823        let a = _mm_set1_epi16(8);
18824        let r = _mm_mask_srai_epi16::<2>(a, 0, a);
18825        assert_eq_m128i(r, a);
18826        let r = _mm_mask_srai_epi16::<2>(a, 0b11111111, a);
18827        let e = _mm_set1_epi16(2);
18828        assert_eq_m128i(r, e);
18829    }
18830
18831    #[simd_test(enable = "avx512bw,avx512vl")]
18832    const fn test_mm_maskz_srai_epi16() {
18833        let a = _mm_set1_epi16(8);
18834        let r = _mm_maskz_srai_epi16::<2>(0, a);
18835        assert_eq_m128i(r, _mm_setzero_si128());
18836        let r = _mm_maskz_srai_epi16::<2>(0b11111111, a);
18837        let e = _mm_set1_epi16(2);
18838        assert_eq_m128i(r, e);
18839    }
18840
18841    #[simd_test(enable = "avx512bw")]
18842    const fn test_mm512_srav_epi16() {
18843        let a = _mm512_set1_epi16(8);
18844        let count = _mm512_set1_epi16(2);
18845        let r = _mm512_srav_epi16(a, count);
18846        let e = _mm512_set1_epi16(2);
18847        assert_eq_m512i(r, e);
18848    }
18849
18850    #[simd_test(enable = "avx512bw")]
18851    const fn test_mm512_mask_srav_epi16() {
18852        let a = _mm512_set1_epi16(8);
18853        let count = _mm512_set1_epi16(2);
18854        let r = _mm512_mask_srav_epi16(a, 0, a, count);
18855        assert_eq_m512i(r, a);
18856        let r = _mm512_mask_srav_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
18857        let e = _mm512_set1_epi16(2);
18858        assert_eq_m512i(r, e);
18859    }
18860
18861    #[simd_test(enable = "avx512bw")]
18862    const fn test_mm512_maskz_srav_epi16() {
18863        let a = _mm512_set1_epi16(8);
18864        let count = _mm512_set1_epi16(2);
18865        let r = _mm512_maskz_srav_epi16(0, a, count);
18866        assert_eq_m512i(r, _mm512_setzero_si512());
18867        let r = _mm512_maskz_srav_epi16(0b11111111_11111111_11111111_11111111, a, count);
18868        let e = _mm512_set1_epi16(2);
18869        assert_eq_m512i(r, e);
18870    }
18871
18872    #[simd_test(enable = "avx512bw,avx512vl")]
18873    const fn test_mm256_srav_epi16() {
18874        let a = _mm256_set1_epi16(8);
18875        let count = _mm256_set1_epi16(2);
18876        let r = _mm256_srav_epi16(a, count);
18877        let e = _mm256_set1_epi16(2);
18878        assert_eq_m256i(r, e);
18879    }
18880
18881    #[simd_test(enable = "avx512bw,avx512vl")]
18882    const fn test_mm256_mask_srav_epi16() {
18883        let a = _mm256_set1_epi16(8);
18884        let count = _mm256_set1_epi16(2);
18885        let r = _mm256_mask_srav_epi16(a, 0, a, count);
18886        assert_eq_m256i(r, a);
18887        let r = _mm256_mask_srav_epi16(a, 0b11111111_11111111, a, count);
18888        let e = _mm256_set1_epi16(2);
18889        assert_eq_m256i(r, e);
18890    }
18891
18892    #[simd_test(enable = "avx512bw,avx512vl")]
18893    const fn test_mm256_maskz_srav_epi16() {
18894        let a = _mm256_set1_epi16(8);
18895        let count = _mm256_set1_epi16(2);
18896        let r = _mm256_maskz_srav_epi16(0, a, count);
18897        assert_eq_m256i(r, _mm256_setzero_si256());
18898        let r = _mm256_maskz_srav_epi16(0b11111111_11111111, a, count);
18899        let e = _mm256_set1_epi16(2);
18900        assert_eq_m256i(r, e);
18901    }
18902
18903    #[simd_test(enable = "avx512bw,avx512vl")]
18904    const fn test_mm_srav_epi16() {
18905        let a = _mm_set1_epi16(8);
18906        let count = _mm_set1_epi16(2);
18907        let r = _mm_srav_epi16(a, count);
18908        let e = _mm_set1_epi16(2);
18909        assert_eq_m128i(r, e);
18910    }
18911
18912    #[simd_test(enable = "avx512bw,avx512vl")]
18913    const fn test_mm_mask_srav_epi16() {
18914        let a = _mm_set1_epi16(8);
18915        let count = _mm_set1_epi16(2);
18916        let r = _mm_mask_srav_epi16(a, 0, a, count);
18917        assert_eq_m128i(r, a);
18918        let r = _mm_mask_srav_epi16(a, 0b11111111, a, count);
18919        let e = _mm_set1_epi16(2);
18920        assert_eq_m128i(r, e);
18921    }
18922
18923    #[simd_test(enable = "avx512bw,avx512vl")]
18924    const fn test_mm_maskz_srav_epi16() {
18925        let a = _mm_set1_epi16(8);
18926        let count = _mm_set1_epi16(2);
18927        let r = _mm_maskz_srav_epi16(0, a, count);
18928        assert_eq_m128i(r, _mm_setzero_si128());
18929        let r = _mm_maskz_srav_epi16(0b11111111, a, count);
18930        let e = _mm_set1_epi16(2);
18931        assert_eq_m128i(r, e);
18932    }
18933
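    // For the permutex2var tests, each index selects from the concatenated elements of `a` and
    // `b`: the low bits pick the element, and the next bit (1 << 5 for 512-bit, 1 << 4 for
    // 256-bit, 1 << 3 for 128-bit vectors) selects `b` instead of `a`. In the `mask2` variants,
    // elements whose mask bit is clear are copied from `idx`, not from `a`.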
18934    #[simd_test(enable = "avx512bw")]
18935    fn test_mm512_permutex2var_epi16() {
18936        #[rustfmt::skip]
18937        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
18938                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
18939        #[rustfmt::skip]
18940        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
18941                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
18942        let b = _mm512_set1_epi16(100);
18943        let r = _mm512_permutex2var_epi16(a, idx, b);
18944        #[rustfmt::skip]
18945        let e = _mm512_set_epi16(
18946            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
18947            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
18948        );
18949        assert_eq_m512i(r, e);
18950    }
18951
18952    #[simd_test(enable = "avx512bw")]
18953    fn test_mm512_mask_permutex2var_epi16() {
18954        #[rustfmt::skip]
18955        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
18956                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
18957        #[rustfmt::skip]
18958        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
18959                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
18960        let b = _mm512_set1_epi16(100);
18961        let r = _mm512_mask_permutex2var_epi16(a, 0, idx, b);
18962        assert_eq_m512i(r, a);
18963        let r = _mm512_mask_permutex2var_epi16(a, 0b11111111_11111111_11111111_11111111, idx, b);
18964        #[rustfmt::skip]
18965        let e = _mm512_set_epi16(
18966            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
18967            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
18968        );
18969        assert_eq_m512i(r, e);
18970    }
18971
18972    #[simd_test(enable = "avx512bw")]
18973    fn test_mm512_maskz_permutex2var_epi16() {
18974        #[rustfmt::skip]
18975        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
18976                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
18977        #[rustfmt::skip]
18978        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
18979                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
18980        let b = _mm512_set1_epi16(100);
18981        let r = _mm512_maskz_permutex2var_epi16(0, a, idx, b);
18982        assert_eq_m512i(r, _mm512_setzero_si512());
18983        let r = _mm512_maskz_permutex2var_epi16(0b11111111_11111111_11111111_11111111, a, idx, b);
18984        #[rustfmt::skip]
18985        let e = _mm512_set_epi16(
18986            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
18987            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
18988        );
18989        assert_eq_m512i(r, e);
18990    }
18991
18992    #[simd_test(enable = "avx512bw")]
18993    fn test_mm512_mask2_permutex2var_epi16() {
18994        #[rustfmt::skip]
18995        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
18996                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
18997        #[rustfmt::skip]
18998        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
18999                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
19000        let b = _mm512_set1_epi16(100);
19001        let r = _mm512_mask2_permutex2var_epi16(a, idx, 0, b);
19002        assert_eq_m512i(r, idx);
19003        let r = _mm512_mask2_permutex2var_epi16(a, idx, 0b11111111_11111111_11111111_11111111, b);
19004        #[rustfmt::skip]
19005        let e = _mm512_set_epi16(
19006            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
19007            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
19008        );
19009        assert_eq_m512i(r, e);
19010    }
19011
19012    #[simd_test(enable = "avx512bw,avx512vl")]
19013    fn test_mm256_permutex2var_epi16() {
19014        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
19015        #[rustfmt::skip]
19016        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
19017        let b = _mm256_set1_epi16(100);
19018        let r = _mm256_permutex2var_epi16(a, idx, b);
19019        let e = _mm256_set_epi16(
19020            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
19021        );
19022        assert_eq_m256i(r, e);
19023    }
19024
19025    #[simd_test(enable = "avx512bw,avx512vl")]
19026    fn test_mm256_mask_permutex2var_epi16() {
19027        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
19028        #[rustfmt::skip]
19029        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
19030        let b = _mm256_set1_epi16(100);
19031        let r = _mm256_mask_permutex2var_epi16(a, 0, idx, b);
19032        assert_eq_m256i(r, a);
19033        let r = _mm256_mask_permutex2var_epi16(a, 0b11111111_11111111, idx, b);
19034        let e = _mm256_set_epi16(
19035            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
19036        );
19037        assert_eq_m256i(r, e);
19038    }
19039
19040    #[simd_test(enable = "avx512bw,avx512vl")]
19041    fn test_mm256_maskz_permutex2var_epi16() {
19042        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
19043        #[rustfmt::skip]
19044        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
19045        let b = _mm256_set1_epi16(100);
19046        let r = _mm256_maskz_permutex2var_epi16(0, a, idx, b);
19047        assert_eq_m256i(r, _mm256_setzero_si256());
19048        let r = _mm256_maskz_permutex2var_epi16(0b11111111_11111111, a, idx, b);
19049        let e = _mm256_set_epi16(
19050            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
19051        );
19052        assert_eq_m256i(r, e);
19053    }
19054
19055    #[simd_test(enable = "avx512bw,avx512vl")]
19056    fn test_mm256_mask2_permutex2var_epi16() {
19057        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
19058        #[rustfmt::skip]
19059        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
19060        let b = _mm256_set1_epi16(100);
19061        let r = _mm256_mask2_permutex2var_epi16(a, idx, 0, b);
19062        assert_eq_m256i(r, idx);
19063        let r = _mm256_mask2_permutex2var_epi16(a, idx, 0b11111111_11111111, b);
19064        #[rustfmt::skip]
19065        let e = _mm256_set_epi16(
19066            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
19067        );
19068        assert_eq_m256i(r, e);
19069    }
19070
19071    #[simd_test(enable = "avx512bw,avx512vl")]
19072    fn test_mm_permutex2var_epi16() {
19073        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
19074        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
19075        let b = _mm_set1_epi16(100);
19076        let r = _mm_permutex2var_epi16(a, idx, b);
19077        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
19078        assert_eq_m128i(r, e);
19079    }
19080
19081    #[simd_test(enable = "avx512bw,avx512vl")]
19082    fn test_mm_mask_permutex2var_epi16() {
19083        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
19084        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
19085        let b = _mm_set1_epi16(100);
19086        let r = _mm_mask_permutex2var_epi16(a, 0, idx, b);
19087        assert_eq_m128i(r, a);
19088        let r = _mm_mask_permutex2var_epi16(a, 0b11111111, idx, b);
19089        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
19090        assert_eq_m128i(r, e);
19091    }
19092
19093    #[simd_test(enable = "avx512bw,avx512vl")]
19094    fn test_mm_maskz_permutex2var_epi16() {
19095        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
19096        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
19097        let b = _mm_set1_epi16(100);
19098        let r = _mm_maskz_permutex2var_epi16(0, a, idx, b);
19099        assert_eq_m128i(r, _mm_setzero_si128());
19100        let r = _mm_maskz_permutex2var_epi16(0b11111111, a, idx, b);
19101        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
19102        assert_eq_m128i(r, e);
19103    }
19104
19105    #[simd_test(enable = "avx512bw,avx512vl")]
19106    fn test_mm_mask2_permutex2var_epi16() {
19107        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
19108        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
19109        let b = _mm_set1_epi16(100);
19110        let r = _mm_mask2_permutex2var_epi16(a, idx, 0, b);
19111        assert_eq_m128i(r, idx);
19112        let r = _mm_mask2_permutex2var_epi16(a, idx, 0b11111111, b);
19113        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
19114        assert_eq_m128i(r, e);
19115    }
19116
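    // permutexvar gathers `a[idx[i]]` for every lane; `_mm512_set_epi16` lists elements from
    // highest to lowest, so index 1 selects the second-lowest element (the value 30 below).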
19117    #[simd_test(enable = "avx512bw")]
19118    fn test_mm512_permutexvar_epi16() {
19119        let idx = _mm512_set1_epi16(1);
19120        #[rustfmt::skip]
19121        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
19122                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
19123        let r = _mm512_permutexvar_epi16(idx, a);
19124        let e = _mm512_set1_epi16(30);
19125        assert_eq_m512i(r, e);
19126    }
19127
19128    #[simd_test(enable = "avx512bw")]
19129    fn test_mm512_mask_permutexvar_epi16() {
19130        let idx = _mm512_set1_epi16(1);
19131        #[rustfmt::skip]
19132        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
19133                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
19134        let r = _mm512_mask_permutexvar_epi16(a, 0, idx, a);
19135        assert_eq_m512i(r, a);
19136        let r = _mm512_mask_permutexvar_epi16(a, 0b11111111_11111111_11111111_11111111, idx, a);
19137        let e = _mm512_set1_epi16(30);
19138        assert_eq_m512i(r, e);
19139    }
19140
19141    #[simd_test(enable = "avx512bw")]
19142    fn test_mm512_maskz_permutexvar_epi16() {
19143        let idx = _mm512_set1_epi16(1);
19144        #[rustfmt::skip]
19145        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
19146                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
19147        let r = _mm512_maskz_permutexvar_epi16(0, idx, a);
19148        assert_eq_m512i(r, _mm512_setzero_si512());
19149        let r = _mm512_maskz_permutexvar_epi16(0b11111111_11111111_11111111_11111111, idx, a);
19150        let e = _mm512_set1_epi16(30);
19151        assert_eq_m512i(r, e);
19152    }
19153
19154    #[simd_test(enable = "avx512bw,avx512vl")]
19155    fn test_mm256_permutexvar_epi16() {
19156        let idx = _mm256_set1_epi16(1);
19157        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
19158        let r = _mm256_permutexvar_epi16(idx, a);
19159        let e = _mm256_set1_epi16(14);
19160        assert_eq_m256i(r, e);
19161    }
19162
19163    #[simd_test(enable = "avx512bw,avx512vl")]
19164    fn test_mm256_mask_permutexvar_epi16() {
19165        let idx = _mm256_set1_epi16(1);
19166        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
19167        let r = _mm256_mask_permutexvar_epi16(a, 0, idx, a);
19168        assert_eq_m256i(r, a);
19169        let r = _mm256_mask_permutexvar_epi16(a, 0b11111111_11111111, idx, a);
19170        let e = _mm256_set1_epi16(14);
19171        assert_eq_m256i(r, e);
19172    }
19173
19174    #[simd_test(enable = "avx512bw,avx512vl")]
19175    fn test_mm256_maskz_permutexvar_epi16() {
19176        let idx = _mm256_set1_epi16(1);
19177        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
19178        let r = _mm256_maskz_permutexvar_epi16(0, idx, a);
19179        assert_eq_m256i(r, _mm256_setzero_si256());
19180        let r = _mm256_maskz_permutexvar_epi16(0b11111111_11111111, idx, a);
19181        let e = _mm256_set1_epi16(14);
19182        assert_eq_m256i(r, e);
19183    }
19184
19185    #[simd_test(enable = "avx512bw,avx512vl")]
19186    fn test_mm_permutexvar_epi16() {
19187        let idx = _mm_set1_epi16(1);
19188        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
19189        let r = _mm_permutexvar_epi16(idx, a);
19190        let e = _mm_set1_epi16(6);
19191        assert_eq_m128i(r, e);
19192    }
19193
19194    #[simd_test(enable = "avx512bw,avx512vl")]
19195    fn test_mm_mask_permutexvar_epi16() {
19196        let idx = _mm_set1_epi16(1);
19197        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
19198        let r = _mm_mask_permutexvar_epi16(a, 0, idx, a);
19199        assert_eq_m128i(r, a);
19200        let r = _mm_mask_permutexvar_epi16(a, 0b11111111, idx, a);
19201        let e = _mm_set1_epi16(6);
19202        assert_eq_m128i(r, e);
19203    }
19204
19205    #[simd_test(enable = "avx512bw,avx512vl")]
19206    fn test_mm_maskz_permutexvar_epi16() {
19207        let idx = _mm_set1_epi16(1);
19208        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
19209        let r = _mm_maskz_permutexvar_epi16(0, idx, a);
19210        assert_eq_m128i(r, _mm_setzero_si128());
19211        let r = _mm_maskz_permutexvar_epi16(0b11111111, idx, a);
19212        let e = _mm_set1_epi16(6);
19213        assert_eq_m128i(r, e);
19214    }
19215
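    // mask_blend copies from `b` where the mask bit is set and from `a` where it is clear.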
19216    #[simd_test(enable = "avx512bw")]
19217    const fn test_mm512_mask_blend_epi16() {
19218        let a = _mm512_set1_epi16(1);
19219        let b = _mm512_set1_epi16(2);
19220        let r = _mm512_mask_blend_epi16(0b11111111_00000000_11111111_00000000, a, b);
19221        #[rustfmt::skip]
19222        let e = _mm512_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
19223                                 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
19224        assert_eq_m512i(r, e);
19225    }
19226
19227    #[simd_test(enable = "avx512bw,avx512vl")]
19228    const fn test_mm256_mask_blend_epi16() {
19229        let a = _mm256_set1_epi16(1);
19230        let b = _mm256_set1_epi16(2);
19231        let r = _mm256_mask_blend_epi16(0b11111111_00000000, a, b);
19232        let e = _mm256_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
19233        assert_eq_m256i(r, e);
19234    }
19235
19236    #[simd_test(enable = "avx512bw,avx512vl")]
19237    const fn test_mm_mask_blend_epi16() {
19238        let a = _mm_set1_epi16(1);
19239        let b = _mm_set1_epi16(2);
19240        let r = _mm_mask_blend_epi16(0b11110000, a, b);
19241        let e = _mm_set_epi16(2, 2, 2, 2, 1, 1, 1, 1);
19242        assert_eq_m128i(r, e);
19243    }
19244
19245    #[simd_test(enable = "avx512bw")]
19246    const fn test_mm512_mask_blend_epi8() {
19247        let a = _mm512_set1_epi8(1);
19248        let b = _mm512_set1_epi8(2);
19249        let r = _mm512_mask_blend_epi8(
19250            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000,
19251            a,
19252            b,
19253        );
19254        #[rustfmt::skip]
19255        let e = _mm512_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
19256                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
19257                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
19258                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
19259        assert_eq_m512i(r, e);
19260    }
19261
19262    #[simd_test(enable = "avx512bw,avx512vl")]
19263    const fn test_mm256_mask_blend_epi8() {
19264        let a = _mm256_set1_epi8(1);
19265        let b = _mm256_set1_epi8(2);
19266        let r = _mm256_mask_blend_epi8(0b11111111_00000000_11111111_00000000, a, b);
19267        #[rustfmt::skip]
19268        let e = _mm256_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
19269                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
19270        assert_eq_m256i(r, e);
19271    }
19272
19273    #[simd_test(enable = "avx512bw,avx512vl")]
19274    const fn test_mm_mask_blend_epi8() {
19275        let a = _mm_set1_epi8(1);
19276        let b = _mm_set1_epi8(2);
19277        let r = _mm_mask_blend_epi8(0b11111111_00000000, a, b);
19278        let e = _mm_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
19279        assert_eq_m128i(r, e);
19280    }
19281
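    // broadcastw/broadcastb replicate element 0 of `a`, i.e. the last argument passed to
    // `_mm_set_epi16`/`_mm_set_epi8` (24 and 32 in the tests below).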
19282    #[simd_test(enable = "avx512bw")]
19283    const fn test_mm512_broadcastw_epi16() {
19284        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
19285        let r = _mm512_broadcastw_epi16(a);
19286        let e = _mm512_set1_epi16(24);
19287        assert_eq_m512i(r, e);
19288    }
19289
19290    #[simd_test(enable = "avx512bw")]
19291    const fn test_mm512_mask_broadcastw_epi16() {
19292        let src = _mm512_set1_epi16(1);
19293        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
19294        let r = _mm512_mask_broadcastw_epi16(src, 0, a);
19295        assert_eq_m512i(r, src);
19296        let r = _mm512_mask_broadcastw_epi16(src, 0b11111111_11111111_11111111_11111111, a);
19297        let e = _mm512_set1_epi16(24);
19298        assert_eq_m512i(r, e);
19299    }
19300
19301    #[simd_test(enable = "avx512bw")]
19302    const fn test_mm512_maskz_broadcastw_epi16() {
19303        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
19304        let r = _mm512_maskz_broadcastw_epi16(0, a);
19305        assert_eq_m512i(r, _mm512_setzero_si512());
19306        let r = _mm512_maskz_broadcastw_epi16(0b11111111_11111111_11111111_11111111, a);
19307        let e = _mm512_set1_epi16(24);
19308        assert_eq_m512i(r, e);
19309    }
19310
19311    #[simd_test(enable = "avx512bw,avx512vl")]
19312    const fn test_mm256_mask_broadcastw_epi16() {
19313        let src = _mm256_set1_epi16(1);
19314        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
19315        let r = _mm256_mask_broadcastw_epi16(src, 0, a);
19316        assert_eq_m256i(r, src);
19317        let r = _mm256_mask_broadcastw_epi16(src, 0b11111111_11111111, a);
19318        let e = _mm256_set1_epi16(24);
19319        assert_eq_m256i(r, e);
19320    }
19321
19322    #[simd_test(enable = "avx512bw,avx512vl")]
19323    const fn test_mm256_maskz_broadcastw_epi16() {
19324        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
19325        let r = _mm256_maskz_broadcastw_epi16(0, a);
19326        assert_eq_m256i(r, _mm256_setzero_si256());
19327        let r = _mm256_maskz_broadcastw_epi16(0b11111111_11111111, a);
19328        let e = _mm256_set1_epi16(24);
19329        assert_eq_m256i(r, e);
19330    }
19331
19332    #[simd_test(enable = "avx512bw,avx512vl")]
19333    const fn test_mm_mask_broadcastw_epi16() {
19334        let src = _mm_set1_epi16(1);
19335        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
19336        let r = _mm_mask_broadcastw_epi16(src, 0, a);
19337        assert_eq_m128i(r, src);
19338        let r = _mm_mask_broadcastw_epi16(src, 0b11111111, a);
19339        let e = _mm_set1_epi16(24);
19340        assert_eq_m128i(r, e);
19341    }
19342
19343    #[simd_test(enable = "avx512bw,avx512vl")]
19344    const fn test_mm_maskz_broadcastw_epi16() {
19345        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
19346        let r = _mm_maskz_broadcastw_epi16(0, a);
19347        assert_eq_m128i(r, _mm_setzero_si128());
19348        let r = _mm_maskz_broadcastw_epi16(0b11111111, a);
19349        let e = _mm_set1_epi16(24);
19350        assert_eq_m128i(r, e);
19351    }
19352
19353    #[simd_test(enable = "avx512bw")]
19354    const fn test_mm512_broadcastb_epi8() {
19355        let a = _mm_set_epi8(
19356            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
19357        );
19358        let r = _mm512_broadcastb_epi8(a);
19359        let e = _mm512_set1_epi8(32);
19360        assert_eq_m512i(r, e);
19361    }
19362
19363    #[simd_test(enable = "avx512bw")]
19364    const fn test_mm512_mask_broadcastb_epi8() {
19365        let src = _mm512_set1_epi8(1);
19366        let a = _mm_set_epi8(
19367            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
19368        );
19369        let r = _mm512_mask_broadcastb_epi8(src, 0, a);
19370        assert_eq_m512i(r, src);
19371        let r = _mm512_mask_broadcastb_epi8(
19372            src,
19373            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
19374            a,
19375        );
19376        let e = _mm512_set1_epi8(32);
19377        assert_eq_m512i(r, e);
19378    }
19379
19380    #[simd_test(enable = "avx512bw")]
19381    const fn test_mm512_maskz_broadcastb_epi8() {
19382        let a = _mm_set_epi8(
19383            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
19384        );
19385        let r = _mm512_maskz_broadcastb_epi8(0, a);
19386        assert_eq_m512i(r, _mm512_setzero_si512());
19387        let r = _mm512_maskz_broadcastb_epi8(
19388            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
19389            a,
19390        );
19391        let e = _mm512_set1_epi8(32);
19392        assert_eq_m512i(r, e);
19393    }
19394
19395    #[simd_test(enable = "avx512bw,avx512vl")]
19396    const fn test_mm256_mask_broadcastb_epi8() {
19397        let src = _mm256_set1_epi8(1);
19398        let a = _mm_set_epi8(
19399            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
19400        );
19401        let r = _mm256_mask_broadcastb_epi8(src, 0, a);
19402        assert_eq_m256i(r, src);
19403        let r = _mm256_mask_broadcastb_epi8(src, 0b11111111_11111111_11111111_11111111, a);
19404        let e = _mm256_set1_epi8(32);
19405        assert_eq_m256i(r, e);
19406    }
19407
19408    #[simd_test(enable = "avx512bw,avx512vl")]
19409    const fn test_mm256_maskz_broadcastb_epi8() {
19410        let a = _mm_set_epi8(
19411            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
19412        );
19413        let r = _mm256_maskz_broadcastb_epi8(0, a);
19414        assert_eq_m256i(r, _mm256_setzero_si256());
19415        let r = _mm256_maskz_broadcastb_epi8(0b11111111_11111111_11111111_11111111, a);
19416        let e = _mm256_set1_epi8(32);
19417        assert_eq_m256i(r, e);
19418    }
19419
19420    #[simd_test(enable = "avx512bw,avx512vl")]
19421    const fn test_mm_mask_broadcastb_epi8() {
19422        let src = _mm_set1_epi8(1);
19423        let a = _mm_set_epi8(
19424            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
19425        );
19426        let r = _mm_mask_broadcastb_epi8(src, 0, a);
19427        assert_eq_m128i(r, src);
19428        let r = _mm_mask_broadcastb_epi8(src, 0b11111111_11111111, a);
19429        let e = _mm_set1_epi8(32);
19430        assert_eq_m128i(r, e);
19431    }
19432
19433    #[simd_test(enable = "avx512bw,avx512vl")]
19434    const fn test_mm_maskz_broadcastb_epi8() {
19435        let a = _mm_set_epi8(
19436            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
19437        );
19438        let r = _mm_maskz_broadcastb_epi8(0, a);
19439        assert_eq_m128i(r, _mm_setzero_si128());
19440        let r = _mm_maskz_broadcastb_epi8(0b11111111_11111111, a);
19441        let e = _mm_set1_epi8(32);
19442        assert_eq_m128i(r, e);
19443    }
19444
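    // unpackhi interleaves the upper half of each 128-bit lane of `a` and `b` independently,
    // which is why the expected vectors below are not one contiguous interleave.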
19445    #[simd_test(enable = "avx512bw")]
19446    const fn test_mm512_unpackhi_epi16() {
19447        #[rustfmt::skip]
19448        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
19449                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
19450        #[rustfmt::skip]
19451        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
19452                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
19453        let r = _mm512_unpackhi_epi16(a, b);
19454        #[rustfmt::skip]
19455        let e = _mm512_set_epi16(33, 1,  34, 2,  35, 3,  36, 4,  41, 9,  42, 10, 43, 11, 44, 12,
19456                                 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
19457        assert_eq_m512i(r, e);
19458    }
19459
19460    #[simd_test(enable = "avx512bw")]
19461    const fn test_mm512_mask_unpackhi_epi16() {
19462        #[rustfmt::skip]
19463        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
19464                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
19465        #[rustfmt::skip]
19466        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
19467                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
19468        let r = _mm512_mask_unpackhi_epi16(a, 0, a, b);
19469        assert_eq_m512i(r, a);
19470        let r = _mm512_mask_unpackhi_epi16(a, 0b11111111_11111111_11111111_11111111, a, b);
19471        #[rustfmt::skip]
19472        let e = _mm512_set_epi16(33, 1,  34, 2,  35, 3,  36, 4,  41, 9,  42, 10, 43, 11, 44, 12,
19473                                 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
19474        assert_eq_m512i(r, e);
19475    }
19476
19477    #[simd_test(enable = "avx512bw")]
19478    const fn test_mm512_maskz_unpackhi_epi16() {
19479        #[rustfmt::skip]
19480        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
19481                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
19482        #[rustfmt::skip]
19483        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
19484                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
19485        let r = _mm512_maskz_unpackhi_epi16(0, a, b);
19486        assert_eq_m512i(r, _mm512_setzero_si512());
19487        let r = _mm512_maskz_unpackhi_epi16(0b11111111_11111111_11111111_11111111, a, b);
19488        #[rustfmt::skip]
19489        let e = _mm512_set_epi16(33, 1,  34, 2,  35, 3,  36, 4,  41, 9,  42, 10, 43, 11, 44, 12,
19490                                 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
19491        assert_eq_m512i(r, e);
19492    }
19493
19494    #[simd_test(enable = "avx512bw,avx512vl")]
19495    const fn test_mm256_mask_unpackhi_epi16() {
19496        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
19497        let b = _mm256_set_epi16(
19498            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
19499        );
19500        let r = _mm256_mask_unpackhi_epi16(a, 0, a, b);
19501        assert_eq_m256i(r, a);
19502        let r = _mm256_mask_unpackhi_epi16(a, 0b11111111_11111111, a, b);
19503        let e = _mm256_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12);
19504        assert_eq_m256i(r, e);
19505    }
19506
19507    #[simd_test(enable = "avx512bw,avx512vl")]
19508    const fn test_mm256_maskz_unpackhi_epi16() {
19509        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
19510        let b = _mm256_set_epi16(
19511            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
19512        );
19513        let r = _mm256_maskz_unpackhi_epi16(0, a, b);
19514        assert_eq_m256i(r, _mm256_setzero_si256());
19515        let r = _mm256_maskz_unpackhi_epi16(0b11111111_11111111, a, b);
19516        let e = _mm256_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12);
19517        assert_eq_m256i(r, e);
19518    }
19519
19520    #[simd_test(enable = "avx512bw,avx512vl")]
19521    const fn test_mm_mask_unpackhi_epi16() {
19522        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
19523        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
19524        let r = _mm_mask_unpackhi_epi16(a, 0, a, b);
19525        assert_eq_m128i(r, a);
19526        let r = _mm_mask_unpackhi_epi16(a, 0b11111111, a, b);
19527        let e = _mm_set_epi16(33, 1, 34, 2, 35, 3, 36, 4);
19528        assert_eq_m128i(r, e);
19529    }
19530
19531    #[simd_test(enable = "avx512bw,avx512vl")]
19532    const fn test_mm_maskz_unpackhi_epi16() {
19533        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
19534        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
19535        let r = _mm_maskz_unpackhi_epi16(0, a, b);
19536        assert_eq_m128i(r, _mm_setzero_si128());
19537        let r = _mm_maskz_unpackhi_epi16(0b11111111, a, b);
19538        let e = _mm_set_epi16(33, 1, 34, 2, 35, 3, 36, 4);
19539        assert_eq_m128i(r, e);
19540    }
19541
19542    #[simd_test(enable = "avx512bw")]
19543    const fn test_mm512_unpackhi_epi8() {
19544        #[rustfmt::skip]
19545        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
19546                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
19547                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
19548                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
19549        #[rustfmt::skip]
19550        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
19551                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
19552                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
19553                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
19554        let r = _mm512_unpackhi_epi8(a, b);
19555        #[rustfmt::skip]
19556        let e = _mm512_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
19557                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
19558                                97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
19559                                113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
19560        assert_eq_m512i(r, e);
19561    }
19562
19563    #[simd_test(enable = "avx512bw")]
19564    const fn test_mm512_mask_unpackhi_epi8() {
19565        #[rustfmt::skip]
19566        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
19567                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
19568                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
19569                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
19570        #[rustfmt::skip]
19571        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
19572                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
19573                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
19574                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
19575        let r = _mm512_mask_unpackhi_epi8(a, 0, a, b);
19576        assert_eq_m512i(r, a);
19577        let r = _mm512_mask_unpackhi_epi8(
19578            a,
19579            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
19580            a,
19581            b,
19582        );
19583        #[rustfmt::skip]
19584        let e = _mm512_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
19585                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
19586                                97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
19587                                113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
19588        assert_eq_m512i(r, e);
19589    }
19590
19591    #[simd_test(enable = "avx512bw")]
19592    const fn test_mm512_maskz_unpackhi_epi8() {
19593        #[rustfmt::skip]
19594        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
19595                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
19596                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
19597                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
19598        #[rustfmt::skip]
19599        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
19600                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
19601                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
19602                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
19603        let r = _mm512_maskz_unpackhi_epi8(0, a, b);
19604        assert_eq_m512i(r, _mm512_setzero_si512());
19605        let r = _mm512_maskz_unpackhi_epi8(
19606            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
19607            a,
19608            b,
19609        );
19610        #[rustfmt::skip]
19611        let e = _mm512_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
19612                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
19613                                97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
19614                                113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
19615        assert_eq_m512i(r, e);
19616    }
19617
19618    #[simd_test(enable = "avx512bw,avx512vl")]
19619    const fn test_mm256_mask_unpackhi_epi8() {
19620        #[rustfmt::skip]
19621        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
19622                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
19623        #[rustfmt::skip]
19624        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
19625                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
19626        let r = _mm256_mask_unpackhi_epi8(a, 0, a, b);
19627        assert_eq_m256i(r, a);
19628        let r = _mm256_mask_unpackhi_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
19629        #[rustfmt::skip]
19630        let e = _mm256_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
19631                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24);
19632        assert_eq_m256i(r, e);
19633    }
19634
19635    #[simd_test(enable = "avx512bw,avx512vl")]
19636    const fn test_mm256_maskz_unpackhi_epi8() {
19637        #[rustfmt::skip]
19638        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
19639                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
19640        #[rustfmt::skip]
19641        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
19642                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
19643        let r = _mm256_maskz_unpackhi_epi8(0, a, b);
19644        assert_eq_m256i(r, _mm256_setzero_si256());
19645        let r = _mm256_maskz_unpackhi_epi8(0b11111111_11111111_11111111_11111111, a, b);
19646        #[rustfmt::skip]
19647        let e = _mm256_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
19648                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24);
19649        assert_eq_m256i(r, e);
19650    }
19651
19652    #[simd_test(enable = "avx512bw,avx512vl")]
19653    const fn test_mm_mask_unpackhi_epi8() {
19654        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
19655        let b = _mm_set_epi8(
19656            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
19657        );
19658        let r = _mm_mask_unpackhi_epi8(a, 0, a, b);
19659        assert_eq_m128i(r, a);
19660        let r = _mm_mask_unpackhi_epi8(a, 0b11111111_11111111, a, b);
19661        let e = _mm_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8);
19662        assert_eq_m128i(r, e);
19663    }
19664
19665    #[simd_test(enable = "avx512bw,avx512vl")]
19666    const fn test_mm_maskz_unpackhi_epi8() {
19667        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
19668        let b = _mm_set_epi8(
19669            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
19670        );
19671        let r = _mm_maskz_unpackhi_epi8(0, a, b);
19672        assert_eq_m128i(r, _mm_setzero_si128());
19673        let r = _mm_maskz_unpackhi_epi8(0b11111111_11111111, a, b);
19674        let e = _mm_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8);
19675        assert_eq_m128i(r, e);
19676    }
19677
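    // unpacklo mirrors unpackhi but interleaves the lower half of each 128-bit lane.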
19678    #[simd_test(enable = "avx512bw")]
19679    const fn test_mm512_unpacklo_epi16() {
19680        #[rustfmt::skip]
19681        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
19682                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
19683        #[rustfmt::skip]
19684        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
19685                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
19686        let r = _mm512_unpacklo_epi16(a, b);
19687        #[rustfmt::skip]
19688        let e = _mm512_set_epi16(37, 5,  38, 6,  39, 7,  40, 8,  45, 13, 46, 14, 47, 15, 48, 16,
19689                                 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
19690        assert_eq_m512i(r, e);
19691    }
19692
19693    #[simd_test(enable = "avx512bw")]
19694    const fn test_mm512_mask_unpacklo_epi16() {
19695        #[rustfmt::skip]
19696        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
19697                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
19698        #[rustfmt::skip]
19699        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
19700                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
19701        let r = _mm512_mask_unpacklo_epi16(a, 0, a, b);
19702        assert_eq_m512i(r, a);
19703        let r = _mm512_mask_unpacklo_epi16(a, 0b11111111_11111111_11111111_11111111, a, b);
19704        #[rustfmt::skip]
19705        let e = _mm512_set_epi16(37, 5,  38, 6,  39, 7,  40, 8,  45, 13, 46, 14, 47, 15, 48, 16,
19706                                 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
19707        assert_eq_m512i(r, e);
19708    }
19709
19710    #[simd_test(enable = "avx512bw")]
19711    const fn test_mm512_maskz_unpacklo_epi16() {
19712        #[rustfmt::skip]
19713        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
19714                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
19715        #[rustfmt::skip]
19716        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
19717                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
19718        let r = _mm512_maskz_unpacklo_epi16(0, a, b);
19719        assert_eq_m512i(r, _mm512_setzero_si512());
19720        let r = _mm512_maskz_unpacklo_epi16(0b11111111_11111111_11111111_11111111, a, b);
19721        #[rustfmt::skip]
19722        let e = _mm512_set_epi16(37, 5,  38, 6,  39, 7,  40, 8,  45, 13, 46, 14, 47, 15, 48, 16,
19723                                 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
19724        assert_eq_m512i(r, e);
19725    }
19726
19727    #[simd_test(enable = "avx512bw,avx512vl")]
19728    const fn test_mm256_mask_unpacklo_epi16() {
19729        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
19730        let b = _mm256_set_epi16(
19731            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
19732        );
19733        let r = _mm256_mask_unpacklo_epi16(a, 0, a, b);
19734        assert_eq_m256i(r, a);
19735        let r = _mm256_mask_unpacklo_epi16(a, 0b11111111_11111111, a, b);
19736        let e = _mm256_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16);
19737        assert_eq_m256i(r, e);
19738    }
19739
19740    #[simd_test(enable = "avx512bw,avx512vl")]
19741    const fn test_mm256_maskz_unpacklo_epi16() {
19742        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
19743        let b = _mm256_set_epi16(
19744            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
19745        );
19746        let r = _mm256_maskz_unpacklo_epi16(0, a, b);
19747        assert_eq_m256i(r, _mm256_setzero_si256());
19748        let r = _mm256_maskz_unpacklo_epi16(0b11111111_11111111, a, b);
19749        let e = _mm256_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16);
19750        assert_eq_m256i(r, e);
19751    }
19752
19753    #[simd_test(enable = "avx512bw,avx512vl")]
19754    const fn test_mm_mask_unpacklo_epi16() {
19755        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
19756        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
19757        let r = _mm_mask_unpacklo_epi16(a, 0, a, b);
19758        assert_eq_m128i(r, a);
19759        let r = _mm_mask_unpacklo_epi16(a, 0b11111111, a, b);
19760        let e = _mm_set_epi16(37, 5, 38, 6, 39, 7, 40, 8);
19761        assert_eq_m128i(r, e);
19762    }
19763
19764    #[simd_test(enable = "avx512bw,avx512vl")]
19765    const fn test_mm_maskz_unpacklo_epi16() {
19766        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
19767        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
19768        let r = _mm_maskz_unpacklo_epi16(0, a, b);
19769        assert_eq_m128i(r, _mm_setzero_si128());
19770        let r = _mm_maskz_unpacklo_epi16(0b11111111, a, b);
19771        let e = _mm_set_epi16(37, 5, 38, 6, 39, 7, 40, 8);
19772        assert_eq_m128i(r, e);
19773    }
19774
19775    #[simd_test(enable = "avx512bw")]
19776    const fn test_mm512_unpacklo_epi8() {
19777        #[rustfmt::skip]
19778        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
19779                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
19780                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
19781                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
19782        #[rustfmt::skip]
19783        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
19784                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
19785                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
19786                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
19787        let r = _mm512_unpacklo_epi8(a, b);
19788        #[rustfmt::skip]
19789        let e = _mm512_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
19790                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32,
19791                                105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48,
19792                                121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0,   64);
19793        assert_eq_m512i(r, e);
19794    }
19795
19796    #[simd_test(enable = "avx512bw")]
19797    const fn test_mm512_mask_unpacklo_epi8() {
19798        #[rustfmt::skip]
19799        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
19800                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
19801                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
19802                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
19803        #[rustfmt::skip]
19804        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
19805                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
19806                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
19807                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
19808        let r = _mm512_mask_unpacklo_epi8(a, 0, a, b);
19809        assert_eq_m512i(r, a);
19810        let r = _mm512_mask_unpacklo_epi8(
19811            a,
19812            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
19813            a,
19814            b,
19815        );
19816        #[rustfmt::skip]
19817        let e = _mm512_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
19818                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32,
19819                                105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48,
19820                                121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0,   64);
19821        assert_eq_m512i(r, e);
19822    }
19823
19824    #[simd_test(enable = "avx512bw")]
19825    const fn test_mm512_maskz_unpacklo_epi8() {
19826        #[rustfmt::skip]
19827        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
19828                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
19829                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
19830                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
19831        #[rustfmt::skip]
19832        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
19833                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
19834                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
19835                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
19836        let r = _mm512_maskz_unpacklo_epi8(0, a, b);
19837        assert_eq_m512i(r, _mm512_setzero_si512());
19838        let r = _mm512_maskz_unpacklo_epi8(
19839            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
19840            a,
19841            b,
19842        );
19843        #[rustfmt::skip]
19844        let e = _mm512_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
19845                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32,
19846                                105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48,
19847                                121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0,   64);
19848        assert_eq_m512i(r, e);
19849    }
19850
19851    #[simd_test(enable = "avx512bw,avx512vl")]
19852    const fn test_mm256_mask_unpacklo_epi8() {
19853        #[rustfmt::skip]
19854        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
19855                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
19856        #[rustfmt::skip]
19857        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
19858                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
19859        let r = _mm256_mask_unpacklo_epi8(a, 0, a, b);
19860        assert_eq_m256i(r, a);
19861        let r = _mm256_mask_unpacklo_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
19862        #[rustfmt::skip]
19863        let e = _mm256_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
19864                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32);
19865        assert_eq_m256i(r, e);
19866    }
19867
19868    #[simd_test(enable = "avx512bw,avx512vl")]
19869    const fn test_mm256_maskz_unpacklo_epi8() {
19870        #[rustfmt::skip]
19871        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
19872                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
19873        #[rustfmt::skip]
19874        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
19875                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
19876        let r = _mm256_maskz_unpacklo_epi8(0, a, b);
19877        assert_eq_m256i(r, _mm256_setzero_si256());
19878        let r = _mm256_maskz_unpacklo_epi8(0b11111111_11111111_11111111_11111111, a, b);
19879        #[rustfmt::skip]
19880        let e = _mm256_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
19881                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32);
19882        assert_eq_m256i(r, e);
19883    }
19884
19885    #[simd_test(enable = "avx512bw,avx512vl")]
19886    const fn test_mm_mask_unpacklo_epi8() {
19887        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
19888        let b = _mm_set_epi8(
19889            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
19890        );
19891        let r = _mm_mask_unpacklo_epi8(a, 0, a, b);
19892        assert_eq_m128i(r, a);
19893        let r = _mm_mask_unpacklo_epi8(a, 0b11111111_11111111, a, b);
19894        let e = _mm_set_epi8(
19895            73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16,
19896        );
19897        assert_eq_m128i(r, e);
19898    }
19899
19900    #[simd_test(enable = "avx512bw,avx512vl")]
19901    const fn test_mm_maskz_unpacklo_epi8() {
19902        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
19903        let b = _mm_set_epi8(
19904            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
19905        );
19906        let r = _mm_maskz_unpacklo_epi8(0, a, b);
19907        assert_eq_m128i(r, _mm_setzero_si128());
19908        let r = _mm_maskz_unpacklo_epi8(0b11111111_11111111, a, b);
19909        let e = _mm_set_epi8(
19910            73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16,
19911        );
19912        assert_eq_m128i(r, e);
19913    }
19914
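    // Masked element moves: with mask 0 the mask form returns `src` unchanged and
    // the maskz form returns zero; with an all-ones mask both forms return `a`.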
19915    #[simd_test(enable = "avx512bw")]
19916    const fn test_mm512_mask_mov_epi16() {
19917        let src = _mm512_set1_epi16(1);
19918        let a = _mm512_set1_epi16(2);
19919        let r = _mm512_mask_mov_epi16(src, 0, a);
19920        assert_eq_m512i(r, src);
19921        let r = _mm512_mask_mov_epi16(src, 0b11111111_11111111_11111111_11111111, a);
19922        assert_eq_m512i(r, a);
19923    }
19924
19925    #[simd_test(enable = "avx512bw")]
19926    const fn test_mm512_maskz_mov_epi16() {
19927        let a = _mm512_set1_epi16(2);
19928        let r = _mm512_maskz_mov_epi16(0, a);
19929        assert_eq_m512i(r, _mm512_setzero_si512());
19930        let r = _mm512_maskz_mov_epi16(0b11111111_11111111_11111111_11111111, a);
19931        assert_eq_m512i(r, a);
19932    }
19933
19934    #[simd_test(enable = "avx512bw,avx512vl")]
19935    const fn test_mm256_mask_mov_epi16() {
19936        let src = _mm256_set1_epi16(1);
19937        let a = _mm256_set1_epi16(2);
19938        let r = _mm256_mask_mov_epi16(src, 0, a);
19939        assert_eq_m256i(r, src);
19940        let r = _mm256_mask_mov_epi16(src, 0b11111111_11111111, a);
19941        assert_eq_m256i(r, a);
19942    }
19943
19944    #[simd_test(enable = "avx512bw,avx512vl")]
19945    const fn test_mm256_maskz_mov_epi16() {
19946        let a = _mm256_set1_epi16(2);
19947        let r = _mm256_maskz_mov_epi16(0, a);
19948        assert_eq_m256i(r, _mm256_setzero_si256());
19949        let r = _mm256_maskz_mov_epi16(0b11111111_11111111, a);
19950        assert_eq_m256i(r, a);
19951    }
19952
19953    #[simd_test(enable = "avx512bw,avx512vl")]
19954    const fn test_mm_mask_mov_epi16() {
19955        let src = _mm_set1_epi16(1);
19956        let a = _mm_set1_epi16(2);
19957        let r = _mm_mask_mov_epi16(src, 0, a);
19958        assert_eq_m128i(r, src);
19959        let r = _mm_mask_mov_epi16(src, 0b11111111, a);
19960        assert_eq_m128i(r, a);
19961    }
19962
19963    #[simd_test(enable = "avx512bw,avx512vl")]
19964    const fn test_mm_maskz_mov_epi16() {
19965        let a = _mm_set1_epi16(2);
19966        let r = _mm_maskz_mov_epi16(0, a);
19967        assert_eq_m128i(r, _mm_setzero_si128());
19968        let r = _mm_maskz_mov_epi16(0b11111111, a);
19969        assert_eq_m128i(r, a);
19970    }
19971
19972    #[simd_test(enable = "avx512bw")]
19973    const fn test_mm512_mask_mov_epi8() {
19974        let src = _mm512_set1_epi8(1);
19975        let a = _mm512_set1_epi8(2);
19976        let r = _mm512_mask_mov_epi8(src, 0, a);
19977        assert_eq_m512i(r, src);
19978        let r = _mm512_mask_mov_epi8(
19979            src,
19980            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
19981            a,
19982        );
19983        assert_eq_m512i(r, a);
19984    }
19985
19986    #[simd_test(enable = "avx512bw")]
19987    const fn test_mm512_maskz_mov_epi8() {
19988        let a = _mm512_set1_epi8(2);
19989        let r = _mm512_maskz_mov_epi8(0, a);
19990        assert_eq_m512i(r, _mm512_setzero_si512());
19991        let r = _mm512_maskz_mov_epi8(
19992            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
19993            a,
19994        );
19995        assert_eq_m512i(r, a);
19996    }
19997
19998    #[simd_test(enable = "avx512bw,avx512vl")]
19999    const fn test_mm256_mask_mov_epi8() {
20000        let src = _mm256_set1_epi8(1);
20001        let a = _mm256_set1_epi8(2);
20002        let r = _mm256_mask_mov_epi8(src, 0, a);
20003        assert_eq_m256i(r, src);
20004        let r = _mm256_mask_mov_epi8(src, 0b11111111_11111111_11111111_11111111, a);
20005        assert_eq_m256i(r, a);
20006    }
20007
20008    #[simd_test(enable = "avx512bw,avx512vl")]
20009    const fn test_mm256_maskz_mov_epi8() {
20010        let a = _mm256_set1_epi8(2);
20011        let r = _mm256_maskz_mov_epi8(0, a);
20012        assert_eq_m256i(r, _mm256_setzero_si256());
20013        let r = _mm256_maskz_mov_epi8(0b11111111_11111111_11111111_11111111, a);
20014        assert_eq_m256i(r, a);
20015    }
20016
20017    #[simd_test(enable = "avx512bw,avx512vl")]
20018    const fn test_mm_mask_mov_epi8() {
20019        let src = _mm_set1_epi8(1);
20020        let a = _mm_set1_epi8(2);
20021        let r = _mm_mask_mov_epi8(src, 0, a);
20022        assert_eq_m128i(r, src);
20023        let r = _mm_mask_mov_epi8(src, 0b11111111_11111111, a);
20024        assert_eq_m128i(r, a);
20025    }
20026
20027    #[simd_test(enable = "avx512bw,avx512vl")]
20028    const fn test_mm_maskz_mov_epi8() {
20029        let a = _mm_set1_epi8(2);
20030        let r = _mm_maskz_mov_epi8(0, a);
20031        assert_eq_m128i(r, _mm_setzero_si128());
20032        let r = _mm_maskz_mov_epi8(0b11111111_11111111, a);
20033        assert_eq_m128i(r, a);
20034    }
20035
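    // Masked broadcasts (set1): the scalar is written only to lanes selected by the
    // mask; unselected lanes keep `src` in the mask form or become zero in maskz.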
20036    #[simd_test(enable = "avx512bw")]
20037    const fn test_mm512_mask_set1_epi16() {
20038        let src = _mm512_set1_epi16(2);
20039        let a: i16 = 11;
20040        let r = _mm512_mask_set1_epi16(src, 0, a);
20041        assert_eq_m512i(r, src);
20042        let r = _mm512_mask_set1_epi16(src, 0b11111111_11111111_11111111_11111111, a);
20043        let e = _mm512_set1_epi16(11);
20044        assert_eq_m512i(r, e);
20045    }
20046
20047    #[simd_test(enable = "avx512bw")]
20048    const fn test_mm512_maskz_set1_epi16() {
20049        let a: i16 = 11;
20050        let r = _mm512_maskz_set1_epi16(0, a);
20051        assert_eq_m512i(r, _mm512_setzero_si512());
20052        let r = _mm512_maskz_set1_epi16(0b11111111_11111111_11111111_11111111, a);
20053        let e = _mm512_set1_epi16(11);
20054        assert_eq_m512i(r, e);
20055    }
20056
20057    #[simd_test(enable = "avx512bw,avx512vl")]
20058    const fn test_mm256_mask_set1_epi16() {
20059        let src = _mm256_set1_epi16(2);
20060        let a: i16 = 11;
20061        let r = _mm256_mask_set1_epi16(src, 0, a);
20062        assert_eq_m256i(r, src);
20063        let r = _mm256_mask_set1_epi16(src, 0b11111111_11111111, a);
20064        let e = _mm256_set1_epi16(11);
20065        assert_eq_m256i(r, e);
20066    }
20067
20068    #[simd_test(enable = "avx512bw,avx512vl")]
20069    const fn test_mm256_maskz_set1_epi16() {
20070        let a: i16 = 11;
20071        let r = _mm256_maskz_set1_epi16(0, a);
20072        assert_eq_m256i(r, _mm256_setzero_si256());
20073        let r = _mm256_maskz_set1_epi16(0b11111111_11111111, a);
20074        let e = _mm256_set1_epi16(11);
20075        assert_eq_m256i(r, e);
20076    }
20077
20078    #[simd_test(enable = "avx512bw,avx512vl")]
20079    const fn test_mm_mask_set1_epi16() {
20080        let src = _mm_set1_epi16(2);
20081        let a: i16 = 11;
20082        let r = _mm_mask_set1_epi16(src, 0, a);
20083        assert_eq_m128i(r, src);
20084        let r = _mm_mask_set1_epi16(src, 0b11111111, a);
20085        let e = _mm_set1_epi16(11);
20086        assert_eq_m128i(r, e);
20087    }
20088
20089    #[simd_test(enable = "avx512bw,avx512vl")]
20090    const fn test_mm_maskz_set1_epi16() {
20091        let a: i16 = 11;
20092        let r = _mm_maskz_set1_epi16(0, a);
20093        assert_eq_m128i(r, _mm_setzero_si128());
20094        let r = _mm_maskz_set1_epi16(0b11111111, a);
20095        let e = _mm_set1_epi16(11);
20096        assert_eq_m128i(r, e);
20097    }
20098
20099    #[simd_test(enable = "avx512bw")]
20100    const fn test_mm512_mask_set1_epi8() {
20101        let src = _mm512_set1_epi8(2);
20102        let a: i8 = 11;
20103        let r = _mm512_mask_set1_epi8(src, 0, a);
20104        assert_eq_m512i(r, src);
20105        let r = _mm512_mask_set1_epi8(
20106            src,
20107            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
20108            a,
20109        );
20110        let e = _mm512_set1_epi8(11);
20111        assert_eq_m512i(r, e);
20112    }
20113
20114    #[simd_test(enable = "avx512bw")]
20115    const fn test_mm512_maskz_set1_epi8() {
20116        let a: i8 = 11;
20117        let r = _mm512_maskz_set1_epi8(0, a);
20118        assert_eq_m512i(r, _mm512_setzero_si512());
20119        let r = _mm512_maskz_set1_epi8(
20120            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
20121            a,
20122        );
20123        let e = _mm512_set1_epi8(11);
20124        assert_eq_m512i(r, e);
20125    }
20126
20127    #[simd_test(enable = "avx512bw,avx512vl")]
20128    const fn test_mm256_mask_set1_epi8() {
20129        let src = _mm256_set1_epi8(2);
20130        let a: i8 = 11;
20131        let r = _mm256_mask_set1_epi8(src, 0, a);
20132        assert_eq_m256i(r, src);
20133        let r = _mm256_mask_set1_epi8(src, 0b11111111_11111111_11111111_11111111, a);
20134        let e = _mm256_set1_epi8(11);
20135        assert_eq_m256i(r, e);
20136    }
20137
20138    #[simd_test(enable = "avx512bw,avx512vl")]
20139    const fn test_mm256_maskz_set1_epi8() {
20140        let a: i8 = 11;
20141        let r = _mm256_maskz_set1_epi8(0, a);
20142        assert_eq_m256i(r, _mm256_setzero_si256());
20143        let r = _mm256_maskz_set1_epi8(0b11111111_11111111_11111111_11111111, a);
20144        let e = _mm256_set1_epi8(11);
20145        assert_eq_m256i(r, e);
20146    }
20147
20148    #[simd_test(enable = "avx512bw,avx512vl")]
20149    const fn test_mm_mask_set1_epi8() {
20150        let src = _mm_set1_epi8(2);
20151        let a: i8 = 11;
20152        let r = _mm_mask_set1_epi8(src, 0, a);
20153        assert_eq_m128i(r, src);
20154        let r = _mm_mask_set1_epi8(src, 0b11111111_11111111, a);
20155        let e = _mm_set1_epi8(11);
20156        assert_eq_m128i(r, e);
20157    }
20158
20159    #[simd_test(enable = "avx512bw,avx512vl")]
20160    const fn test_mm_maskz_set1_epi8() {
20161        let a: i8 = 11;
20162        let r = _mm_maskz_set1_epi8(0, a);
20163        assert_eq_m128i(r, _mm_setzero_si128());
20164        let r = _mm_maskz_set1_epi8(0b11111111_11111111, a);
20165        let e = _mm_set1_epi8(11);
20166        assert_eq_m128i(r, e);
20167    }
20168
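    // shufflelo: the immediate rearranges the four low words of every 128-bit lane
    // (0b00_01_01_11 selects source words 3, 1, 1, 0 for destination words 0..3);
    // the four high words of each lane pass through unchanged.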
20169    #[simd_test(enable = "avx512bw")]
20170    const fn test_mm512_shufflelo_epi16() {
20171        #[rustfmt::skip]
20172        let a = _mm512_set_epi16(
20173            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
20174            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
20175        );
20176        #[rustfmt::skip]
20177        let e = _mm512_set_epi16(
20178            0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12,
20179            16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28,
20180        );
20181        let r = _mm512_shufflelo_epi16::<0b00_01_01_11>(a);
20182        assert_eq_m512i(r, e);
20183    }
20184
20185    #[simd_test(enable = "avx512bw")]
20186    const fn test_mm512_mask_shufflelo_epi16() {
20187        #[rustfmt::skip]
20188        let a = _mm512_set_epi16(
20189            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
20190            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
20191        );
20192        let r = _mm512_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a);
20193        assert_eq_m512i(r, a);
20194        let r = _mm512_mask_shufflelo_epi16::<0b00_01_01_11>(
20195            a,
20196            0b11111111_11111111_11111111_11111111,
20197            a,
20198        );
20199        #[rustfmt::skip]
20200        let e = _mm512_set_epi16(
20201            0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12,
20202            16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28,
20203        );
20204        assert_eq_m512i(r, e);
20205    }
20206
20207    #[simd_test(enable = "avx512bw")]
20208    const fn test_mm512_maskz_shufflelo_epi16() {
20209        #[rustfmt::skip]
20210        let a = _mm512_set_epi16(
20211            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
20212            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
20213        );
20214        let r = _mm512_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a);
20215        assert_eq_m512i(r, _mm512_setzero_si512());
20216        let r =
20217            _mm512_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111_11111111_11111111_11111111, a);
20218        #[rustfmt::skip]
20219        let e = _mm512_set_epi16(
20220            0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12,
20221            16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28,
20222        );
20223        assert_eq_m512i(r, e);
20224    }
20225
20226    #[simd_test(enable = "avx512bw,avx512vl")]
20227    const fn test_mm256_mask_shufflelo_epi16() {
20228        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
20229        let r = _mm256_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a);
20230        assert_eq_m256i(r, a);
20231        let r = _mm256_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0b11111111_11111111, a);
20232        let e = _mm256_set_epi16(0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12);
20233        assert_eq_m256i(r, e);
20234    }
20235
20236    #[simd_test(enable = "avx512bw,avx512vl")]
20237    const fn test_mm256_maskz_shufflelo_epi16() {
20238        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
20239        let r = _mm256_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a);
20240        assert_eq_m256i(r, _mm256_setzero_si256());
20241        let r = _mm256_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111_11111111, a);
20242        let e = _mm256_set_epi16(0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12);
20243        assert_eq_m256i(r, e);
20244    }
20245
20246    #[simd_test(enable = "avx512bw,avx512vl")]
20247    const fn test_mm_mask_shufflelo_epi16() {
20248        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
20249        let r = _mm_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a);
20250        assert_eq_m128i(r, a);
20251        let r = _mm_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0b11111111, a);
20252        let e = _mm_set_epi16(0, 1, 2, 3, 7, 6, 6, 4);
20253        assert_eq_m128i(r, e);
20254    }
20255
20256    #[simd_test(enable = "avx512bw,avx512vl")]
20257    const fn test_mm_maskz_shufflelo_epi16() {
20258        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
20259        let r = _mm_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a);
20260        assert_eq_m128i(r, _mm_setzero_si128());
20261        let r = _mm_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111, a);
20262        let e = _mm_set_epi16(0, 1, 2, 3, 7, 6, 6, 4);
20263        assert_eq_m128i(r, e);
20264    }
20265
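    // shufflehi: same immediate encoding as shufflelo, but applied to the four high
    // words of each 128-bit lane; the low words pass through unchanged.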
20266    #[simd_test(enable = "avx512bw")]
20267    const fn test_mm512_shufflehi_epi16() {
20268        #[rustfmt::skip]
20269        let a = _mm512_set_epi16(
20270            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
20271            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
20272        );
20273        #[rustfmt::skip]
20274        let e = _mm512_set_epi16(
20275            3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15,
20276            19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31,
20277        );
20278        let r = _mm512_shufflehi_epi16::<0b00_01_01_11>(a);
20279        assert_eq_m512i(r, e);
20280    }
20281
20282    #[simd_test(enable = "avx512bw")]
20283    const fn test_mm512_mask_shufflehi_epi16() {
20284        #[rustfmt::skip]
20285        let a = _mm512_set_epi16(
20286            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
20287            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
20288        );
20289        let r = _mm512_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a);
20290        assert_eq_m512i(r, a);
20291        let r = _mm512_mask_shufflehi_epi16::<0b00_01_01_11>(
20292            a,
20293            0b11111111_11111111_11111111_11111111,
20294            a,
20295        );
20296        #[rustfmt::skip]
20297        let e = _mm512_set_epi16(
20298            3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15,
20299            19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31,
20300        );
20301        assert_eq_m512i(r, e);
20302    }
20303
20304    #[simd_test(enable = "avx512bw")]
20305    const fn test_mm512_maskz_shufflehi_epi16() {
20306        #[rustfmt::skip]
20307        let a = _mm512_set_epi16(
20308            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
20309            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
20310        );
20311        let r = _mm512_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a);
20312        assert_eq_m512i(r, _mm512_setzero_si512());
20313        let r =
20314            _mm512_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111_11111111_11111111_11111111, a);
20315        #[rustfmt::skip]
20316        let e = _mm512_set_epi16(
20317            3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15,
20318            19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31,
20319        );
20320        assert_eq_m512i(r, e);
20321    }
20322
20323    #[simd_test(enable = "avx512bw,avx512vl")]
20324    const fn test_mm256_mask_shufflehi_epi16() {
20325        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
20326        let r = _mm256_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a);
20327        assert_eq_m256i(r, a);
20328        let r = _mm256_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0b11111111_11111111, a);
20329        let e = _mm256_set_epi16(3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15);
20330        assert_eq_m256i(r, e);
20331    }
20332
20333    #[simd_test(enable = "avx512bw,avx512vl")]
20334    const fn test_mm256_maskz_shufflehi_epi16() {
20335        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
20336        let r = _mm256_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a);
20337        assert_eq_m256i(r, _mm256_setzero_si256());
20338        let r = _mm256_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111_11111111, a);
20339        let e = _mm256_set_epi16(3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15);
20340        assert_eq_m256i(r, e);
20341    }
20342
20343    #[simd_test(enable = "avx512bw,avx512vl")]
20344    const fn test_mm_mask_shufflehi_epi16() {
20345        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
20346        let r = _mm_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a);
20347        assert_eq_m128i(r, a);
20348        let r = _mm_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0b11111111, a);
20349        let e = _mm_set_epi16(3, 2, 2, 0, 4, 5, 6, 7);
20350        assert_eq_m128i(r, e);
20351    }
20352
20353    #[simd_test(enable = "avx512bw,avx512vl")]
20354    const fn test_mm_maskz_shufflehi_epi16() {
20355        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
20356        let r = _mm_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a);
20357        assert_eq_m128i(r, _mm_setzero_si128());
20358        let r = _mm_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111, a);
20359        let e = _mm_set_epi16(3, 2, 2, 0, 4, 5, 6, 7);
20360        assert_eq_m128i(r, e);
20361    }
20362
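    // shuffle_epi8 works per 128-bit lane: each control byte in `b` selects a byte
    // of the same lane of `a`, so `b = set1_epi8(1)` replicates byte 1 of every lane.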
20363    #[simd_test(enable = "avx512bw")]
20364    fn test_mm512_shuffle_epi8() {
20365        #[rustfmt::skip]
20366        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
20367                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
20368                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
20369                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
20370        let b = _mm512_set1_epi8(1);
20371        let r = _mm512_shuffle_epi8(a, b);
20372        #[rustfmt::skip]
20373        let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
20374                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
20375                                46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
20376                                62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62);
20377        assert_eq_m512i(r, e);
20378    }
20379
20380    #[simd_test(enable = "avx512bw")]
20381    fn test_mm512_mask_shuffle_epi8() {
20382        #[rustfmt::skip]
20383        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
20384                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
20385                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
20386                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
20387        let b = _mm512_set1_epi8(1);
20388        let r = _mm512_mask_shuffle_epi8(a, 0, a, b);
20389        assert_eq_m512i(r, a);
20390        let r = _mm512_mask_shuffle_epi8(
20391            a,
20392            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
20393            a,
20394            b,
20395        );
20396        #[rustfmt::skip]
20397        let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
20398                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
20399                                46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
20400                                62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62);
20401        assert_eq_m512i(r, e);
20402    }
20403
20404    #[simd_test(enable = "avx512bw")]
20405    fn test_mm512_maskz_shuffle_epi8() {
20406        #[rustfmt::skip]
20407        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
20408                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
20409                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
20410                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
20411        let b = _mm512_set1_epi8(1);
20412        let r = _mm512_maskz_shuffle_epi8(0, a, b);
20413        assert_eq_m512i(r, _mm512_setzero_si512());
20414        let r = _mm512_maskz_shuffle_epi8(
20415            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
20416            a,
20417            b,
20418        );
20419        #[rustfmt::skip]
20420        let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
20421                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
20422                                46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
20423                                62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62);
20424        assert_eq_m512i(r, e);
20425    }
20426
20427    #[simd_test(enable = "avx512bw,avx512vl")]
20428    fn test_mm256_mask_shuffle_epi8() {
20429        #[rustfmt::skip]
20430        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
20431                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
20432        let b = _mm256_set1_epi8(1);
20433        let r = _mm256_mask_shuffle_epi8(a, 0, a, b);
20434        assert_eq_m256i(r, a);
20435        let r = _mm256_mask_shuffle_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
20436        #[rustfmt::skip]
20437        let e = _mm256_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
20438                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30);
20439        assert_eq_m256i(r, e);
20440    }
20441
20442    #[simd_test(enable = "avx512bw,avx512vl")]
20443    fn test_mm256_maskz_shuffle_epi8() {
20444        #[rustfmt::skip]
20445        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
20446                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
20447        let b = _mm256_set1_epi8(1);
20448        let r = _mm256_maskz_shuffle_epi8(0, a, b);
20449        assert_eq_m256i(r, _mm256_setzero_si256());
20450        let r = _mm256_maskz_shuffle_epi8(0b11111111_11111111_11111111_11111111, a, b);
20451        #[rustfmt::skip]
20452        let e = _mm256_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
20453                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30);
20454        assert_eq_m256i(r, e);
20455    }
20456
20457    #[simd_test(enable = "avx512bw,avx512vl")]
20458    fn test_mm_mask_shuffle_epi8() {
20459        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
20460        let b = _mm_set1_epi8(1);
20461        let r = _mm_mask_shuffle_epi8(a, 0, a, b);
20462        assert_eq_m128i(r, a);
20463        let r = _mm_mask_shuffle_epi8(a, 0b11111111_11111111, a, b);
20464        let e = _mm_set_epi8(
20465            14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
20466        );
20467        assert_eq_m128i(r, e);
20468    }
20469
20470    #[simd_test(enable = "avx512bw,avx512vl")]
20471    fn test_mm_maskz_shuffle_epi8() {
20472        #[rustfmt::skip]
20473        let a = _mm_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15);
20474        let b = _mm_set1_epi8(1);
20475        let r = _mm_maskz_shuffle_epi8(0, a, b);
20476        assert_eq_m128i(r, _mm_setzero_si128());
20477        let r = _mm_maskz_shuffle_epi8(0b11111111_11111111, a, b);
20478        let e = _mm_set_epi8(
20479            14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
20480        );
20481        assert_eq_m128i(r, e);
20482    }
20483
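    // test_epi*_mask: a mask bit is set when `a & b` is non-zero for that element;
    // every element here shares bit 0, so the expected mask is all ones.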
20484    #[simd_test(enable = "avx512bw")]
20485    const fn test_mm512_test_epi16_mask() {
20486        let a = _mm512_set1_epi16(1 << 0);
20487        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
20488        let r = _mm512_test_epi16_mask(a, b);
20489        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
20490        assert_eq!(r, e);
20491    }
20492
20493    #[simd_test(enable = "avx512bw")]
20494    const fn test_mm512_mask_test_epi16_mask() {
20495        let a = _mm512_set1_epi16(1 << 0);
20496        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
20497        let r = _mm512_mask_test_epi16_mask(0, a, b);
20498        assert_eq!(r, 0);
20499        let r = _mm512_mask_test_epi16_mask(0b11111111_11111111_11111111_11111111, a, b);
20500        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
20501        assert_eq!(r, e);
20502    }
20503
20504    #[simd_test(enable = "avx512bw,avx512vl")]
20505    const fn test_mm256_test_epi16_mask() {
20506        let a = _mm256_set1_epi16(1 << 0);
20507        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
20508        let r = _mm256_test_epi16_mask(a, b);
20509        let e: __mmask16 = 0b11111111_11111111;
20510        assert_eq!(r, e);
20511    }
20512
20513    #[simd_test(enable = "avx512bw,avx512vl")]
20514    const fn test_mm256_mask_test_epi16_mask() {
20515        let a = _mm256_set1_epi16(1 << 0);
20516        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
20517        let r = _mm256_mask_test_epi16_mask(0, a, b);
20518        assert_eq!(r, 0);
20519        let r = _mm256_mask_test_epi16_mask(0b11111111_11111111, a, b);
20520        let e: __mmask16 = 0b11111111_11111111;
20521        assert_eq!(r, e);
20522    }
20523
20524    #[simd_test(enable = "avx512bw,avx512vl")]
20525    const fn test_mm_test_epi16_mask() {
20526        let a = _mm_set1_epi16(1 << 0);
20527        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
20528        let r = _mm_test_epi16_mask(a, b);
20529        let e: __mmask8 = 0b11111111;
20530        assert_eq!(r, e);
20531    }
20532
20533    #[simd_test(enable = "avx512bw,avx512vl")]
20534    const fn test_mm_mask_test_epi16_mask() {
20535        let a = _mm_set1_epi16(1 << 0);
20536        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
20537        let r = _mm_mask_test_epi16_mask(0, a, b);
20538        assert_eq!(r, 0);
20539        let r = _mm_mask_test_epi16_mask(0b11111111, a, b);
20540        let e: __mmask8 = 0b11111111;
20541        assert_eq!(r, e);
20542    }
20543
20544    #[simd_test(enable = "avx512bw")]
20545    const fn test_mm512_test_epi8_mask() {
20546        let a = _mm512_set1_epi8(1 << 0);
20547        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
20548        let r = _mm512_test_epi8_mask(a, b);
20549        let e: __mmask64 =
20550            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
20551        assert_eq!(r, e);
20552    }
20553
20554    #[simd_test(enable = "avx512bw")]
20555    const fn test_mm512_mask_test_epi8_mask() {
20556        let a = _mm512_set1_epi8(1 << 0);
20557        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
20558        let r = _mm512_mask_test_epi8_mask(0, a, b);
20559        assert_eq!(r, 0);
20560        let r = _mm512_mask_test_epi8_mask(
20561            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
20562            a,
20563            b,
20564        );
20565        let e: __mmask64 =
20566            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
20567        assert_eq!(r, e);
20568    }
20569
20570    #[simd_test(enable = "avx512bw,avx512vl")]
20571    const fn test_mm256_test_epi8_mask() {
20572        let a = _mm256_set1_epi8(1 << 0);
20573        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
20574        let r = _mm256_test_epi8_mask(a, b);
20575        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
20576        assert_eq!(r, e);
20577    }
20578
20579    #[simd_test(enable = "avx512bw,avx512vl")]
20580    const fn test_mm256_mask_test_epi8_mask() {
20581        let a = _mm256_set1_epi8(1 << 0);
20582        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
20583        let r = _mm256_mask_test_epi8_mask(0, a, b);
20584        assert_eq!(r, 0);
20585        let r = _mm256_mask_test_epi8_mask(0b11111111_11111111_11111111_11111111, a, b);
20586        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
20587        assert_eq!(r, e);
20588    }
20589
20590    #[simd_test(enable = "avx512bw,avx512vl")]
20591    const fn test_mm_test_epi8_mask() {
20592        let a = _mm_set1_epi8(1 << 0);
20593        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
20594        let r = _mm_test_epi8_mask(a, b);
20595        let e: __mmask16 = 0b11111111_11111111;
20596        assert_eq!(r, e);
20597    }
20598
20599    #[simd_test(enable = "avx512bw,avx512vl")]
20600    const fn test_mm_mask_test_epi8_mask() {
20601        let a = _mm_set1_epi8(1 << 0);
20602        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
20603        let r = _mm_mask_test_epi8_mask(0, a, b);
20604        assert_eq!(r, 0);
20605        let r = _mm_mask_test_epi8_mask(0b11111111_11111111, a, b);
20606        let e: __mmask16 = 0b11111111_11111111;
20607        assert_eq!(r, e);
20608    }
20609
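    // testn_epi*_mask is the complement: a mask bit is set only when `a & b` is zero
    // for that element, so the same inputs now produce an all-zero mask.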
20610    #[simd_test(enable = "avx512bw")]
20611    const fn test_mm512_testn_epi16_mask() {
20612        let a = _mm512_set1_epi16(1 << 0);
20613        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
20614        let r = _mm512_testn_epi16_mask(a, b);
20615        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
20616        assert_eq!(r, e);
20617    }
20618
20619    #[simd_test(enable = "avx512bw")]
20620    const fn test_mm512_mask_testn_epi16_mask() {
20621        let a = _mm512_set1_epi16(1 << 0);
20622        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
20623        let r = _mm512_mask_testn_epi16_mask(0, a, b);
20624        assert_eq!(r, 0);
20625        let r = _mm512_mask_testn_epi16_mask(0b11111111_11111111_11111111_11111111, a, b);
20626        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
20627        assert_eq!(r, e);
20628    }
20629
20630    #[simd_test(enable = "avx512bw,avx512vl")]
20631    const fn test_mm256_testn_epi16_mask() {
20632        let a = _mm256_set1_epi16(1 << 0);
20633        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
20634        let r = _mm256_testn_epi16_mask(a, b);
20635        let e: __mmask16 = 0b00000000_00000000;
20636        assert_eq!(r, e);
20637    }
20638
20639    #[simd_test(enable = "avx512bw,avx512vl")]
20640    const fn test_mm256_mask_testn_epi16_mask() {
20641        let a = _mm256_set1_epi16(1 << 0);
20642        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
20643        let r = _mm256_mask_testn_epi16_mask(0, a, b);
20644        assert_eq!(r, 0);
20645        let r = _mm256_mask_testn_epi16_mask(0b11111111_11111111, a, b);
20646        let e: __mmask16 = 0b00000000_00000000;
20647        assert_eq!(r, e);
20648    }
20649
20650    #[simd_test(enable = "avx512bw,avx512vl")]
20651    const fn test_mm_testn_epi16_mask() {
20652        let a = _mm_set1_epi16(1 << 0);
20653        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
20654        let r = _mm_testn_epi16_mask(a, b);
20655        let e: __mmask8 = 0b00000000;
20656        assert_eq!(r, e);
20657    }
20658
20659    #[simd_test(enable = "avx512bw,avx512vl")]
20660    const fn test_mm_mask_testn_epi16_mask() {
20661        let a = _mm_set1_epi16(1 << 0);
20662        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
20663        let r = _mm_mask_testn_epi16_mask(0, a, b);
20664        assert_eq!(r, 0);
20665        let r = _mm_mask_testn_epi16_mask(0b11111111, a, b);
20666        let e: __mmask8 = 0b00000000;
20667        assert_eq!(r, e);
20668    }
20669
20670    #[simd_test(enable = "avx512bw")]
20671    const fn test_mm512_testn_epi8_mask() {
20672        let a = _mm512_set1_epi8(1 << 0);
20673        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
20674        let r = _mm512_testn_epi8_mask(a, b);
20675        let e: __mmask64 =
20676            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
20677        assert_eq!(r, e);
20678    }
20679
20680    #[simd_test(enable = "avx512bw")]
20681    const fn test_mm512_mask_testn_epi8_mask() {
20682        let a = _mm512_set1_epi8(1 << 0);
20683        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
20684        let r = _mm512_mask_testn_epi8_mask(0, a, b);
20685        assert_eq!(r, 0);
20686        let r = _mm512_mask_testn_epi8_mask(
20687            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
20688            a,
20689            b,
20690        );
20691        let e: __mmask64 =
20692            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
20693        assert_eq!(r, e);
20694    }
20695
20696    #[simd_test(enable = "avx512bw,avx512vl")]
20697    const fn test_mm256_testn_epi8_mask() {
20698        let a = _mm256_set1_epi8(1 << 0);
20699        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
20700        let r = _mm256_testn_epi8_mask(a, b);
20701        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
20702        assert_eq!(r, e);
20703    }
20704
20705    #[simd_test(enable = "avx512bw,avx512vl")]
20706    const fn test_mm256_mask_testn_epi8_mask() {
20707        let a = _mm256_set1_epi8(1 << 0);
20708        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
20709        let r = _mm256_mask_testn_epi8_mask(0, a, b);
20710        assert_eq!(r, 0);
20711        let r = _mm256_mask_testn_epi8_mask(0b11111111_11111111_11111111_11111111, a, b);
20712        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
20713        assert_eq!(r, e);
20714    }
20715
20716    #[simd_test(enable = "avx512bw,avx512vl")]
20717    const fn test_mm_testn_epi8_mask() {
20718        let a = _mm_set1_epi8(1 << 0);
20719        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
20720        let r = _mm_testn_epi8_mask(a, b);
20721        let e: __mmask16 = 0b00000000_00000000;
20722        assert_eq!(r, e);
20723    }
20724
20725    #[simd_test(enable = "avx512bw,avx512vl")]
20726    const fn test_mm_mask_testn_epi8_mask() {
20727        let a = _mm_set1_epi8(1 << 0);
20728        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
20729        let r = _mm_mask_testn_epi8_mask(0, a, b);
20730        assert_eq!(r, 0);
20731        let r = _mm_mask_testn_epi8_mask(0b11111111_11111111, a, b);
20732        let e: __mmask16 = 0b00000000_00000000;
20733        assert_eq!(r, e);
20734    }
20735
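    // _store_mask*/_load_mask* move a mask value through a pointer; the round trip
    // must preserve the bit pattern exactly.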
20736    #[simd_test(enable = "avx512bw")]
20737    const fn test_store_mask64() {
20738        let a: __mmask64 =
20739            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
20740        let mut r = 0;
20741        unsafe {
20742            _store_mask64(&mut r, a);
20743        }
20744        assert_eq!(r, a);
20745    }
20746
20747    #[simd_test(enable = "avx512bw")]
20748    const fn test_store_mask32() {
20749        let a: __mmask32 = 0b11111111_00000000_11111111_00000000;
20750        let mut r = 0;
20751        unsafe {
20752            _store_mask32(&mut r, a);
20753        }
20754        assert_eq!(r, a);
20755    }
20756
20757    #[simd_test(enable = "avx512bw")]
20758    const fn test_load_mask64() {
20759        let p: __mmask64 =
20760            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
20761        let r = unsafe { _load_mask64(&p) };
20762        let e: __mmask64 =
20763            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
20764        assert_eq!(r, e);
20765    }
20766
20767    #[simd_test(enable = "avx512bw")]
20768    const fn test_load_mask32() {
20769        let p: __mmask32 = 0b11111111_00000000_11111111_00000000;
20770        let r = unsafe { _load_mask32(&p) };
20771        let e: __mmask32 = 0b11111111_00000000_11111111_00000000;
20772        assert_eq!(r, e);
20773    }
20774
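    // sad_epu8 sums |a[i] - b[i]| over each group of eight bytes, so constant inputs
    // of 2 and 4 give 2 * 8 = 16 per 64-bit result; dbsad_epu8 with imm 0 computes
    // per-word SADs over four-byte blocks, giving 2 * 4 = 8 in every 16-bit lane.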
20775    #[simd_test(enable = "avx512bw")]
20776    fn test_mm512_sad_epu8() {
20777        let a = _mm512_set1_epi8(2);
20778        let b = _mm512_set1_epi8(4);
20779        let r = _mm512_sad_epu8(a, b);
20780        let e = _mm512_set1_epi64(16);
20781        assert_eq_m512i(r, e);
20782    }
20783
20784    #[simd_test(enable = "avx512bw")]
20785    fn test_mm512_dbsad_epu8() {
20786        let a = _mm512_set1_epi8(2);
20787        let b = _mm512_set1_epi8(4);
20788        let r = _mm512_dbsad_epu8::<0>(a, b);
20789        let e = _mm512_set1_epi16(8);
20790        assert_eq_m512i(r, e);
20791    }
20792
20793    #[simd_test(enable = "avx512bw")]
20794    fn test_mm512_mask_dbsad_epu8() {
20795        let src = _mm512_set1_epi16(1);
20796        let a = _mm512_set1_epi8(2);
20797        let b = _mm512_set1_epi8(4);
20798        let r = _mm512_mask_dbsad_epu8::<0>(src, 0, a, b);
20799        assert_eq_m512i(r, src);
20800        let r = _mm512_mask_dbsad_epu8::<0>(src, 0b11111111_11111111_11111111_11111111, a, b);
20801        let e = _mm512_set1_epi16(8);
20802        assert_eq_m512i(r, e);
20803    }
20804
20805    #[simd_test(enable = "avx512bw")]
20806    fn test_mm512_maskz_dbsad_epu8() {
20807        let a = _mm512_set1_epi8(2);
20808        let b = _mm512_set1_epi8(4);
20809        let r = _mm512_maskz_dbsad_epu8::<0>(0, a, b);
20810        assert_eq_m512i(r, _mm512_setzero_si512());
20811        let r = _mm512_maskz_dbsad_epu8::<0>(0b11111111_11111111_11111111_11111111, a, b);
20812        let e = _mm512_set1_epi16(8);
20813        assert_eq_m512i(r, e);
20814    }
20815
20816    #[simd_test(enable = "avx512bw,avx512vl")]
20817    fn test_mm256_dbsad_epu8() {
20818        let a = _mm256_set1_epi8(2);
20819        let b = _mm256_set1_epi8(4);
20820        let r = _mm256_dbsad_epu8::<0>(a, b);
20821        let e = _mm256_set1_epi16(8);
20822        assert_eq_m256i(r, e);
20823    }
20824
20825    #[simd_test(enable = "avx512bw,avx512vl")]
20826    fn test_mm256_mask_dbsad_epu8() {
20827        let src = _mm256_set1_epi16(1);
20828        let a = _mm256_set1_epi8(2);
20829        let b = _mm256_set1_epi8(4);
20830        let r = _mm256_mask_dbsad_epu8::<0>(src, 0, a, b);
20831        assert_eq_m256i(r, src);
20832        let r = _mm256_mask_dbsad_epu8::<0>(src, 0b11111111_11111111, a, b);
20833        let e = _mm256_set1_epi16(8);
20834        assert_eq_m256i(r, e);
20835    }
20836
20837    #[simd_test(enable = "avx512bw,avx512vl")]
20838    fn test_mm256_maskz_dbsad_epu8() {
20839        let a = _mm256_set1_epi8(2);
20840        let b = _mm256_set1_epi8(4);
20841        let r = _mm256_maskz_dbsad_epu8::<0>(0, a, b);
20842        assert_eq_m256i(r, _mm256_setzero_si256());
20843        let r = _mm256_maskz_dbsad_epu8::<0>(0b11111111_11111111, a, b);
20844        let e = _mm256_set1_epi16(8);
20845        assert_eq_m256i(r, e);
20846    }
20847
20848    #[simd_test(enable = "avx512bw,avx512vl")]
20849    fn test_mm_dbsad_epu8() {
20850        let a = _mm_set1_epi8(2);
20851        let b = _mm_set1_epi8(4);
20852        let r = _mm_dbsad_epu8::<0>(a, b);
20853        let e = _mm_set1_epi16(8);
20854        assert_eq_m128i(r, e);
20855    }
20856
20857    #[simd_test(enable = "avx512bw,avx512vl")]
20858    fn test_mm_mask_dbsad_epu8() {
20859        let src = _mm_set1_epi16(1);
20860        let a = _mm_set1_epi8(2);
20861        let b = _mm_set1_epi8(4);
20862        let r = _mm_mask_dbsad_epu8::<0>(src, 0, a, b);
20863        assert_eq_m128i(r, src);
20864        let r = _mm_mask_dbsad_epu8::<0>(src, 0b11111111, a, b);
20865        let e = _mm_set1_epi16(8);
20866        assert_eq_m128i(r, e);
20867    }
20868
20869    #[simd_test(enable = "avx512bw,avx512vl")]
20870    fn test_mm_maskz_dbsad_epu8() {
20871        let a = _mm_set1_epi8(2);
20872        let b = _mm_set1_epi8(4);
20873        let r = _mm_maskz_dbsad_epu8::<0>(0, a, b);
20874        assert_eq_m128i(r, _mm_setzero_si128());
20875        let r = _mm_maskz_dbsad_epu8::<0>(0b11111111, a, b);
20876        let e = _mm_set1_epi16(8);
20877        assert_eq_m128i(r, e);
20878    }
20879
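    // movepi*_mask collects the most significant bit of each element into the mask,
    // so elements with only the sign bit set produce an all-ones mask.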
20880    #[simd_test(enable = "avx512bw")]
20881    const fn test_mm512_movepi16_mask() {
20882        let a = _mm512_set1_epi16(1 << 15);
20883        let r = _mm512_movepi16_mask(a);
20884        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
20885        assert_eq!(r, e);
20886    }
20887
20888    #[simd_test(enable = "avx512bw,avx512vl")]
20889    const fn test_mm256_movepi16_mask() {
20890        let a = _mm256_set1_epi16(1 << 15);
20891        let r = _mm256_movepi16_mask(a);
20892        let e: __mmask16 = 0b11111111_11111111;
20893        assert_eq!(r, e);
20894    }
20895
20896    #[simd_test(enable = "avx512bw,avx512vl")]
20897    const fn test_mm_movepi16_mask() {
20898        let a = _mm_set1_epi16(1 << 15);
20899        let r = _mm_movepi16_mask(a);
20900        let e: __mmask8 = 0b11111111;
20901        assert_eq!(r, e);
20902    }
20903
20904    #[simd_test(enable = "avx512bw")]
20905    const fn test_mm512_movepi8_mask() {
20906        let a = _mm512_set1_epi8(1 << 7);
20907        let r = _mm512_movepi8_mask(a);
20908        let e: __mmask64 =
20909            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
20910        assert_eq!(r, e);
20911    }
20912
20913    #[simd_test(enable = "avx512bw,avx512vl")]
20914    const fn test_mm256_movepi8_mask() {
20915        let a = _mm256_set1_epi8(1 << 7);
20916        let r = _mm256_movepi8_mask(a);
20917        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
20918        assert_eq!(r, e);
20919    }
20920
20921    #[simd_test(enable = "avx512bw,avx512vl")]
20922    const fn test_mm_movepi8_mask() {
20923        let a = _mm_set1_epi8(1 << 7);
20924        let r = _mm_movepi8_mask(a);
20925        let e: __mmask16 = 0b11111111_11111111;
20926        assert_eq!(r, e);
20927    }
20928
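    // movm_epi* is the inverse direction: every mask bit is broadcast across its
    // element, so a set bit becomes an element with all bits set.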
20929    #[simd_test(enable = "avx512bw")]
20930    const fn test_mm512_movm_epi16() {
20931        let a: __mmask32 = 0b11111111_11111111_11111111_11111111;
20932        let r = _mm512_movm_epi16(a);
20933        let e = _mm512_set1_epi16(
20934            1 << 15
20935                | 1 << 14
20936                | 1 << 13
20937                | 1 << 12
20938                | 1 << 11
20939                | 1 << 10
20940                | 1 << 9
20941                | 1 << 8
20942                | 1 << 7
20943                | 1 << 6
20944                | 1 << 5
20945                | 1 << 4
20946                | 1 << 3
20947                | 1 << 2
20948                | 1 << 1
20949                | 1 << 0,
20950        );
20951        assert_eq_m512i(r, e);
20952    }
20953
20954    #[simd_test(enable = "avx512bw,avx512vl")]
20955    const fn test_mm256_movm_epi16() {
20956        let a: __mmask16 = 0b11111111_11111111;
20957        let r = _mm256_movm_epi16(a);
20958        let e = _mm256_set1_epi16(
20959            1 << 15
20960                | 1 << 14
20961                | 1 << 13
20962                | 1 << 12
20963                | 1 << 11
20964                | 1 << 10
20965                | 1 << 9
20966                | 1 << 8
20967                | 1 << 7
20968                | 1 << 6
20969                | 1 << 5
20970                | 1 << 4
20971                | 1 << 3
20972                | 1 << 2
20973                | 1 << 1
20974                | 1 << 0,
20975        );
20976        assert_eq_m256i(r, e);
20977    }
20978
20979    #[simd_test(enable = "avx512bw,avx512vl")]
20980    const fn test_mm_movm_epi16() {
20981        let a: __mmask8 = 0b11111111;
20982        let r = _mm_movm_epi16(a);
20983        let e = _mm_set1_epi16(
20984            1 << 15
20985                | 1 << 14
20986                | 1 << 13
20987                | 1 << 12
20988                | 1 << 11
20989                | 1 << 10
20990                | 1 << 9
20991                | 1 << 8
20992                | 1 << 7
20993                | 1 << 6
20994                | 1 << 5
20995                | 1 << 4
20996                | 1 << 3
20997                | 1 << 2
20998                | 1 << 1
20999                | 1 << 0,
21000        );
21001        assert_eq_m128i(r, e);
21002    }
21003
21004    #[simd_test(enable = "avx512bw")]
21005    const fn test_mm512_movm_epi8() {
21006        let a: __mmask64 =
21007            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
21008        let r = _mm512_movm_epi8(a);
21009        let e =
21010            _mm512_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0);
21011        assert_eq_m512i(r, e);
21012    }
21013
21014    #[simd_test(enable = "avx512bw,avx512vl")]
21015    const fn test_mm256_movm_epi8() {
21016        let a: __mmask32 = 0b11111111_11111111_11111111_11111111;
21017        let r = _mm256_movm_epi8(a);
21018        let e =
21019            _mm256_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0);
21020        assert_eq_m256i(r, e);
21021    }
21022
21023    #[simd_test(enable = "avx512bw,avx512vl")]
21024    const fn test_mm_movm_epi8() {
21025        let a: __mmask16 = 0b11111111_11111111;
21026        let r = _mm_movm_epi8(a);
21027        let e =
21028            _mm_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0);
21029        assert_eq_m128i(r, e);
21030    }
21031
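    // Mask <-> integer conversions and whole-mask operations: the _cvt* pairs are
    // bit-for-bit, _kadd adds the masks as integers, and the remaining k-ops
    // (and, not, andn, or, xor, xnor) are plain bitwise operations on the masks.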
21032    #[simd_test(enable = "avx512bw")]
21033    const fn test_cvtmask32_u32() {
21034        let a: __mmask32 = 0b11001100_00110011_01100110_10011001;
21035        let r = _cvtmask32_u32(a);
21036        let e: u32 = 0b11001100_00110011_01100110_10011001;
21037        assert_eq!(r, e);
21038    }
21039
21040    #[simd_test(enable = "avx512bw")]
21041    const fn test_cvtu32_mask32() {
21042        let a: u32 = 0b11001100_00110011_01100110_10011001;
21043        let r = _cvtu32_mask32(a);
21044        let e: __mmask32 = 0b11001100_00110011_01100110_10011001;
21045        assert_eq!(r, e);
21046    }
21047
21048    #[simd_test(enable = "avx512bw")]
21049    const fn test_kadd_mask32() {
21050        let a: __mmask32 = 11;
21051        let b: __mmask32 = 22;
21052        let r = _kadd_mask32(a, b);
21053        let e: __mmask32 = 33;
21054        assert_eq!(r, e);
21055    }
21056
21057    #[simd_test(enable = "avx512bw")]
21058    const fn test_kadd_mask64() {
21059        let a: __mmask64 = 11;
21060        let b: __mmask64 = 22;
21061        let r = _kadd_mask64(a, b);
21062        let e: __mmask64 = 33;
21063        assert_eq!(r, e);
21064    }
21065
21066    #[simd_test(enable = "avx512bw")]
21067    const fn test_kand_mask32() {
21068        let a: __mmask32 = 0b11001100_00110011_11001100_00110011;
21069        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
21070        let r = _kand_mask32(a, b);
21071        let e: __mmask32 = 0b11001100_00110011_11001100_00110011;
21072        assert_eq!(r, e);
21073    }
21074
21075    #[simd_test(enable = "avx512bw")]
21076    const fn test_kand_mask64() {
21077        let a: __mmask64 =
21078            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
21079        let b: __mmask64 =
21080            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
21081        let r = _kand_mask64(a, b);
21082        let e: __mmask64 =
21083            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
21084        assert_eq!(r, e);
21085    }
21086
21087    #[simd_test(enable = "avx512bw")]
21088    const fn test_knot_mask32() {
21089        let a: __mmask32 = 0b11001100_00110011_11001100_00110011;
21090        let r = _knot_mask32(a);
21091        let e: __mmask32 = 0b00110011_11001100_00110011_11001100;
21092        assert_eq!(r, e);
21093    }
21094
21095    #[simd_test(enable = "avx512bw")]
21096    const fn test_knot_mask64() {
21097        let a: __mmask64 =
21098            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
21099        let r = _knot_mask64(a);
21100        let e: __mmask64 =
21101            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
21102        assert_eq!(r, e);
21103    }
21104
21105    #[simd_test(enable = "avx512bw")]
21106    const fn test_kandn_mask32() {
21107        let a: __mmask32 = 0b11001100_00110011_11001100_00110011;
21108        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
21109        let r = _kandn_mask32(a, b);
21110        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
21111        assert_eq!(r, e);
21112    }
21113
21114    #[simd_test(enable = "avx512bw")]
21115    const fn test_kandn_mask64() {
21116        let a: __mmask64 =
21117            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
21118        let b: __mmask64 =
21119            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
21120        let r = _kandn_mask64(a, b);
21121        let e: __mmask64 =
21122            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
21123        assert_eq!(r, e);
21124    }
21125
21126    #[simd_test(enable = "avx512bw")]
21127    const fn test_kor_mask32() {
21128        let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
21129        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
21130        let r = _kor_mask32(a, b);
21131        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
21132        assert_eq!(r, e);
21133    }
21134
21135    #[simd_test(enable = "avx512bw")]
21136    const fn test_kor_mask64() {
21137        let a: __mmask64 =
21138            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
21139        let b: __mmask64 =
21140            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
21141        let r = _kor_mask64(a, b);
21142        let e: __mmask64 =
21143            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
21144        assert_eq!(r, e);
21145    }
21146
21147    #[simd_test(enable = "avx512bw")]
21148    const fn test_kxor_mask32() {
21149        let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
21150        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
21151        let r = _kxor_mask32(a, b);
21152        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
21153        assert_eq!(r, e);
21154    }
21155
21156    #[simd_test(enable = "avx512bw")]
21157    const fn test_kxor_mask64() {
21158        let a: __mmask64 =
21159            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
21160        let b: __mmask64 =
21161            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
21162        let r = _kxor_mask64(a, b);
21163        let e: __mmask64 =
21164            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
21165        assert_eq!(r, e);
21166    }
21167
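    // _kxnor_mask32/_kxnor_mask64 compute the bitwise XNOR; complementary operands yield all zeros.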
21168    #[simd_test(enable = "avx512bw")]
21169    const fn test_kxnor_mask32() {
21170        let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
21171        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
21172        let r = _kxnor_mask32(a, b);
21173        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
21174        assert_eq!(r, e);
21175    }
21176
21177    #[simd_test(enable = "avx512bw")]
21178    const fn test_kxnor_mask64() {
21179        let a: __mmask64 =
21180            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
21181        let b: __mmask64 =
21182            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
21183        let r = _kxnor_mask64(a, b);
21184        let e: __mmask64 =
21185            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
21186        assert_eq!(r, e);
21187    }
21188
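    // The kortest family ORs the two masks and reports the flags KORTEST would set: the
    // return value of _kortest_mask*_u8 is 1 only when the OR is all zeros, while the
    // all_ones out-pointer (and the `c` variants) reports an all-ones result; the `z`
    // variants report the all-zeros case directly. The operands below are complementary
    // in their low 32 bits, so the 32-bit OR is all ones but the 64-bit OR is not.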
21189    #[simd_test(enable = "avx512bw")]
21190    const fn test_kortest_mask32_u8() {
21191        let a: __mmask32 = 0b0110100101101001_0110100101101001;
21192        let b: __mmask32 = 0b1011011010110110_1011011010110110;
21193        let mut all_ones: u8 = 0;
21194        let r = unsafe { _kortest_mask32_u8(a, b, &mut all_ones) };
21195        assert_eq!(r, 0);
21196        assert_eq!(all_ones, 1);
21197    }
21198
21199    #[simd_test(enable = "avx512bw")]
21200    const fn test_kortest_mask64_u8() {
21201        let a: __mmask64 = 0b0110100101101001_0110100101101001;
21202        let b: __mmask64 = 0b1011011010110110_1011011010110110;
21203        let mut all_ones: u8 = 0;
21204        let r = unsafe { _kortest_mask64_u8(a, b, &mut all_ones) };
21205        assert_eq!(r, 0);
21206        assert_eq!(all_ones, 0);
21207    }
21208
21209    #[simd_test(enable = "avx512bw")]
21210    const fn test_kortestc_mask32_u8() {
21211        let a: __mmask32 = 0b0110100101101001_0110100101101001;
21212        let b: __mmask32 = 0b1011011010110110_1011011010110110;
21213        let r = _kortestc_mask32_u8(a, b);
21214        assert_eq!(r, 1);
21215    }
21216
21217    #[simd_test(enable = "avx512bw")]
21218    const fn test_kortestc_mask64_u8() {
21219        let a: __mmask64 = 0b0110100101101001_0110100101101001;
21220        let b: __mmask64 = 0b1011011010110110_1011011010110110;
21221        let r = _kortestc_mask64_u8(a, b);
21222        assert_eq!(r, 0);
21223    }
21224
21225    #[simd_test(enable = "avx512bw")]
21226    const fn test_kortestz_mask32_u8() {
21227        let a: __mmask32 = 0b0110100101101001_0110100101101001;
21228        let b: __mmask32 = 0b1011011010110110_1011011010110110;
21229        let r = _kortestz_mask32_u8(a, b);
21230        assert_eq!(r, 0);
21231    }
21232
21233    #[simd_test(enable = "avx512bw")]
21234    const fn test_kortestz_mask64_u8() {
21235        let a: __mmask64 = 0b0110100101101001_0110100101101001;
21236        let b: __mmask64 = 0b1011011010110110_1011011010110110;
21237        let r = _kortestz_mask64_u8(a, b);
21238        assert_eq!(r, 0);
21239    }
21240
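    // _kshiftli_mask*/_kshiftri_mask* shift the mask by an immediate bit count; any count
    // greater than or equal to the mask width produces an all-zero mask, which the
    // counts 32/33 and 64/65 below exercise.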
21241    #[simd_test(enable = "avx512bw")]
21242    const fn test_kshiftli_mask32() {
21243        let a: __mmask32 = 0b0110100101101001_0110100101101001;
21244        let r = _kshiftli_mask32::<3>(a);
21245        let e: __mmask32 = 0b0100101101001011_0100101101001000;
21246        assert_eq!(r, e);
21247
21248        let r = _kshiftli_mask32::<31>(a);
21249        let e: __mmask32 = 0b1000000000000000_0000000000000000;
21250        assert_eq!(r, e);
21251
21252        let r = _kshiftli_mask32::<32>(a);
21253        let e: __mmask32 = 0b0000000000000000_0000000000000000;
21254        assert_eq!(r, e);
21255
21256        let r = _kshiftli_mask32::<33>(a);
21257        let e: __mmask32 = 0b0000000000000000_0000000000000000;
21258        assert_eq!(r, e);
21259    }
21260
21261    #[simd_test(enable = "avx512bw")]
21262    const fn test_kshiftli_mask64() {
21263        let a: __mmask64 = 0b0110100101101001_0110100101101001;
21264        let r = _kshiftli_mask64::<3>(a);
21265        let e: __mmask64 = 0b011_0100101101001011_0100101101001000;
21266        assert_eq!(r, e);
21267
21268        let r = _kshiftli_mask64::<63>(a);
21269        let e: __mmask64 = 0b1000000000000000_0000000000000000_0000000000000000_0000000000000000;
21270        assert_eq!(r, e);
21271
21272        let r = _kshiftli_mask64::<64>(a);
21273        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000;
21274        assert_eq!(r, e);
21275
21276        let r = _kshiftli_mask64::<65>(a);
21277        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000;
21278        assert_eq!(r, e);
21279    }
21280
21281    #[simd_test(enable = "avx512bw")]
21282    const fn test_kshiftri_mask32() {
21283        let a: __mmask32 = 0b1010100101101001_0110100101101001;
21284        let r = _kshiftri_mask32::<3>(a);
21285        let e: __mmask32 = 0b0001010100101101_0010110100101101;
21286        assert_eq!(r, e);
21287
21288        let r = _kshiftri_mask32::<31>(a);
21289        let e: __mmask32 = 0b0000000000000000_0000000000000001;
21290        assert_eq!(r, e);
21291
21292        let r = _kshiftri_mask32::<32>(a);
21293        let e: __mmask32 = 0b0000000000000000_0000000000000000;
21294        assert_eq!(r, e);
21295
21296        let r = _kshiftri_mask32::<33>(a);
21297        let e: __mmask32 = 0b0000000000000000_0000000000000000;
21298        assert_eq!(r, e);
21299    }
21300
21301    #[simd_test(enable = "avx512bw")]
21302    const fn test_kshiftri_mask64() {
21303        let a: __mmask64 = 0b101_0100101101001011_0100101101001000;
21304        let r = _kshiftri_mask64::<3>(a);
21305        let e: __mmask64 = 0b1010100101101001_0110100101101001;
21306        assert_eq!(r, e);
21307
21308        let r = _kshiftri_mask64::<34>(a);
21309        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000001;
21310        assert_eq!(r, e);
21311
21312        let r = _kshiftri_mask64::<35>(a);
21313        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000;
21314        assert_eq!(r, e);
21315
21316        let r = _kshiftri_mask64::<64>(a);
21317        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000;
21318        assert_eq!(r, e);
21319
21320        let r = _kshiftri_mask64::<65>(a);
21321        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000;
21322        assert_eq!(r, e);
21323    }
21324
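    // The ktest family mirrors the KTEST flag semantics: _ktestz_mask*_u8 (and the return
    // value of _ktest_mask*_u8) is 1 when a AND b is all zeros, while _ktestc_mask*_u8 (and
    // the and_not out-pointer) is 1 when (NOT a) AND b is all zeros. The vectors below use
    // b equal to the complement of a's low bits, so a AND b is zero but (NOT a) AND b is not.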
21325    #[simd_test(enable = "avx512bw")]
21326    const fn test_ktest_mask32_u8() {
21327        let a: __mmask32 = 0b0110100100111100_0110100100111100;
21328        let b: __mmask32 = 0b1001011011000011_1001011011000011;
21329        let mut and_not: u8 = 0;
21330        let r = unsafe { _ktest_mask32_u8(a, b, &mut and_not) };
21331        assert_eq!(r, 1);
21332        assert_eq!(and_not, 0);
21333    }
21334
21335    #[simd_test(enable = "avx512bw")]
21336    const fn test_ktestc_mask32_u8() {
21337        let a: __mmask32 = 0b0110100100111100_0110100100111100;
21338        let b: __mmask32 = 0b1001011011000011_1001011011000011;
21339        let r = _ktestc_mask32_u8(a, b);
21340        assert_eq!(r, 0);
21341    }
21342
21343    #[simd_test(enable = "avx512bw")]
21344    const fn test_ktestz_mask32_u8() {
21345        let a: __mmask32 = 0b0110100100111100_0110100100111100;
21346        let b: __mmask32 = 0b1001011011000011_1001011011000011;
21347        let r = _ktestz_mask32_u8(a, b);
21348        assert_eq!(r, 1);
21349    }
21350
21351    #[simd_test(enable = "avx512bw")]
21352    const fn test_ktest_mask64_u8() {
21353        let a: __mmask64 = 0b0110100100111100_0110100100111100;
21354        let b: __mmask64 = 0b1001011011000011_1001011011000011;
21355        let mut and_not: u8 = 0;
21356        let r = unsafe { _ktest_mask64_u8(a, b, &mut and_not) };
21357        assert_eq!(r, 1);
21358        assert_eq!(and_not, 0);
21359    }
21360
21361    #[simd_test(enable = "avx512bw")]
21362    const fn test_ktestc_mask64_u8() {
21363        let a: __mmask64 = 0b0110100100111100_0110100100111100;
21364        let b: __mmask64 = 0b1001011011000011_1001011011000011;
21365        let r = _ktestc_mask64_u8(a, b);
21366        assert_eq!(r, 0);
21367    }
21368
21369    #[simd_test(enable = "avx512bw")]
21370    const fn test_ktestz_mask64_u8() {
21371        let a: __mmask64 = 0b0110100100111100_0110100100111100;
21372        let b: __mmask64 = 0b1001011011000011_1001011011000011;
21373        let r = _ktestz_mask64_u8(a, b);
21374        assert_eq!(r, 1);
21375    }
21376
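    // _mm512_kunpackw/_mm512_kunpackd concatenate the low half of each operand: the low
    // 16 (or 32) bits of b form the low half of the result and the low 16 (or 32) bits of a
    // form the upper half.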
21377    #[simd_test(enable = "avx512bw")]
21378    const fn test_mm512_kunpackw() {
21379        let a: u32 = 0x00110011;
21380        let b: u32 = 0x00001011;
21381        let r = _mm512_kunpackw(a, b);
21382        let e: u32 = 0x00111011;
21383        assert_eq!(r, e);
21384    }
21385
21386    #[simd_test(enable = "avx512bw")]
21387    const fn test_mm512_kunpackd() {
21388        let a: u64 = 0x11001100_00110011;
21389        let b: u64 = 0x00101110_00001011;
21390        let r = _mm512_kunpackd(a, b);
21391        let e: u64 = 0x00110011_00001011;
21392        assert_eq!(r, e);
21393    }
21394
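    // vpmovwb down-conversions: _mm512_cvtepi16_epi8 and the narrower variants truncate each
    // 16-bit element to 8 bits. For the 128-bit source the eight converted bytes occupy the
    // low half of the destination and the upper half is zeroed.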
21395    #[simd_test(enable = "avx512bw")]
21396    const fn test_mm512_cvtepi16_epi8() {
21397        let a = _mm512_set1_epi16(2);
21398        let r = _mm512_cvtepi16_epi8(a);
21399        let e = _mm256_set1_epi8(2);
21400        assert_eq_m256i(r, e);
21401    }
21402
21403    #[simd_test(enable = "avx512bw")]
21404    const fn test_mm512_mask_cvtepi16_epi8() {
21405        let src = _mm256_set1_epi8(1);
21406        let a = _mm512_set1_epi16(2);
21407        let r = _mm512_mask_cvtepi16_epi8(src, 0, a);
21408        assert_eq_m256i(r, src);
21409        let r = _mm512_mask_cvtepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a);
21410        let e = _mm256_set1_epi8(2);
21411        assert_eq_m256i(r, e);
21412    }
21413
21414    #[simd_test(enable = "avx512bw")]
21415    const fn test_mm512_maskz_cvtepi16_epi8() {
21416        let a = _mm512_set1_epi16(2);
21417        let r = _mm512_maskz_cvtepi16_epi8(0, a);
21418        assert_eq_m256i(r, _mm256_setzero_si256());
21419        let r = _mm512_maskz_cvtepi16_epi8(0b11111111_11111111_11111111_11111111, a);
21420        let e = _mm256_set1_epi8(2);
21421        assert_eq_m256i(r, e);
21422    }
21423
21424    #[simd_test(enable = "avx512bw,avx512vl")]
21425    const fn test_mm256_cvtepi16_epi8() {
21426        let a = _mm256_set1_epi16(2);
21427        let r = _mm256_cvtepi16_epi8(a);
21428        let e = _mm_set1_epi8(2);
21429        assert_eq_m128i(r, e);
21430    }
21431
21432    #[simd_test(enable = "avx512bw,avx512vl")]
21433    const fn test_mm256_mask_cvtepi16_epi8() {
21434        let src = _mm_set1_epi8(1);
21435        let a = _mm256_set1_epi16(2);
21436        let r = _mm256_mask_cvtepi16_epi8(src, 0, a);
21437        assert_eq_m128i(r, src);
21438        let r = _mm256_mask_cvtepi16_epi8(src, 0b11111111_11111111, a);
21439        let e = _mm_set1_epi8(2);
21440        assert_eq_m128i(r, e);
21441    }
21442
21443    #[simd_test(enable = "avx512bw,avx512vl")]
21444    const fn test_mm256_maskz_cvtepi16_epi8() {
21445        let a = _mm256_set1_epi16(2);
21446        let r = _mm256_maskz_cvtepi16_epi8(0, a);
21447        assert_eq_m128i(r, _mm_setzero_si128());
21448        let r = _mm256_maskz_cvtepi16_epi8(0b11111111_11111111, a);
21449        let e = _mm_set1_epi8(2);
21450        assert_eq_m128i(r, e);
21451    }
21452
21453    #[simd_test(enable = "avx512bw,avx512vl")]
21454    const fn test_mm_cvtepi16_epi8() {
21455        let a = _mm_set1_epi16(2);
21456        let r = _mm_cvtepi16_epi8(a);
21457        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
21458        assert_eq_m128i(r, e);
21459    }
21460
21461    #[simd_test(enable = "avx512bw,avx512vl")]
21462    const fn test_mm_mask_cvtepi16_epi8() {
21463        let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
21464        let a = _mm_set1_epi16(2);
21465        let r = _mm_mask_cvtepi16_epi8(src, 0, a);
21466        assert_eq_m128i(r, src);
21467        let r = _mm_mask_cvtepi16_epi8(src, 0b11111111, a);
21468        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
21469        assert_eq_m128i(r, e);
21470    }
21471
21472    #[simd_test(enable = "avx512bw,avx512vl")]
21473    const fn test_mm_maskz_cvtepi16_epi8() {
21474        let a = _mm_set1_epi16(2);
21475        let r = _mm_maskz_cvtepi16_epi8(0, a);
21476        assert_eq_m128i(r, _mm_setzero_si128());
21477        let r = _mm_maskz_cvtepi16_epi8(0b11111111, a);
21478        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
21479        assert_eq_m128i(r, e);
21480    }
21481
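    // Signed saturating down-conversions (vpmovswb): values outside the i8 range clamp to
    // i8::MIN/i8::MAX, so i16::MAX converts to i8::MAX.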
21482    #[simd_test(enable = "avx512bw")]
21483    fn test_mm512_cvtsepi16_epi8() {
21484        let a = _mm512_set1_epi16(i16::MAX);
21485        let r = _mm512_cvtsepi16_epi8(a);
21486        let e = _mm256_set1_epi8(i8::MAX);
21487        assert_eq_m256i(r, e);
21488    }
21489
21490    #[simd_test(enable = "avx512bw")]
21491    fn test_mm512_mask_cvtsepi16_epi8() {
21492        let src = _mm256_set1_epi8(1);
21493        let a = _mm512_set1_epi16(i16::MAX);
21494        let r = _mm512_mask_cvtsepi16_epi8(src, 0, a);
21495        assert_eq_m256i(r, src);
21496        let r = _mm512_mask_cvtsepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a);
21497        let e = _mm256_set1_epi8(i8::MAX);
21498        assert_eq_m256i(r, e);
21499    }
21500
21501    #[simd_test(enable = "avx512bw,avx512vl")]
21502    fn test_mm256_cvtsepi16_epi8() {
21503        let a = _mm256_set1_epi16(i16::MAX);
21504        let r = _mm256_cvtsepi16_epi8(a);
21505        let e = _mm_set1_epi8(i8::MAX);
21506        assert_eq_m128i(r, e);
21507    }
21508
21509    #[simd_test(enable = "avx512bw,avx512vl")]
21510    fn test_mm256_mask_cvtsepi16_epi8() {
21511        let src = _mm_set1_epi8(1);
21512        let a = _mm256_set1_epi16(i16::MAX);
21513        let r = _mm256_mask_cvtsepi16_epi8(src, 0, a);
21514        assert_eq_m128i(r, src);
21515        let r = _mm256_mask_cvtsepi16_epi8(src, 0b11111111_11111111, a);
21516        let e = _mm_set1_epi8(i8::MAX);
21517        assert_eq_m128i(r, e);
21518    }
21519
21520    #[simd_test(enable = "avx512bw,avx512vl")]
21521    fn test_mm256_maskz_cvtsepi16_epi8() {
21522        let a = _mm256_set1_epi16(i16::MAX);
21523        let r = _mm256_maskz_cvtsepi16_epi8(0, a);
21524        assert_eq_m128i(r, _mm_setzero_si128());
21525        let r = _mm256_maskz_cvtsepi16_epi8(0b11111111_11111111, a);
21526        let e = _mm_set1_epi8(i8::MAX);
21527        assert_eq_m128i(r, e);
21528    }
21529
21530    #[simd_test(enable = "avx512bw,avx512vl")]
21531    fn test_mm_cvtsepi16_epi8() {
21532        let a = _mm_set1_epi16(i16::MAX);
21533        let r = _mm_cvtsepi16_epi8(a);
21534        #[rustfmt::skip]
21535        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
21536        assert_eq_m128i(r, e);
21537    }
21538
21539    #[simd_test(enable = "avx512bw,avx512vl")]
21540    fn test_mm_mask_cvtsepi16_epi8() {
21541        let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
21542        let a = _mm_set1_epi16(i16::MAX);
21543        let r = _mm_mask_cvtsepi16_epi8(src, 0, a);
21544        assert_eq_m128i(r, src);
21545        let r = _mm_mask_cvtsepi16_epi8(src, 0b11111111, a);
21546        #[rustfmt::skip]
21547        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
21548        assert_eq_m128i(r, e);
21549    }
21550
21551    #[simd_test(enable = "avx512bw,avx512vl")]
21552    fn test_mm_maskz_cvtsepi16_epi8() {
21553        let a = _mm_set1_epi16(i16::MAX);
21554        let r = _mm_maskz_cvtsepi16_epi8(0, a);
21555        assert_eq_m128i(r, _mm_setzero_si128());
21556        let r = _mm_maskz_cvtsepi16_epi8(0b11111111, a);
21557        #[rustfmt::skip]
21558        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
21559        assert_eq_m128i(r, e);
21560    }
21561
21562    #[simd_test(enable = "avx512bw")]
21563    fn test_mm512_maskz_cvtsepi16_epi8() {
21564        let a = _mm512_set1_epi16(i16::MAX);
21565        let r = _mm512_maskz_cvtsepi16_epi8(0, a);
21566        assert_eq_m256i(r, _mm256_setzero_si256());
21567        let r = _mm512_maskz_cvtsepi16_epi8(0b11111111_11111111_11111111_11111111, a);
21568        let e = _mm256_set1_epi8(i8::MAX);
21569        assert_eq_m256i(r, e);
21570    }
21571
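    // Unsigned saturating down-conversions (vpmovuswb): the source is treated as unsigned,
    // so i16::MIN (0x8000 = 32768) saturates to 0xFF, which reads back as -1 through the
    // signed _mm*_set1_epi8 helpers.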
21572    #[simd_test(enable = "avx512bw")]
21573    fn test_mm512_cvtusepi16_epi8() {
21574        let a = _mm512_set1_epi16(i16::MIN);
21575        let r = _mm512_cvtusepi16_epi8(a);
21576        let e = _mm256_set1_epi8(-1);
21577        assert_eq_m256i(r, e);
21578    }
21579
21580    #[simd_test(enable = "avx512bw")]
21581    fn test_mm512_mask_cvtusepi16_epi8() {
21582        let src = _mm256_set1_epi8(1);
21583        let a = _mm512_set1_epi16(i16::MIN);
21584        let r = _mm512_mask_cvtusepi16_epi8(src, 0, a);
21585        assert_eq_m256i(r, src);
21586        let r = _mm512_mask_cvtusepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a);
21587        let e = _mm256_set1_epi8(-1);
21588        assert_eq_m256i(r, e);
21589    }
21590
21591    #[simd_test(enable = "avx512bw")]
21592    fn test_mm512_maskz_cvtusepi16_epi8() {
21593        let a = _mm512_set1_epi16(i16::MIN);
21594        let r = _mm512_maskz_cvtusepi16_epi8(0, a);
21595        assert_eq_m256i(r, _mm256_setzero_si256());
21596        let r = _mm512_maskz_cvtusepi16_epi8(0b11111111_11111111_11111111_11111111, a);
21597        let e = _mm256_set1_epi8(-1);
21598        assert_eq_m256i(r, e);
21599    }
21600
21601    #[simd_test(enable = "avx512bw,avx512vl")]
21602    fn test_mm256_cvtusepi16_epi8() {
21603        let a = _mm256_set1_epi16(i16::MIN);
21604        let r = _mm256_cvtusepi16_epi8(a);
21605        let e = _mm_set1_epi8(-1);
21606        assert_eq_m128i(r, e);
21607    }
21608
21609    #[simd_test(enable = "avx512bw,avx512vl")]
21610    fn test_mm256_mask_cvtusepi16_epi8() {
21611        let src = _mm_set1_epi8(1);
21612        let a = _mm256_set1_epi16(i16::MIN);
21613        let r = _mm256_mask_cvtusepi16_epi8(src, 0, a);
21614        assert_eq_m128i(r, src);
21615        let r = _mm256_mask_cvtusepi16_epi8(src, 0b11111111_11111111, a);
21616        let e = _mm_set1_epi8(-1);
21617        assert_eq_m128i(r, e);
21618    }
21619
21620    #[simd_test(enable = "avx512bw,avx512vl")]
21621    fn test_mm256_maskz_cvtusepi16_epi8() {
21622        let a = _mm256_set1_epi16(i16::MIN);
21623        let r = _mm256_maskz_cvtusepi16_epi8(0, a);
21624        assert_eq_m128i(r, _mm_setzero_si128());
21625        let r = _mm256_maskz_cvtusepi16_epi8(0b11111111_11111111, a);
21626        let e = _mm_set1_epi8(-1);
21627        assert_eq_m128i(r, e);
21628    }
21629
21630    #[simd_test(enable = "avx512bw,avx512vl")]
21631    fn test_mm_cvtusepi16_epi8() {
21632        let a = _mm_set1_epi16(i16::MIN);
21633        let r = _mm_cvtusepi16_epi8(a);
21634        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
21635        assert_eq_m128i(r, e);
21636    }
21637
21638    #[simd_test(enable = "avx512bw,avx512vl")]
21639    fn test_mm_mask_cvtusepi16_epi8() {
21640        let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
21641        let a = _mm_set1_epi16(i16::MIN);
21642        let r = _mm_mask_cvtusepi16_epi8(src, 0, a);
21643        assert_eq_m128i(r, src);
21644        let r = _mm_mask_cvtusepi16_epi8(src, 0b11111111, a);
21645        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
21646        assert_eq_m128i(r, e);
21647    }
21648
21649    #[simd_test(enable = "avx512bw,avx512vl")]
21650    fn test_mm_maskz_cvtusepi16_epi8() {
21651        let a = _mm_set1_epi16(i16::MIN);
21652        let r = _mm_maskz_cvtusepi16_epi8(0, a);
21653        assert_eq_m128i(r, _mm_setzero_si128());
21654        let r = _mm_maskz_cvtusepi16_epi8(0b11111111, a);
21655        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
21656        assert_eq_m128i(r, e);
21657    }
21658
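    // _mm512_cvtepi8_epi16 and the masked variants sign-extend each 8-bit element to 16 bits.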
21659    #[simd_test(enable = "avx512bw")]
21660    const fn test_mm512_cvtepi8_epi16() {
21661        let a = _mm256_set1_epi8(2);
21662        let r = _mm512_cvtepi8_epi16(a);
21663        let e = _mm512_set1_epi16(2);
21664        assert_eq_m512i(r, e);
21665    }
21666
21667    #[simd_test(enable = "avx512bw")]
21668    const fn test_mm512_mask_cvtepi8_epi16() {
21669        let src = _mm512_set1_epi16(1);
21670        let a = _mm256_set1_epi8(2);
21671        let r = _mm512_mask_cvtepi8_epi16(src, 0, a);
21672        assert_eq_m512i(r, src);
21673        let r = _mm512_mask_cvtepi8_epi16(src, 0b11111111_11111111_11111111_11111111, a);
21674        let e = _mm512_set1_epi16(2);
21675        assert_eq_m512i(r, e);
21676    }
21677
21678    #[simd_test(enable = "avx512bw")]
21679    const fn test_mm512_maskz_cvtepi8_epi16() {
21680        let a = _mm256_set1_epi8(2);
21681        let r = _mm512_maskz_cvtepi8_epi16(0, a);
21682        assert_eq_m512i(r, _mm512_setzero_si512());
21683        let r = _mm512_maskz_cvtepi8_epi16(0b11111111_11111111_11111111_11111111, a);
21684        let e = _mm512_set1_epi16(2);
21685        assert_eq_m512i(r, e);
21686    }
21687
21688    #[simd_test(enable = "avx512bw,avx512vl")]
21689    const fn test_mm256_mask_cvtepi8_epi16() {
21690        let src = _mm256_set1_epi16(1);
21691        let a = _mm_set1_epi8(2);
21692        let r = _mm256_mask_cvtepi8_epi16(src, 0, a);
21693        assert_eq_m256i(r, src);
21694        let r = _mm256_mask_cvtepi8_epi16(src, 0b11111111_11111111, a);
21695        let e = _mm256_set1_epi16(2);
21696        assert_eq_m256i(r, e);
21697    }
21698
21699    #[simd_test(enable = "avx512bw,avx512vl")]
21700    const fn test_mm256_maskz_cvtepi8_epi16() {
21701        let a = _mm_set1_epi8(2);
21702        let r = _mm256_maskz_cvtepi8_epi16(0, a);
21703        assert_eq_m256i(r, _mm256_setzero_si256());
21704        let r = _mm256_maskz_cvtepi8_epi16(0b11111111_11111111, a);
21705        let e = _mm256_set1_epi16(2);
21706        assert_eq_m256i(r, e);
21707    }
21708
21709    #[simd_test(enable = "avx512bw,avx512vl")]
21710    const fn test_mm_mask_cvtepi8_epi16() {
21711        let src = _mm_set1_epi16(1);
21712        let a = _mm_set1_epi8(2);
21713        let r = _mm_mask_cvtepi8_epi16(src, 0, a);
21714        assert_eq_m128i(r, src);
21715        let r = _mm_mask_cvtepi8_epi16(src, 0b11111111, a);
21716        let e = _mm_set1_epi16(2);
21717        assert_eq_m128i(r, e);
21718    }
21719
21720    #[simd_test(enable = "avx512bw,avx512vl")]
21721    const fn test_mm_maskz_cvtepi8_epi16() {
21722        let a = _mm_set1_epi8(2);
21723        let r = _mm_maskz_cvtepi8_epi16(0, a);
21724        assert_eq_m128i(r, _mm_setzero_si128());
21725        let r = _mm_maskz_cvtepi8_epi16(0b11111111, a);
21726        let e = _mm_set1_epi16(2);
21727        assert_eq_m128i(r, e);
21728    }
21729
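    // _mm512_cvtepu8_epi16 and the masked variants zero-extend each 8-bit element to 16 bits.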
21730    #[simd_test(enable = "avx512bw")]
21731    const fn test_mm512_cvtepu8_epi16() {
21732        let a = _mm256_set1_epi8(2);
21733        let r = _mm512_cvtepu8_epi16(a);
21734        let e = _mm512_set1_epi16(2);
21735        assert_eq_m512i(r, e);
21736    }
21737
21738    #[simd_test(enable = "avx512bw")]
21739    const fn test_mm512_mask_cvtepu8_epi16() {
21740        let src = _mm512_set1_epi16(1);
21741        let a = _mm256_set1_epi8(2);
21742        let r = _mm512_mask_cvtepu8_epi16(src, 0, a);
21743        assert_eq_m512i(r, src);
21744        let r = _mm512_mask_cvtepu8_epi16(src, 0b11111111_11111111_11111111_11111111, a);
21745        let e = _mm512_set1_epi16(2);
21746        assert_eq_m512i(r, e);
21747    }
21748
21749    #[simd_test(enable = "avx512bw")]
21750    const fn test_mm512_maskz_cvtepu8_epi16() {
21751        let a = _mm256_set1_epi8(2);
21752        let r = _mm512_maskz_cvtepu8_epi16(0, a);
21753        assert_eq_m512i(r, _mm512_setzero_si512());
21754        let r = _mm512_maskz_cvtepu8_epi16(0b11111111_11111111_11111111_11111111, a);
21755        let e = _mm512_set1_epi16(2);
21756        assert_eq_m512i(r, e);
21757    }
21758
21759    #[simd_test(enable = "avx512bw,avx512vl")]
21760    const fn test_mm256_mask_cvtepu8_epi16() {
21761        let src = _mm256_set1_epi16(1);
21762        let a = _mm_set1_epi8(2);
21763        let r = _mm256_mask_cvtepu8_epi16(src, 0, a);
21764        assert_eq_m256i(r, src);
21765        let r = _mm256_mask_cvtepu8_epi16(src, 0b11111111_11111111, a);
21766        let e = _mm256_set1_epi16(2);
21767        assert_eq_m256i(r, e);
21768    }
21769
21770    #[simd_test(enable = "avx512bw,avx512vl")]
21771    const fn test_mm256_maskz_cvtepu8_epi16() {
21772        let a = _mm_set1_epi8(2);
21773        let r = _mm256_maskz_cvtepu8_epi16(0, a);
21774        assert_eq_m256i(r, _mm256_setzero_si256());
21775        let r = _mm256_maskz_cvtepu8_epi16(0b11111111_11111111, a);
21776        let e = _mm256_set1_epi16(2);
21777        assert_eq_m256i(r, e);
21778    }
21779
21780    #[simd_test(enable = "avx512bw,avx512vl")]
21781    const fn test_mm_mask_cvtepu8_epi16() {
21782        let src = _mm_set1_epi16(1);
21783        let a = _mm_set1_epi8(2);
21784        let r = _mm_mask_cvtepu8_epi16(src, 0, a);
21785        assert_eq_m128i(r, src);
21786        let r = _mm_mask_cvtepu8_epi16(src, 0b11111111, a);
21787        let e = _mm_set1_epi16(2);
21788        assert_eq_m128i(r, e);
21789    }
21790
21791    #[simd_test(enable = "avx512bw,avx512vl")]
21792    const fn test_mm_maskz_cvtepu8_epi16() {
21793        let a = _mm_set1_epi8(2);
21794        let r = _mm_maskz_cvtepu8_epi16(0, a);
21795        assert_eq_m128i(r, _mm_setzero_si128());
21796        let r = _mm_maskz_cvtepu8_epi16(0b11111111, a);
21797        let e = _mm_set1_epi16(2);
21798        assert_eq_m128i(r, e);
21799    }
21800
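    // _mm512_bslli_epi128/_mm512_bsrli_epi128 shift bytes left/right independently within
    // each 128-bit lane, filling the vacated bytes with zeros.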
21801    #[simd_test(enable = "avx512bw")]
21802    const fn test_mm512_bslli_epi128() {
21803        #[rustfmt::skip]
21804        let a = _mm512_set_epi8(
21805            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
21806            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
21807            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
21808            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
21809        );
21810        let r = _mm512_bslli_epi128::<9>(a);
21811        #[rustfmt::skip]
21812        let e = _mm512_set_epi8(
21813            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
21814            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
21815            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
21816            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
21817        );
21818        assert_eq_m512i(r, e);
21819    }
21820
21821    #[simd_test(enable = "avx512bw")]
21822    const fn test_mm512_bsrli_epi128() {
21823        #[rustfmt::skip]
21824        let a = _mm512_set_epi8(
21825            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
21826            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
21827            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
21828            49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
21829        );
21830        let r = _mm512_bsrli_epi128::<3>(a);
21831        #[rustfmt::skip]
21832        let e = _mm512_set_epi8(
21833            0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
21834            0, 0, 0, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
21835            0, 0, 0, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
21836            0, 0, 0, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
21837        );
21838        assert_eq_m512i(r, e);
21839    }
21840
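    // _mm512_alignr_epi8 concatenates each 128-bit lane of a (upper half) with the matching
    // lane of b (lower half), shifts the 32-byte intermediate right by the immediate byte
    // count, and keeps the low 16 bytes per lane; the masked variants then blend or zero
    // per the mask.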
21841    #[simd_test(enable = "avx512bw")]
21842    const fn test_mm512_alignr_epi8() {
21843        #[rustfmt::skip]
21844        let a = _mm512_set_epi8(
21845            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
21846            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
21847            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
21848            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
21849        );
21850        let b = _mm512_set1_epi8(1);
21851        let r = _mm512_alignr_epi8::<14>(a, b);
21852        #[rustfmt::skip]
21853        let e = _mm512_set_epi8(
21854            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
21855            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
21856            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
21857            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
21858        );
21859        assert_eq_m512i(r, e);
21860    }
21861
21862    #[simd_test(enable = "avx512bw")]
21863    const fn test_mm512_mask_alignr_epi8() {
21864        #[rustfmt::skip]
21865        let a = _mm512_set_epi8(
21866            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
21867            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
21868            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
21869            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
21870        );
21871        let b = _mm512_set1_epi8(1);
21872        let r = _mm512_mask_alignr_epi8::<14>(a, 0, a, b);
21873        assert_eq_m512i(r, a);
21874        let r = _mm512_mask_alignr_epi8::<14>(
21875            a,
21876            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
21877            a,
21878            b,
21879        );
21880        #[rustfmt::skip]
21881        let e = _mm512_set_epi8(
21882            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
21883            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
21884            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
21885            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
21886        );
21887        assert_eq_m512i(r, e);
21888    }
21889
21890    #[simd_test(enable = "avx512bw")]
21891    const fn test_mm512_maskz_alignr_epi8() {
21892        #[rustfmt::skip]
21893        let a = _mm512_set_epi8(
21894            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
21895            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
21896            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
21897            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
21898        );
21899        let b = _mm512_set1_epi8(1);
21900        let r = _mm512_maskz_alignr_epi8::<14>(0, a, b);
21901        assert_eq_m512i(r, _mm512_setzero_si512());
21902        let r = _mm512_maskz_alignr_epi8::<14>(
21903            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
21904            a,
21905            b,
21906        );
21907        #[rustfmt::skip]
21908        let e = _mm512_set_epi8(
21909            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
21910            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
21911            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
21912            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
21913        );
21914        assert_eq_m512i(r, e);
21915    }
21916
21917    #[simd_test(enable = "avx512bw,avx512vl")]
21918    const fn test_mm256_mask_alignr_epi8() {
21919        #[rustfmt::skip]
21920        let a = _mm256_set_epi8(
21921            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
21922            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
21923        );
21924        let b = _mm256_set1_epi8(1);
21925        let r = _mm256_mask_alignr_epi8::<14>(a, 0, a, b);
21926        assert_eq_m256i(r, a);
21927        let r = _mm256_mask_alignr_epi8::<14>(a, 0b11111111_11111111_11111111_11111111, a, b);
21928        #[rustfmt::skip]
21929        let e = _mm256_set_epi8(
21930            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
21931            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
21932        );
21933        assert_eq_m256i(r, e);
21934    }
21935
21936    #[simd_test(enable = "avx512bw,avx512vl")]
21937    const fn test_mm256_maskz_alignr_epi8() {
21938        #[rustfmt::skip]
21939        let a = _mm256_set_epi8(
21940            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
21941            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
21942        );
21943        let b = _mm256_set1_epi8(1);
21944        let r = _mm256_maskz_alignr_epi8::<14>(0, a, b);
21945        assert_eq_m256i(r, _mm256_setzero_si256());
21946        let r = _mm256_maskz_alignr_epi8::<14>(0b11111111_11111111_11111111_11111111, a, b);
21947        #[rustfmt::skip]
21948        let e = _mm256_set_epi8(
21949            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
21950            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
21951        );
21952        assert_eq_m256i(r, e);
21953    }
21954
21955    #[simd_test(enable = "avx512bw,avx512vl")]
21956    const fn test_mm_mask_alignr_epi8() {
21957        let a = _mm_set_epi8(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0);
21958        let b = _mm_set1_epi8(1);
21959        let r = _mm_mask_alignr_epi8::<14>(a, 0, a, b);
21960        assert_eq_m128i(r, a);
21961        let r = _mm_mask_alignr_epi8::<14>(a, 0b11111111_11111111, a, b);
21962        let e = _mm_set_epi8(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1);
21963        assert_eq_m128i(r, e);
21964    }
21965
21966    #[simd_test(enable = "avx512bw,avx512vl")]
21967    const fn test_mm_maskz_alignr_epi8() {
21968        let a = _mm_set_epi8(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0);
21969        let b = _mm_set1_epi8(1);
21970        let r = _mm_maskz_alignr_epi8::<14>(0, a, b);
21971        assert_eq_m128i(r, _mm_setzero_si128());
21972        let r = _mm_maskz_alignr_epi8::<14>(0b11111111_11111111, a, b);
21973        let e = _mm_set_epi8(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1);
21974        assert_eq_m128i(r, e);
21975    }
21976
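    // The cvt*_storeu_epi8 intrinsics down-convert (with truncation, signed saturation, or
    // unsigned saturation) and store the selected bytes directly to unaligned memory; bytes
    // whose mask bit is clear are left untouched, which is why the 128-bit cases start from
    // a zeroed buffer.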
21977    #[simd_test(enable = "avx512bw")]
21978    fn test_mm512_mask_cvtsepi16_storeu_epi8() {
21979        let a = _mm512_set1_epi16(i16::MAX);
21980        let mut r = _mm256_undefined_si256();
21981        unsafe {
21982            _mm512_mask_cvtsepi16_storeu_epi8(
21983                &mut r as *mut _ as *mut i8,
21984                0b11111111_11111111_11111111_11111111,
21985                a,
21986            );
21987        }
21988        let e = _mm256_set1_epi8(i8::MAX);
21989        assert_eq_m256i(r, e);
21990    }
21991
21992    #[simd_test(enable = "avx512bw,avx512vl")]
21993    fn test_mm256_mask_cvtsepi16_storeu_epi8() {
21994        let a = _mm256_set1_epi16(i16::MAX);
21995        let mut r = _mm_undefined_si128();
21996        unsafe {
21997            _mm256_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
21998        }
21999        let e = _mm_set1_epi8(i8::MAX);
22000        assert_eq_m128i(r, e);
22001    }
22002
22003    #[simd_test(enable = "avx512bw,avx512vl")]
22004    fn test_mm_mask_cvtsepi16_storeu_epi8() {
22005        let a = _mm_set1_epi16(i16::MAX);
22006        let mut r = _mm_set1_epi8(0);
22007        unsafe {
22008            _mm_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
22009        }
22010        #[rustfmt::skip]
22011        let e = _mm_set_epi8(
22012            0, 0, 0, 0, 0, 0, 0, 0,
22013            i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
22014        );
22015        assert_eq_m128i(r, e);
22016    }
22017
22018    #[simd_test(enable = "avx512bw")]
22019    fn test_mm512_mask_cvtepi16_storeu_epi8() {
22020        let a = _mm512_set1_epi16(8);
22021        let mut r = _mm256_undefined_si256();
22022        unsafe {
22023            _mm512_mask_cvtepi16_storeu_epi8(
22024                &mut r as *mut _ as *mut i8,
22025                0b11111111_11111111_11111111_11111111,
22026                a,
22027            );
22028        }
22029        let e = _mm256_set1_epi8(8);
22030        assert_eq_m256i(r, e);
22031    }
22032
22033    #[simd_test(enable = "avx512bw,avx512vl")]
22034    fn test_mm256_mask_cvtepi16_storeu_epi8() {
22035        let a = _mm256_set1_epi16(8);
22036        let mut r = _mm_undefined_si128();
22037        unsafe {
22038            _mm256_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
22039        }
22040        let e = _mm_set1_epi8(8);
22041        assert_eq_m128i(r, e);
22042    }
22043
22044    #[simd_test(enable = "avx512bw,avx512vl")]
22045    fn test_mm_mask_cvtepi16_storeu_epi8() {
22046        let a = _mm_set1_epi16(8);
22047        let mut r = _mm_set1_epi8(0);
22048        unsafe {
22049            _mm_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
22050        }
22051        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8);
22052        assert_eq_m128i(r, e);
22053    }
22054
22055    #[simd_test(enable = "avx512bw")]
22056    fn test_mm512_mask_cvtusepi16_storeu_epi8() {
22057        let a = _mm512_set1_epi16(i16::MAX);
22058        let mut r = _mm256_undefined_si256();
22059        unsafe {
22060            _mm512_mask_cvtusepi16_storeu_epi8(
22061                &mut r as *mut _ as *mut i8,
22062                0b11111111_11111111_11111111_11111111,
22063                a,
22064            );
22065        }
22066        let e = _mm256_set1_epi8(u8::MAX as i8);
22067        assert_eq_m256i(r, e);
22068    }
22069
22070    #[simd_test(enable = "avx512bw,avx512vl")]
22071    fn test_mm256_mask_cvtusepi16_storeu_epi8() {
22072        let a = _mm256_set1_epi16(i16::MAX);
22073        let mut r = _mm_undefined_si128();
22074        unsafe {
22075            _mm256_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
22076        }
22077        let e = _mm_set1_epi8(u8::MAX as i8);
22078        assert_eq_m128i(r, e);
22079    }
22080
22081    #[simd_test(enable = "avx512bw,avx512vl")]
22082    fn test_mm_mask_cvtusepi16_storeu_epi8() {
22083        let a = _mm_set1_epi16(i16::MAX);
22084        let mut r = _mm_set1_epi8(0);
22085        unsafe {
22086            _mm_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
22087        }
22088        #[rustfmt::skip]
22089        let e = _mm_set_epi8(
22090            0, 0, 0, 0,
22091            0, 0, 0, 0,
22092            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
22093            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
22094        );
22095        assert_eq_m128i(r, e);
22096    }
22097}