core/stdarch/crates/core_arch/src/x86/
avx512bw.rs

1use crate::{
2    core_arch::{simd::*, x86::*},
3    intrinsics::simd::*,
4    ptr,
5};
6
7#[cfg(test)]
8use stdarch_test::assert_instr;
9
10/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst.
11///
12/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi16&expand=30)
13#[inline]
14#[target_feature(enable = "avx512bw")]
15#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16#[cfg_attr(test, assert_instr(vpabsw))]
17pub fn _mm512_abs_epi16(a: __m512i) -> __m512i {
18    unsafe {
19        let a = a.as_i16x32();
20        let cmp: i16x32 = simd_gt(a, i16x32::ZERO);
21        transmute(simd_select(cmp, a, simd_neg(a)))
22    }
23}
24
25/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26///
27/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi16&expand=31)
28#[inline]
29#[target_feature(enable = "avx512bw")]
30#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31#[cfg_attr(test, assert_instr(vpabsw))]
32pub fn _mm512_mask_abs_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
33    unsafe {
34        let abs = _mm512_abs_epi16(a).as_i16x32();
35        transmute(simd_select_bitmask(k, abs, src.as_i16x32()))
36    }
37}
38
39/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
40///
41/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi16&expand=32)
42#[inline]
43#[target_feature(enable = "avx512bw")]
44#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
45#[cfg_attr(test, assert_instr(vpabsw))]
46pub fn _mm512_maskz_abs_epi16(k: __mmask32, a: __m512i) -> __m512i {
47    unsafe {
48        let abs = _mm512_abs_epi16(a).as_i16x32();
49        transmute(simd_select_bitmask(k, abs, i16x32::ZERO))
50    }
51}
52
53/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
54///
55/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi16&expand=28)
56#[inline]
57#[target_feature(enable = "avx512bw,avx512vl")]
58#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
59#[cfg_attr(test, assert_instr(vpabsw))]
60pub fn _mm256_mask_abs_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
61    unsafe {
62        let abs = _mm256_abs_epi16(a).as_i16x16();
63        transmute(simd_select_bitmask(k, abs, src.as_i16x16()))
64    }
65}
66
67/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
68///
69/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi16&expand=29)
70#[inline]
71#[target_feature(enable = "avx512bw,avx512vl")]
72#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
73#[cfg_attr(test, assert_instr(vpabsw))]
74pub fn _mm256_maskz_abs_epi16(k: __mmask16, a: __m256i) -> __m256i {
75    unsafe {
76        let abs = _mm256_abs_epi16(a).as_i16x16();
77        transmute(simd_select_bitmask(k, abs, i16x16::ZERO))
78    }
79}
80
81/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
82///
83/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi16&expand=25)
84#[inline]
85#[target_feature(enable = "avx512bw,avx512vl")]
86#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
87#[cfg_attr(test, assert_instr(vpabsw))]
88pub fn _mm_mask_abs_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
89    unsafe {
90        let abs = _mm_abs_epi16(a).as_i16x8();
91        transmute(simd_select_bitmask(k, abs, src.as_i16x8()))
92    }
93}
94
95/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
96///
97/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi16&expand=26)
98#[inline]
99#[target_feature(enable = "avx512bw,avx512vl")]
100#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
101#[cfg_attr(test, assert_instr(vpabsw))]
102pub fn _mm_maskz_abs_epi16(k: __mmask8, a: __m128i) -> __m128i {
103    unsafe {
104        let abs = _mm_abs_epi16(a).as_i16x8();
105        transmute(simd_select_bitmask(k, abs, i16x8::ZERO))
106    }
107}
108
109/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst.
110///
111/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi8&expand=57)
112#[inline]
113#[target_feature(enable = "avx512bw")]
114#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
115#[cfg_attr(test, assert_instr(vpabsb))]
116pub fn _mm512_abs_epi8(a: __m512i) -> __m512i {
117    unsafe {
118        let a = a.as_i8x64();
119        let cmp: i8x64 = simd_gt(a, i8x64::ZERO);
120        transmute(simd_select(cmp, a, simd_neg(a)))
121    }
122}
123
124/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
125///
126/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi8&expand=58)
127#[inline]
128#[target_feature(enable = "avx512bw")]
129#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
130#[cfg_attr(test, assert_instr(vpabsb))]
131pub fn _mm512_mask_abs_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
132    unsafe {
133        let abs = _mm512_abs_epi8(a).as_i8x64();
134        transmute(simd_select_bitmask(k, abs, src.as_i8x64()))
135    }
136}
137
138/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
139///
140/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi8&expand=59)
141#[inline]
142#[target_feature(enable = "avx512bw")]
143#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
144#[cfg_attr(test, assert_instr(vpabsb))]
145pub fn _mm512_maskz_abs_epi8(k: __mmask64, a: __m512i) -> __m512i {
146    unsafe {
147        let abs = _mm512_abs_epi8(a).as_i8x64();
148        transmute(simd_select_bitmask(k, abs, i8x64::ZERO))
149    }
150}
151
152/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
153///
154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi8&expand=55)
155#[inline]
156#[target_feature(enable = "avx512bw,avx512vl")]
157#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
158#[cfg_attr(test, assert_instr(vpabsb))]
159pub fn _mm256_mask_abs_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
160    unsafe {
161        let abs = _mm256_abs_epi8(a).as_i8x32();
162        transmute(simd_select_bitmask(k, abs, src.as_i8x32()))
163    }
164}
165
166/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
167///
168/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi8&expand=56)
169#[inline]
170#[target_feature(enable = "avx512bw,avx512vl")]
171#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
172#[cfg_attr(test, assert_instr(vpabsb))]
173pub fn _mm256_maskz_abs_epi8(k: __mmask32, a: __m256i) -> __m256i {
174    unsafe {
175        let abs = _mm256_abs_epi8(a).as_i8x32();
176        transmute(simd_select_bitmask(k, abs, i8x32::ZERO))
177    }
178}
179
180/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set)
181///
182/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi8&expand=52)
183#[inline]
184#[target_feature(enable = "avx512bw,avx512vl")]
185#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
186#[cfg_attr(test, assert_instr(vpabsb))]
187pub fn _mm_mask_abs_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
188    unsafe {
189        let abs = _mm_abs_epi8(a).as_i8x16();
190        transmute(simd_select_bitmask(k, abs, src.as_i8x16()))
191    }
192}
193
194/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
195///
196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi8&expand=53)
197#[inline]
198#[target_feature(enable = "avx512bw,avx512vl")]
199#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
200#[cfg_attr(test, assert_instr(vpabsb))]
201pub fn _mm_maskz_abs_epi8(k: __mmask16, a: __m128i) -> __m128i {
202    unsafe {
203        let abs = _mm_abs_epi8(a).as_i8x16();
204        transmute(simd_select_bitmask(k, abs, i8x16::ZERO))
205    }
206}
207
208/// Add packed 16-bit integers in a and b, and store the results in dst.
209///
210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi16&expand=91)
211#[inline]
212#[target_feature(enable = "avx512bw")]
213#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
214#[cfg_attr(test, assert_instr(vpaddw))]
215pub fn _mm512_add_epi16(a: __m512i, b: __m512i) -> __m512i {
216    unsafe { transmute(simd_add(a.as_i16x32(), b.as_i16x32())) }
217}
218
219/// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
220///
221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi16&expand=92)
222#[inline]
223#[target_feature(enable = "avx512bw")]
224#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
225#[cfg_attr(test, assert_instr(vpaddw))]
226pub fn _mm512_mask_add_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
227    unsafe {
228        let add = _mm512_add_epi16(a, b).as_i16x32();
229        transmute(simd_select_bitmask(k, add, src.as_i16x32()))
230    }
231}
232
233/// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
234///
235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi16&expand=93)
236#[inline]
237#[target_feature(enable = "avx512bw")]
238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
239#[cfg_attr(test, assert_instr(vpaddw))]
240pub fn _mm512_maskz_add_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
241    unsafe {
242        let add = _mm512_add_epi16(a, b).as_i16x32();
243        transmute(simd_select_bitmask(k, add, i16x32::ZERO))
244    }
245}
246
247/// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
248///
249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi16&expand=89)
250#[inline]
251#[target_feature(enable = "avx512bw,avx512vl")]
252#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
253#[cfg_attr(test, assert_instr(vpaddw))]
254pub fn _mm256_mask_add_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
255    unsafe {
256        let add = _mm256_add_epi16(a, b).as_i16x16();
257        transmute(simd_select_bitmask(k, add, src.as_i16x16()))
258    }
259}
260
261/// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
262///
263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi16&expand=90)
264#[inline]
265#[target_feature(enable = "avx512bw,avx512vl")]
266#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
267#[cfg_attr(test, assert_instr(vpaddw))]
268pub fn _mm256_maskz_add_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
269    unsafe {
270        let add = _mm256_add_epi16(a, b).as_i16x16();
271        transmute(simd_select_bitmask(k, add, i16x16::ZERO))
272    }
273}
274
275/// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
276///
277/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi16&expand=86)
278#[inline]
279#[target_feature(enable = "avx512bw,avx512vl")]
280#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
281#[cfg_attr(test, assert_instr(vpaddw))]
282pub fn _mm_mask_add_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
283    unsafe {
284        let add = _mm_add_epi16(a, b).as_i16x8();
285        transmute(simd_select_bitmask(k, add, src.as_i16x8()))
286    }
287}
288
289/// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
290///
291/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi16&expand=87)
292#[inline]
293#[target_feature(enable = "avx512bw,avx512vl")]
294#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
295#[cfg_attr(test, assert_instr(vpaddw))]
296pub fn _mm_maskz_add_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
297    unsafe {
298        let add = _mm_add_epi16(a, b).as_i16x8();
299        transmute(simd_select_bitmask(k, add, i16x8::ZERO))
300    }
301}
302
303/// Add packed 8-bit integers in a and b, and store the results in dst.
304///
305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi8&expand=118)
306#[inline]
307#[target_feature(enable = "avx512bw")]
308#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
309#[cfg_attr(test, assert_instr(vpaddb))]
310pub fn _mm512_add_epi8(a: __m512i, b: __m512i) -> __m512i {
311    unsafe { transmute(simd_add(a.as_i8x64(), b.as_i8x64())) }
312}
313
314/// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
315///
316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi8&expand=119)
317#[inline]
318#[target_feature(enable = "avx512bw")]
319#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
320#[cfg_attr(test, assert_instr(vpaddb))]
321pub fn _mm512_mask_add_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
322    unsafe {
323        let add = _mm512_add_epi8(a, b).as_i8x64();
324        transmute(simd_select_bitmask(k, add, src.as_i8x64()))
325    }
326}
327
328/// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
329///
330/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi8&expand=120)
331#[inline]
332#[target_feature(enable = "avx512bw")]
333#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
334#[cfg_attr(test, assert_instr(vpaddb))]
335pub fn _mm512_maskz_add_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
336    unsafe {
337        let add = _mm512_add_epi8(a, b).as_i8x64();
338        transmute(simd_select_bitmask(k, add, i8x64::ZERO))
339    }
340}
341
342/// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
343///
344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi8&expand=116)
345#[inline]
346#[target_feature(enable = "avx512bw,avx512vl")]
347#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
348#[cfg_attr(test, assert_instr(vpaddb))]
349pub fn _mm256_mask_add_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
350    unsafe {
351        let add = _mm256_add_epi8(a, b).as_i8x32();
352        transmute(simd_select_bitmask(k, add, src.as_i8x32()))
353    }
354}
355
356/// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
357///
358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi8&expand=117)
359#[inline]
360#[target_feature(enable = "avx512bw,avx512vl")]
361#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
362#[cfg_attr(test, assert_instr(vpaddb))]
363pub fn _mm256_maskz_add_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
364    unsafe {
365        let add = _mm256_add_epi8(a, b).as_i8x32();
366        transmute(simd_select_bitmask(k, add, i8x32::ZERO))
367    }
368}
369
370/// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
371///
372/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi8&expand=113)
373#[inline]
374#[target_feature(enable = "avx512bw,avx512vl")]
375#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
376#[cfg_attr(test, assert_instr(vpaddb))]
377pub fn _mm_mask_add_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
378    unsafe {
379        let add = _mm_add_epi8(a, b).as_i8x16();
380        transmute(simd_select_bitmask(k, add, src.as_i8x16()))
381    }
382}
383
384/// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
385///
386/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi8&expand=114)
387#[inline]
388#[target_feature(enable = "avx512bw,avx512vl")]
389#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
390#[cfg_attr(test, assert_instr(vpaddb))]
391pub fn _mm_maskz_add_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
392    unsafe {
393        let add = _mm_add_epi8(a, b).as_i8x16();
394        transmute(simd_select_bitmask(k, add, i8x16::ZERO))
395    }
396}
397
398/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst.
399///
400/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epu16&expand=197)
401#[inline]
402#[target_feature(enable = "avx512bw")]
403#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
404#[cfg_attr(test, assert_instr(vpaddusw))]
405pub fn _mm512_adds_epu16(a: __m512i, b: __m512i) -> __m512i {
406    unsafe { transmute(simd_saturating_add(a.as_u16x32(), b.as_u16x32())) }
407}
408
409/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
410///
411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epu16&expand=198)
412#[inline]
413#[target_feature(enable = "avx512bw")]
414#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
415#[cfg_attr(test, assert_instr(vpaddusw))]
416pub fn _mm512_mask_adds_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
417    unsafe {
418        let add = _mm512_adds_epu16(a, b).as_u16x32();
419        transmute(simd_select_bitmask(k, add, src.as_u16x32()))
420    }
421}
422
423/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
424///
425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epu16&expand=199)
426#[inline]
427#[target_feature(enable = "avx512bw")]
428#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
429#[cfg_attr(test, assert_instr(vpaddusw))]
430pub fn _mm512_maskz_adds_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
431    unsafe {
432        let add = _mm512_adds_epu16(a, b).as_u16x32();
433        transmute(simd_select_bitmask(k, add, u16x32::ZERO))
434    }
435}
436
437/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
438///
439/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epu16&expand=195)
440#[inline]
441#[target_feature(enable = "avx512bw,avx512vl")]
442#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
443#[cfg_attr(test, assert_instr(vpaddusw))]
444pub fn _mm256_mask_adds_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
445    unsafe {
446        let add = _mm256_adds_epu16(a, b).as_u16x16();
447        transmute(simd_select_bitmask(k, add, src.as_u16x16()))
448    }
449}
450
451/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
452///
453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epu16&expand=196)
454#[inline]
455#[target_feature(enable = "avx512bw,avx512vl")]
456#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
457#[cfg_attr(test, assert_instr(vpaddusw))]
458pub fn _mm256_maskz_adds_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
459    unsafe {
460        let add = _mm256_adds_epu16(a, b).as_u16x16();
461        transmute(simd_select_bitmask(k, add, u16x16::ZERO))
462    }
463}
464
465/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
466///
467/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epu16&expand=192)
468#[inline]
469#[target_feature(enable = "avx512bw,avx512vl")]
470#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
471#[cfg_attr(test, assert_instr(vpaddusw))]
472pub fn _mm_mask_adds_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
473    unsafe {
474        let add = _mm_adds_epu16(a, b).as_u16x8();
475        transmute(simd_select_bitmask(k, add, src.as_u16x8()))
476    }
477}
478
479/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
480///
481/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epu16&expand=193)
482#[inline]
483#[target_feature(enable = "avx512bw,avx512vl")]
484#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
485#[cfg_attr(test, assert_instr(vpaddusw))]
486pub fn _mm_maskz_adds_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
487    unsafe {
488        let add = _mm_adds_epu16(a, b).as_u16x8();
489        transmute(simd_select_bitmask(k, add, u16x8::ZERO))
490    }
491}
492
493/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst.
494///
495/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epu8&expand=206)
496#[inline]
497#[target_feature(enable = "avx512bw")]
498#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
499#[cfg_attr(test, assert_instr(vpaddusb))]
500pub fn _mm512_adds_epu8(a: __m512i, b: __m512i) -> __m512i {
501    unsafe { transmute(simd_saturating_add(a.as_u8x64(), b.as_u8x64())) }
502}
503
504/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
505///
506/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epu8&expand=207)
507#[inline]
508#[target_feature(enable = "avx512bw")]
509#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
510#[cfg_attr(test, assert_instr(vpaddusb))]
511pub fn _mm512_mask_adds_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
512    unsafe {
513        let add = _mm512_adds_epu8(a, b).as_u8x64();
514        transmute(simd_select_bitmask(k, add, src.as_u8x64()))
515    }
516}
517
518/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
519///
520/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epu8&expand=208)
521#[inline]
522#[target_feature(enable = "avx512bw")]
523#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
524#[cfg_attr(test, assert_instr(vpaddusb))]
525pub fn _mm512_maskz_adds_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
526    unsafe {
527        let add = _mm512_adds_epu8(a, b).as_u8x64();
528        transmute(simd_select_bitmask(k, add, u8x64::ZERO))
529    }
530}
531
532/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
533///
534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epu8&expand=204)
535#[inline]
536#[target_feature(enable = "avx512bw,avx512vl")]
537#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
538#[cfg_attr(test, assert_instr(vpaddusb))]
539pub fn _mm256_mask_adds_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
540    unsafe {
541        let add = _mm256_adds_epu8(a, b).as_u8x32();
542        transmute(simd_select_bitmask(k, add, src.as_u8x32()))
543    }
544}
545
546/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
547///
548/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epu8&expand=205)
549#[inline]
550#[target_feature(enable = "avx512bw,avx512vl")]
551#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
552#[cfg_attr(test, assert_instr(vpaddusb))]
553pub fn _mm256_maskz_adds_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
554    unsafe {
555        let add = _mm256_adds_epu8(a, b).as_u8x32();
556        transmute(simd_select_bitmask(k, add, u8x32::ZERO))
557    }
558}
559
560/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
561///
562/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epu8&expand=201)
563#[inline]
564#[target_feature(enable = "avx512bw,avx512vl")]
565#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
566#[cfg_attr(test, assert_instr(vpaddusb))]
567pub fn _mm_mask_adds_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
568    unsafe {
569        let add = _mm_adds_epu8(a, b).as_u8x16();
570        transmute(simd_select_bitmask(k, add, src.as_u8x16()))
571    }
572}
573
574/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
575///
576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epu8&expand=202)
577#[inline]
578#[target_feature(enable = "avx512bw,avx512vl")]
579#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
580#[cfg_attr(test, assert_instr(vpaddusb))]
581pub fn _mm_maskz_adds_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
582    unsafe {
583        let add = _mm_adds_epu8(a, b).as_u8x16();
584        transmute(simd_select_bitmask(k, add, u8x16::ZERO))
585    }
586}
587
588/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst.
589///
590/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epi16&expand=179)
591#[inline]
592#[target_feature(enable = "avx512bw")]
593#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
594#[cfg_attr(test, assert_instr(vpaddsw))]
595pub fn _mm512_adds_epi16(a: __m512i, b: __m512i) -> __m512i {
596    unsafe { transmute(simd_saturating_add(a.as_i16x32(), b.as_i16x32())) }
597}
598
599/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
600///
601/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epi16&expand=180)
602#[inline]
603#[target_feature(enable = "avx512bw")]
604#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
605#[cfg_attr(test, assert_instr(vpaddsw))]
606pub fn _mm512_mask_adds_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
607    unsafe {
608        let add = _mm512_adds_epi16(a, b).as_i16x32();
609        transmute(simd_select_bitmask(k, add, src.as_i16x32()))
610    }
611}
612
613/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
614///
615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epi16&expand=181)
616#[inline]
617#[target_feature(enable = "avx512bw")]
618#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
619#[cfg_attr(test, assert_instr(vpaddsw))]
620pub fn _mm512_maskz_adds_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
621    unsafe {
622        let add = _mm512_adds_epi16(a, b).as_i16x32();
623        transmute(simd_select_bitmask(k, add, i16x32::ZERO))
624    }
625}
626
627/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
628///
629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epi16&expand=177)
630#[inline]
631#[target_feature(enable = "avx512bw,avx512vl")]
632#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
633#[cfg_attr(test, assert_instr(vpaddsw))]
634pub fn _mm256_mask_adds_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
635    unsafe {
636        let add = _mm256_adds_epi16(a, b).as_i16x16();
637        transmute(simd_select_bitmask(k, add, src.as_i16x16()))
638    }
639}
640
641/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
642///
643/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epi16&expand=178)
644#[inline]
645#[target_feature(enable = "avx512bw,avx512vl")]
646#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
647#[cfg_attr(test, assert_instr(vpaddsw))]
648pub fn _mm256_maskz_adds_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
649    unsafe {
650        let add = _mm256_adds_epi16(a, b).as_i16x16();
651        transmute(simd_select_bitmask(k, add, i16x16::ZERO))
652    }
653}
654
655/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
656///
657/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epi16&expand=174)
658#[inline]
659#[target_feature(enable = "avx512bw,avx512vl")]
660#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
661#[cfg_attr(test, assert_instr(vpaddsw))]
662pub fn _mm_mask_adds_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
663    unsafe {
664        let add = _mm_adds_epi16(a, b).as_i16x8();
665        transmute(simd_select_bitmask(k, add, src.as_i16x8()))
666    }
667}
668
669/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
670///
671/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epi16&expand=175)
672#[inline]
673#[target_feature(enable = "avx512bw,avx512vl")]
674#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
675#[cfg_attr(test, assert_instr(vpaddsw))]
676pub fn _mm_maskz_adds_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
677    unsafe {
678        let add = _mm_adds_epi16(a, b).as_i16x8();
679        transmute(simd_select_bitmask(k, add, i16x8::ZERO))
680    }
681}
682
683/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst.
684///
685/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epi8&expand=188)
686#[inline]
687#[target_feature(enable = "avx512bw")]
688#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
689#[cfg_attr(test, assert_instr(vpaddsb))]
690pub fn _mm512_adds_epi8(a: __m512i, b: __m512i) -> __m512i {
691    unsafe { transmute(simd_saturating_add(a.as_i8x64(), b.as_i8x64())) }
692}
693
694/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
695///
696/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epi8&expand=189)
697#[inline]
698#[target_feature(enable = "avx512bw")]
699#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
700#[cfg_attr(test, assert_instr(vpaddsb))]
701pub fn _mm512_mask_adds_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
702    unsafe {
703        let add = _mm512_adds_epi8(a, b).as_i8x64();
704        transmute(simd_select_bitmask(k, add, src.as_i8x64()))
705    }
706}
707
708/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
709///
710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epi8&expand=190)
711#[inline]
712#[target_feature(enable = "avx512bw")]
713#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
714#[cfg_attr(test, assert_instr(vpaddsb))]
715pub fn _mm512_maskz_adds_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
716    unsafe {
717        let add = _mm512_adds_epi8(a, b).as_i8x64();
718        transmute(simd_select_bitmask(k, add, i8x64::ZERO))
719    }
720}
721
722/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
723///
724/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epi8&expand=186)
725#[inline]
726#[target_feature(enable = "avx512bw,avx512vl")]
727#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
728#[cfg_attr(test, assert_instr(vpaddsb))]
729pub fn _mm256_mask_adds_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
730    unsafe {
731        let add = _mm256_adds_epi8(a, b).as_i8x32();
732        transmute(simd_select_bitmask(k, add, src.as_i8x32()))
733    }
734}
735
736/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
737///
738/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epi8&expand=187)
739#[inline]
740#[target_feature(enable = "avx512bw,avx512vl")]
741#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
742#[cfg_attr(test, assert_instr(vpaddsb))]
743pub fn _mm256_maskz_adds_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
744    unsafe {
745        let add = _mm256_adds_epi8(a, b).as_i8x32();
746        transmute(simd_select_bitmask(k, add, i8x32::ZERO))
747    }
748}
749
750/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
751///
752/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epi8&expand=183)
753#[inline]
754#[target_feature(enable = "avx512bw,avx512vl")]
755#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
756#[cfg_attr(test, assert_instr(vpaddsb))]
757pub fn _mm_mask_adds_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
758    unsafe {
759        let add = _mm_adds_epi8(a, b).as_i8x16();
760        transmute(simd_select_bitmask(k, add, src.as_i8x16()))
761    }
762}
763
764/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
765///
766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epi8&expand=184)
767#[inline]
768#[target_feature(enable = "avx512bw,avx512vl")]
769#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
770#[cfg_attr(test, assert_instr(vpaddsb))]
771pub fn _mm_maskz_adds_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
772    unsafe {
773        let add = _mm_adds_epi8(a, b).as_i8x16();
774        transmute(simd_select_bitmask(k, add, i8x16::ZERO))
775    }
776}
777
778/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst.
779///
780/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi16&expand=5685)
781#[inline]
782#[target_feature(enable = "avx512bw")]
783#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
784#[cfg_attr(test, assert_instr(vpsubw))]
785pub fn _mm512_sub_epi16(a: __m512i, b: __m512i) -> __m512i {
786    unsafe { transmute(simd_sub(a.as_i16x32(), b.as_i16x32())) }
787}
788
789/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
790///
791/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi16&expand=5683)
792#[inline]
793#[target_feature(enable = "avx512bw")]
794#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
795#[cfg_attr(test, assert_instr(vpsubw))]
796pub fn _mm512_mask_sub_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
797    unsafe {
798        let sub = _mm512_sub_epi16(a, b).as_i16x32();
799        transmute(simd_select_bitmask(k, sub, src.as_i16x32()))
800    }
801}
802
803/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
804///
805/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi16&expand=5684)
806#[inline]
807#[target_feature(enable = "avx512bw")]
808#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
809#[cfg_attr(test, assert_instr(vpsubw))]
810pub fn _mm512_maskz_sub_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
811    unsafe {
812        let sub = _mm512_sub_epi16(a, b).as_i16x32();
813        transmute(simd_select_bitmask(k, sub, i16x32::ZERO))
814    }
815}
816
817/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
818///
819/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi16&expand=5680)
820#[inline]
821#[target_feature(enable = "avx512bw,avx512vl")]
822#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
823#[cfg_attr(test, assert_instr(vpsubw))]
824pub fn _mm256_mask_sub_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
825    unsafe {
826        let sub = _mm256_sub_epi16(a, b).as_i16x16();
827        transmute(simd_select_bitmask(k, sub, src.as_i16x16()))
828    }
829}
830
831/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
832///
833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi16&expand=5681)
834#[inline]
835#[target_feature(enable = "avx512bw,avx512vl")]
836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
837#[cfg_attr(test, assert_instr(vpsubw))]
838pub fn _mm256_maskz_sub_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
839    unsafe {
840        let sub = _mm256_sub_epi16(a, b).as_i16x16();
841        transmute(simd_select_bitmask(k, sub, i16x16::ZERO))
842    }
843}
844
845/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
846///
847/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi16&expand=5677)
848#[inline]
849#[target_feature(enable = "avx512bw,avx512vl")]
850#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
851#[cfg_attr(test, assert_instr(vpsubw))]
852pub fn _mm_mask_sub_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
853    unsafe {
854        let sub = _mm_sub_epi16(a, b).as_i16x8();
855        transmute(simd_select_bitmask(k, sub, src.as_i16x8()))
856    }
857}
858
859/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
860///
861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi16&expand=5678)
862#[inline]
863#[target_feature(enable = "avx512bw,avx512vl")]
864#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
865#[cfg_attr(test, assert_instr(vpsubw))]
866pub fn _mm_maskz_sub_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
867    unsafe {
868        let sub = _mm_sub_epi16(a, b).as_i16x8();
869        transmute(simd_select_bitmask(k, sub, i16x8::ZERO))
870    }
871}
872
873/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst.
874///
875/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi8&expand=5712)
876#[inline]
877#[target_feature(enable = "avx512bw")]
878#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
879#[cfg_attr(test, assert_instr(vpsubb))]
880pub fn _mm512_sub_epi8(a: __m512i, b: __m512i) -> __m512i {
881    unsafe { transmute(simd_sub(a.as_i8x64(), b.as_i8x64())) }
882}
883
884/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
885///
886/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi8&expand=5710)
887#[inline]
888#[target_feature(enable = "avx512bw")]
889#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
890#[cfg_attr(test, assert_instr(vpsubb))]
891pub fn _mm512_mask_sub_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
892    unsafe {
893        let sub = _mm512_sub_epi8(a, b).as_i8x64();
894        transmute(simd_select_bitmask(k, sub, src.as_i8x64()))
895    }
896}
897
898/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
899///
900/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi8&expand=5711)
901#[inline]
902#[target_feature(enable = "avx512bw")]
903#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
904#[cfg_attr(test, assert_instr(vpsubb))]
905pub fn _mm512_maskz_sub_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
906    unsafe {
907        let sub = _mm512_sub_epi8(a, b).as_i8x64();
908        transmute(simd_select_bitmask(k, sub, i8x64::ZERO))
909    }
910}
911
912/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
913///
914/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi8&expand=5707)
915#[inline]
916#[target_feature(enable = "avx512bw,avx512vl")]
917#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
918#[cfg_attr(test, assert_instr(vpsubb))]
919pub fn _mm256_mask_sub_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
920    unsafe {
921        let sub = _mm256_sub_epi8(a, b).as_i8x32();
922        transmute(simd_select_bitmask(k, sub, src.as_i8x32()))
923    }
924}
925
926/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
927///
928/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi8&expand=5708)
929#[inline]
930#[target_feature(enable = "avx512bw,avx512vl")]
931#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
932#[cfg_attr(test, assert_instr(vpsubb))]
933pub fn _mm256_maskz_sub_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
934    unsafe {
935        let sub = _mm256_sub_epi8(a, b).as_i8x32();
936        transmute(simd_select_bitmask(k, sub, i8x32::ZERO))
937    }
938}
939
940/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
941///
942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi8&expand=5704)
943#[inline]
944#[target_feature(enable = "avx512bw,avx512vl")]
945#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
946#[cfg_attr(test, assert_instr(vpsubb))]
947pub fn _mm_mask_sub_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
948    unsafe {
949        let sub = _mm_sub_epi8(a, b).as_i8x16();
950        transmute(simd_select_bitmask(k, sub, src.as_i8x16()))
951    }
952}
953
954/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
955///
956/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi8&expand=5705)
957#[inline]
958#[target_feature(enable = "avx512bw,avx512vl")]
959#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
960#[cfg_attr(test, assert_instr(vpsubb))]
961pub fn _mm_maskz_sub_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
962    unsafe {
963        let sub = _mm_sub_epi8(a, b).as_i8x16();
964        transmute(simd_select_bitmask(k, sub, i8x16::ZERO))
965    }
966}
967
968/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst.
969///
970/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epu16&expand=5793)
971#[inline]
972#[target_feature(enable = "avx512bw")]
973#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
974#[cfg_attr(test, assert_instr(vpsubusw))]
975pub fn _mm512_subs_epu16(a: __m512i, b: __m512i) -> __m512i {
976    unsafe { transmute(simd_saturating_sub(a.as_u16x32(), b.as_u16x32())) }
977}
978
979/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
980///
981/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epu16&expand=5791)
982#[inline]
983#[target_feature(enable = "avx512bw")]
984#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
985#[cfg_attr(test, assert_instr(vpsubusw))]
986pub fn _mm512_mask_subs_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
987    unsafe {
988        let sub = _mm512_subs_epu16(a, b).as_u16x32();
989        transmute(simd_select_bitmask(k, sub, src.as_u16x32()))
990    }
991}
992
993/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
994///
995/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epu16&expand=5792)
996#[inline]
997#[target_feature(enable = "avx512bw")]
998#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
999#[cfg_attr(test, assert_instr(vpsubusw))]
1000pub fn _mm512_maskz_subs_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1001    unsafe {
1002        let sub = _mm512_subs_epu16(a, b).as_u16x32();
1003        transmute(simd_select_bitmask(k, sub, u16x32::ZERO))
1004    }
1005}
1006
1007/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1008///
1009/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epu16&expand=5788)
1010#[inline]
1011#[target_feature(enable = "avx512bw,avx512vl")]
1012#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1013#[cfg_attr(test, assert_instr(vpsubusw))]
1014pub fn _mm256_mask_subs_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1015    unsafe {
1016        let sub = _mm256_subs_epu16(a, b).as_u16x16();
1017        transmute(simd_select_bitmask(k, sub, src.as_u16x16()))
1018    }
1019}
1020
1021/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1022///
1023/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epu16&expand=5789)
1024#[inline]
1025#[target_feature(enable = "avx512bw,avx512vl")]
1026#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1027#[cfg_attr(test, assert_instr(vpsubusw))]
1028pub fn _mm256_maskz_subs_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1029    unsafe {
1030        let sub = _mm256_subs_epu16(a, b).as_u16x16();
1031        transmute(simd_select_bitmask(k, sub, u16x16::ZERO))
1032    }
1033}
1034
1035/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1036///
1037/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epu16&expand=5785)
1038#[inline]
1039#[target_feature(enable = "avx512bw,avx512vl")]
1040#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1041#[cfg_attr(test, assert_instr(vpsubusw))]
1042pub fn _mm_mask_subs_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1043    unsafe {
1044        let sub = _mm_subs_epu16(a, b).as_u16x8();
1045        transmute(simd_select_bitmask(k, sub, src.as_u16x8()))
1046    }
1047}
1048
1049/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1050///
1051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epu16&expand=5786)
1052#[inline]
1053#[target_feature(enable = "avx512bw,avx512vl")]
1054#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1055#[cfg_attr(test, assert_instr(vpsubusw))]
1056pub fn _mm_maskz_subs_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1057    unsafe {
1058        let sub = _mm_subs_epu16(a, b).as_u16x8();
1059        transmute(simd_select_bitmask(k, sub, u16x8::ZERO))
1060    }
1061}
1062
1063/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst.
1064///
1065/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epu8&expand=5802)
1066#[inline]
1067#[target_feature(enable = "avx512bw")]
1068#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1069#[cfg_attr(test, assert_instr(vpsubusb))]
1070pub fn _mm512_subs_epu8(a: __m512i, b: __m512i) -> __m512i {
1071    unsafe { transmute(simd_saturating_sub(a.as_u8x64(), b.as_u8x64())) }
1072}
1073
1074/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1075///
1076/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epu8&expand=5800)
1077#[inline]
1078#[target_feature(enable = "avx512bw")]
1079#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1080#[cfg_attr(test, assert_instr(vpsubusb))]
1081pub fn _mm512_mask_subs_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
1082    unsafe {
1083        let sub = _mm512_subs_epu8(a, b).as_u8x64();
1084        transmute(simd_select_bitmask(k, sub, src.as_u8x64()))
1085    }
1086}
1087
1088/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1089///
1090/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epu8&expand=5801)
1091#[inline]
1092#[target_feature(enable = "avx512bw")]
1093#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1094#[cfg_attr(test, assert_instr(vpsubusb))]
1095pub fn _mm512_maskz_subs_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
1096    unsafe {
1097        let sub = _mm512_subs_epu8(a, b).as_u8x64();
1098        transmute(simd_select_bitmask(k, sub, u8x64::ZERO))
1099    }
1100}
1101
1102/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1103///
1104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epu8&expand=5797)
1105#[inline]
1106#[target_feature(enable = "avx512bw,avx512vl")]
1107#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1108#[cfg_attr(test, assert_instr(vpsubusb))]
1109pub fn _mm256_mask_subs_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
1110    unsafe {
1111        let sub = _mm256_subs_epu8(a, b).as_u8x32();
1112        transmute(simd_select_bitmask(k, sub, src.as_u8x32()))
1113    }
1114}
1115
1116/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1117///
1118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epu8&expand=5798)
1119#[inline]
1120#[target_feature(enable = "avx512bw,avx512vl")]
1121#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1122#[cfg_attr(test, assert_instr(vpsubusb))]
1123pub fn _mm256_maskz_subs_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
1124    unsafe {
1125        let sub = _mm256_subs_epu8(a, b).as_u8x32();
1126        transmute(simd_select_bitmask(k, sub, u8x32::ZERO))
1127    }
1128}
1129
1130/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1131///
1132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epu8&expand=5794)
1133#[inline]
1134#[target_feature(enable = "avx512bw,avx512vl")]
1135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1136#[cfg_attr(test, assert_instr(vpsubusb))]
1137pub fn _mm_mask_subs_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
1138    unsafe {
1139        let sub = _mm_subs_epu8(a, b).as_u8x16();
1140        transmute(simd_select_bitmask(k, sub, src.as_u8x16()))
1141    }
1142}
1143
1144/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1145///
1146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epu8&expand=5795)
1147#[inline]
1148#[target_feature(enable = "avx512bw,avx512vl")]
1149#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1150#[cfg_attr(test, assert_instr(vpsubusb))]
1151pub fn _mm_maskz_subs_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
1152    unsafe {
1153        let sub = _mm_subs_epu8(a, b).as_u8x16();
1154        transmute(simd_select_bitmask(k, sub, u8x16::ZERO))
1155    }
1156}
1157
1158/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst.
1159///
1160/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epi16&expand=5775)
1161#[inline]
1162#[target_feature(enable = "avx512bw")]
1163#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1164#[cfg_attr(test, assert_instr(vpsubsw))]
1165pub fn _mm512_subs_epi16(a: __m512i, b: __m512i) -> __m512i {
1166    unsafe { transmute(simd_saturating_sub(a.as_i16x32(), b.as_i16x32())) }
1167}
1168
1169/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1170///
1171/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epi16&expand=5773)
1172#[inline]
1173#[target_feature(enable = "avx512bw")]
1174#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1175#[cfg_attr(test, assert_instr(vpsubsw))]
1176pub fn _mm512_mask_subs_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1177    unsafe {
1178        let sub = _mm512_subs_epi16(a, b).as_i16x32();
1179        transmute(simd_select_bitmask(k, sub, src.as_i16x32()))
1180    }
1181}
1182
1183/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1184///
1185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epi16&expand=5774)
1186#[inline]
1187#[target_feature(enable = "avx512bw")]
1188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1189#[cfg_attr(test, assert_instr(vpsubsw))]
1190pub fn _mm512_maskz_subs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1191    unsafe {
1192        let sub = _mm512_subs_epi16(a, b).as_i16x32();
1193        transmute(simd_select_bitmask(k, sub, i16x32::ZERO))
1194    }
1195}
1196
1197/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1198///
1199/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epi16&expand=5770)
1200#[inline]
1201#[target_feature(enable = "avx512bw,avx512vl")]
1202#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1203#[cfg_attr(test, assert_instr(vpsubsw))]
1204pub fn _mm256_mask_subs_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1205    unsafe {
1206        let sub = _mm256_subs_epi16(a, b).as_i16x16();
1207        transmute(simd_select_bitmask(k, sub, src.as_i16x16()))
1208    }
1209}
1210
1211/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1212///
1213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epi16&expand=5771)
1214#[inline]
1215#[target_feature(enable = "avx512bw,avx512vl")]
1216#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1217#[cfg_attr(test, assert_instr(vpsubsw))]
1218pub fn _mm256_maskz_subs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1219    unsafe {
1220        let sub = _mm256_subs_epi16(a, b).as_i16x16();
1221        transmute(simd_select_bitmask(k, sub, i16x16::ZERO))
1222    }
1223}
1224
1225/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1226///
1227/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epi16&expand=5767)
1228#[inline]
1229#[target_feature(enable = "avx512bw,avx512vl")]
1230#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1231#[cfg_attr(test, assert_instr(vpsubsw))]
1232pub fn _mm_mask_subs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1233    unsafe {
1234        let sub = _mm_subs_epi16(a, b).as_i16x8();
1235        transmute(simd_select_bitmask(k, sub, src.as_i16x8()))
1236    }
1237}
1238
1239/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1240///
1241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epi16&expand=5768)
1242#[inline]
1243#[target_feature(enable = "avx512bw,avx512vl")]
1244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1245#[cfg_attr(test, assert_instr(vpsubsw))]
1246pub fn _mm_maskz_subs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1247    unsafe {
1248        let sub = _mm_subs_epi16(a, b).as_i16x8();
1249        transmute(simd_select_bitmask(k, sub, i16x8::ZERO))
1250    }
1251}
1252
1253/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst.
1254///
1255/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epi8&expand=5784)
1256#[inline]
1257#[target_feature(enable = "avx512bw")]
1258#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1259#[cfg_attr(test, assert_instr(vpsubsb))]
1260pub fn _mm512_subs_epi8(a: __m512i, b: __m512i) -> __m512i {
1261    unsafe { transmute(simd_saturating_sub(a.as_i8x64(), b.as_i8x64())) }
1262}
1263
1264/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1265///
1266/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epi8&expand=5782)
1267#[inline]
1268#[target_feature(enable = "avx512bw")]
1269#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1270#[cfg_attr(test, assert_instr(vpsubsb))]
1271pub fn _mm512_mask_subs_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
1272    unsafe {
1273        let sub = _mm512_subs_epi8(a, b).as_i8x64();
1274        transmute(simd_select_bitmask(k, sub, src.as_i8x64()))
1275    }
1276}
1277
1278/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1279///
1280/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epi8&expand=5783)
1281#[inline]
1282#[target_feature(enable = "avx512bw")]
1283#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1284#[cfg_attr(test, assert_instr(vpsubsb))]
1285pub fn _mm512_maskz_subs_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
1286    unsafe {
1287        let sub = _mm512_subs_epi8(a, b).as_i8x64();
1288        transmute(simd_select_bitmask(k, sub, i8x64::ZERO))
1289    }
1290}
1291
1292/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1293///
1294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epi8&expand=5779)
1295#[inline]
1296#[target_feature(enable = "avx512bw,avx512vl")]
1297#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1298#[cfg_attr(test, assert_instr(vpsubsb))]
1299pub fn _mm256_mask_subs_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
1300    unsafe {
1301        let sub = _mm256_subs_epi8(a, b).as_i8x32();
1302        transmute(simd_select_bitmask(k, sub, src.as_i8x32()))
1303    }
1304}
1305
1306/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1307///
1308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epi8&expand=5780)
1309#[inline]
1310#[target_feature(enable = "avx512bw,avx512vl")]
1311#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1312#[cfg_attr(test, assert_instr(vpsubsb))]
1313pub fn _mm256_maskz_subs_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
1314    unsafe {
1315        let sub = _mm256_subs_epi8(a, b).as_i8x32();
1316        transmute(simd_select_bitmask(k, sub, i8x32::ZERO))
1317    }
1318}
1319
1320/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1321///
1322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epi8&expand=5776)
1323#[inline]
1324#[target_feature(enable = "avx512bw,avx512vl")]
1325#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1326#[cfg_attr(test, assert_instr(vpsubsb))]
1327pub fn _mm_mask_subs_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
1328    unsafe {
1329        let sub = _mm_subs_epi8(a, b).as_i8x16();
1330        transmute(simd_select_bitmask(k, sub, src.as_i8x16()))
1331    }
1332}
1333
1334/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1335///
1336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epi8&expand=5777)
1337#[inline]
1338#[target_feature(enable = "avx512bw,avx512vl")]
1339#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1340#[cfg_attr(test, assert_instr(vpsubsb))]
1341pub fn _mm_maskz_subs_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
1342    unsafe {
1343        let sub = _mm_subs_epi8(a, b).as_i8x16();
1344        transmute(simd_select_bitmask(k, sub, i8x16::ZERO))
1345    }
1346}
1347
1348/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst.
1349///
1350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mulhi_epu16&expand=3973)
1351#[inline]
1352#[target_feature(enable = "avx512bw")]
1353#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1354#[cfg_attr(test, assert_instr(vpmulhuw))]
1355pub fn _mm512_mulhi_epu16(a: __m512i, b: __m512i) -> __m512i {
1356    unsafe {
1357        let a = simd_cast::<_, u32x32>(a.as_u16x32());
1358        let b = simd_cast::<_, u32x32>(b.as_u16x32());
1359        let r = simd_shr(simd_mul(a, b), u32x32::splat(16));
1360        transmute(simd_cast::<u32x32, u16x32>(r))
1361    }
1362}
1363
1364/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1365///
1366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mulhi_epu16&expand=3971)
1367#[inline]
1368#[target_feature(enable = "avx512bw")]
1369#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1370#[cfg_attr(test, assert_instr(vpmulhuw))]
1371pub fn _mm512_mask_mulhi_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1372    unsafe {
1373        let mul = _mm512_mulhi_epu16(a, b).as_u16x32();
1374        transmute(simd_select_bitmask(k, mul, src.as_u16x32()))
1375    }
1376}
1377
1378/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1379///
1380/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mulhi_epu16&expand=3972)
1381#[inline]
1382#[target_feature(enable = "avx512bw")]
1383#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1384#[cfg_attr(test, assert_instr(vpmulhuw))]
1385pub fn _mm512_maskz_mulhi_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1386    unsafe {
1387        let mul = _mm512_mulhi_epu16(a, b).as_u16x32();
1388        transmute(simd_select_bitmask(k, mul, u16x32::ZERO))
1389    }
1390}
1391
1392/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1393///
1394/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mulhi_epu16&expand=3968)
1395#[inline]
1396#[target_feature(enable = "avx512bw,avx512vl")]
1397#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1398#[cfg_attr(test, assert_instr(vpmulhuw))]
1399pub fn _mm256_mask_mulhi_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1400    unsafe {
1401        let mul = _mm256_mulhi_epu16(a, b).as_u16x16();
1402        transmute(simd_select_bitmask(k, mul, src.as_u16x16()))
1403    }
1404}
1405
1406/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1407///
1408/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mulhi_epu16&expand=3969)
1409#[inline]
1410#[target_feature(enable = "avx512bw,avx512vl")]
1411#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1412#[cfg_attr(test, assert_instr(vpmulhuw))]
1413pub fn _mm256_maskz_mulhi_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1414    unsafe {
1415        let mul = _mm256_mulhi_epu16(a, b).as_u16x16();
1416        transmute(simd_select_bitmask(k, mul, u16x16::ZERO))
1417    }
1418}
1419
1420/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1421///
1422/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mulhi_epu16&expand=3965)
1423#[inline]
1424#[target_feature(enable = "avx512bw,avx512vl")]
1425#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1426#[cfg_attr(test, assert_instr(vpmulhuw))]
1427pub fn _mm_mask_mulhi_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1428    unsafe {
1429        let mul = _mm_mulhi_epu16(a, b).as_u16x8();
1430        transmute(simd_select_bitmask(k, mul, src.as_u16x8()))
1431    }
1432}
1433
1434/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1435///
1436/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mulhi_epu16&expand=3966)
1437#[inline]
1438#[target_feature(enable = "avx512bw,avx512vl")]
1439#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1440#[cfg_attr(test, assert_instr(vpmulhuw))]
1441pub fn _mm_maskz_mulhi_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1442    unsafe {
1443        let mul = _mm_mulhi_epu16(a, b).as_u16x8();
1444        transmute(simd_select_bitmask(k, mul, u16x8::ZERO))
1445    }
1446}
1447
1448/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst.
1449///
1450/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mulhi_epi16&expand=3962)
1451#[inline]
1452#[target_feature(enable = "avx512bw")]
1453#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1454#[cfg_attr(test, assert_instr(vpmulhw))]
1455pub fn _mm512_mulhi_epi16(a: __m512i, b: __m512i) -> __m512i {
1456    unsafe {
1457        let a = simd_cast::<_, i32x32>(a.as_i16x32());
1458        let b = simd_cast::<_, i32x32>(b.as_i16x32());
1459        let r = simd_shr(simd_mul(a, b), i32x32::splat(16));
1460        transmute(simd_cast::<i32x32, i16x32>(r))
1461    }
1462}
1463
1464/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1465///
1466/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mulhi_epi16&expand=3960)
1467#[inline]
1468#[target_feature(enable = "avx512bw")]
1469#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1470#[cfg_attr(test, assert_instr(vpmulhw))]
1471pub fn _mm512_mask_mulhi_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1472    unsafe {
1473        let mul = _mm512_mulhi_epi16(a, b).as_i16x32();
1474        transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
1475    }
1476}
1477
1478/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1479///
1480/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mulhi_epi16&expand=3961)
1481#[inline]
1482#[target_feature(enable = "avx512bw")]
1483#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1484#[cfg_attr(test, assert_instr(vpmulhw))]
1485pub fn _mm512_maskz_mulhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1486    unsafe {
1487        let mul = _mm512_mulhi_epi16(a, b).as_i16x32();
1488        transmute(simd_select_bitmask(k, mul, i16x32::ZERO))
1489    }
1490}
1491
1492/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1493///
1494/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mulhi_epi16&expand=3957)
1495#[inline]
1496#[target_feature(enable = "avx512bw,avx512vl")]
1497#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1498#[cfg_attr(test, assert_instr(vpmulhw))]
1499pub fn _mm256_mask_mulhi_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1500    unsafe {
1501        let mul = _mm256_mulhi_epi16(a, b).as_i16x16();
1502        transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
1503    }
1504}
1505
1506/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1507///
1508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mulhi_epi16&expand=3958)
1509#[inline]
1510#[target_feature(enable = "avx512bw,avx512vl")]
1511#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1512#[cfg_attr(test, assert_instr(vpmulhw))]
1513pub fn _mm256_maskz_mulhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1514    unsafe {
1515        let mul = _mm256_mulhi_epi16(a, b).as_i16x16();
1516        transmute(simd_select_bitmask(k, mul, i16x16::ZERO))
1517    }
1518}
1519
1520/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1521///
1522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mulhi_epi16&expand=3954)
1523#[inline]
1524#[target_feature(enable = "avx512bw,avx512vl")]
1525#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1526#[cfg_attr(test, assert_instr(vpmulhw))]
1527pub fn _mm_mask_mulhi_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1528    unsafe {
1529        let mul = _mm_mulhi_epi16(a, b).as_i16x8();
1530        transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
1531    }
1532}
1533
1534/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1535///
1536/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mulhi_epi16&expand=3955)
1537#[inline]
1538#[target_feature(enable = "avx512bw,avx512vl")]
1539#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1540#[cfg_attr(test, assert_instr(vpmulhw))]
1541pub fn _mm_maskz_mulhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1542    unsafe {
1543        let mul = _mm_mulhi_epi16(a, b).as_i16x8();
1544        transmute(simd_select_bitmask(k, mul, i16x8::ZERO))
1545    }
1546}
1547
1548/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst.
1549///
1550/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mulhrs_epi16&expand=3986)
1551#[inline]
1552#[target_feature(enable = "avx512bw")]
1553#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1554#[cfg_attr(test, assert_instr(vpmulhrsw))]
1555pub fn _mm512_mulhrs_epi16(a: __m512i, b: __m512i) -> __m512i {
1556    unsafe { transmute(vpmulhrsw(a.as_i16x32(), b.as_i16x32())) }
1557}
1558
1559/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1560///
1561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mulhrs_epi16&expand=3984)
1562#[inline]
1563#[target_feature(enable = "avx512bw")]
1564#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1565#[cfg_attr(test, assert_instr(vpmulhrsw))]
1566pub fn _mm512_mask_mulhrs_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1567    unsafe {
1568        let mul = _mm512_mulhrs_epi16(a, b).as_i16x32();
1569        transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
1570    }
1571}
1572
1573/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1574///
1575/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mulhrs_epi16&expand=3985)
1576#[inline]
1577#[target_feature(enable = "avx512bw")]
1578#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1579#[cfg_attr(test, assert_instr(vpmulhrsw))]
1580pub fn _mm512_maskz_mulhrs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1581    unsafe {
1582        let mul = _mm512_mulhrs_epi16(a, b).as_i16x32();
1583        transmute(simd_select_bitmask(k, mul, i16x32::ZERO))
1584    }
1585}
1586
1587/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1588///
1589/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mulhrs_epi16&expand=3981)
1590#[inline]
1591#[target_feature(enable = "avx512bw,avx512vl")]
1592#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1593#[cfg_attr(test, assert_instr(vpmulhrsw))]
1594pub fn _mm256_mask_mulhrs_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1595    unsafe {
1596        let mul = _mm256_mulhrs_epi16(a, b).as_i16x16();
1597        transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
1598    }
1599}
1600
1601/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1602///
1603/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mulhrs_epi16&expand=3982)
1604#[inline]
1605#[target_feature(enable = "avx512bw,avx512vl")]
1606#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1607#[cfg_attr(test, assert_instr(vpmulhrsw))]
1608pub fn _mm256_maskz_mulhrs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1609    unsafe {
1610        let mul = _mm256_mulhrs_epi16(a, b).as_i16x16();
1611        transmute(simd_select_bitmask(k, mul, i16x16::ZERO))
1612    }
1613}
1614
1615/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1616///
1617/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mulhrs_epi16&expand=3978)
1618#[inline]
1619#[target_feature(enable = "avx512bw,avx512vl")]
1620#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1621#[cfg_attr(test, assert_instr(vpmulhrsw))]
1622pub fn _mm_mask_mulhrs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1623    unsafe {
1624        let mul = _mm_mulhrs_epi16(a, b).as_i16x8();
1625        transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
1626    }
1627}
1628
1629/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1630///
1631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mulhrs_epi16&expand=3979)
1632#[inline]
1633#[target_feature(enable = "avx512bw,avx512vl")]
1634#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1635#[cfg_attr(test, assert_instr(vpmulhrsw))]
1636pub fn _mm_maskz_mulhrs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1637    unsafe {
1638        let mul = _mm_mulhrs_epi16(a, b).as_i16x8();
1639        transmute(simd_select_bitmask(k, mul, i16x8::ZERO))
1640    }
1641}
1642
1643/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst.
1644///
1645/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullo_epi16&expand=3996)
1646#[inline]
1647#[target_feature(enable = "avx512bw")]
1648#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1649#[cfg_attr(test, assert_instr(vpmullw))]
1650pub fn _mm512_mullo_epi16(a: __m512i, b: __m512i) -> __m512i {
1651    unsafe { transmute(simd_mul(a.as_i16x32(), b.as_i16x32())) }
1652}
1653
1654/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1655///
1656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullo_epi16&expand=3994)
1657#[inline]
1658#[target_feature(enable = "avx512bw")]
1659#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1660#[cfg_attr(test, assert_instr(vpmullw))]
1661pub fn _mm512_mask_mullo_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1662    unsafe {
1663        let mul = _mm512_mullo_epi16(a, b).as_i16x32();
1664        transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
1665    }
1666}
1667
1668/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1669///
1670/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mullo_epi16&expand=3995)
1671#[inline]
1672#[target_feature(enable = "avx512bw")]
1673#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1674#[cfg_attr(test, assert_instr(vpmullw))]
1675pub fn _mm512_maskz_mullo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1676    unsafe {
1677        let mul = _mm512_mullo_epi16(a, b).as_i16x32();
1678        transmute(simd_select_bitmask(k, mul, i16x32::ZERO))
1679    }
1680}
1681
1682/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1683///
1684/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mullo_epi16&expand=3991)
1685#[inline]
1686#[target_feature(enable = "avx512bw,avx512vl")]
1687#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1688#[cfg_attr(test, assert_instr(vpmullw))]
1689pub fn _mm256_mask_mullo_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1690    unsafe {
1691        let mul = _mm256_mullo_epi16(a, b).as_i16x16();
1692        transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
1693    }
1694}
1695
1696/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1697///
1698/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mullo_epi16&expand=3992)
1699#[inline]
1700#[target_feature(enable = "avx512bw,avx512vl")]
1701#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1702#[cfg_attr(test, assert_instr(vpmullw))]
1703pub fn _mm256_maskz_mullo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1704    unsafe {
1705        let mul = _mm256_mullo_epi16(a, b).as_i16x16();
1706        transmute(simd_select_bitmask(k, mul, i16x16::ZERO))
1707    }
1708}
1709
1710/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1711///
1712/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mullo_epi16&expand=3988)
1713#[inline]
1714#[target_feature(enable = "avx512bw,avx512vl")]
1715#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1716#[cfg_attr(test, assert_instr(vpmullw))]
1717pub fn _mm_mask_mullo_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1718    unsafe {
1719        let mul = _mm_mullo_epi16(a, b).as_i16x8();
1720        transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
1721    }
1722}
1723
1724/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1725///
1726/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mullo_epi16&expand=3989)
1727#[inline]
1728#[target_feature(enable = "avx512bw,avx512vl")]
1729#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1730#[cfg_attr(test, assert_instr(vpmullw))]
1731pub fn _mm_maskz_mullo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1732    unsafe {
1733        let mul = _mm_mullo_epi16(a, b).as_i16x8();
1734        transmute(simd_select_bitmask(k, mul, i16x8::ZERO))
1735    }
1736}
1737
1738/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst.
1739///
1740/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu16&expand=3609)
1741#[inline]
1742#[target_feature(enable = "avx512bw")]
1743#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1744#[cfg_attr(test, assert_instr(vpmaxuw))]
1745pub fn _mm512_max_epu16(a: __m512i, b: __m512i) -> __m512i {
1746    unsafe {
1747        let a = a.as_u16x32();
1748        let b = b.as_u16x32();
1749        transmute(simd_select::<i16x32, _>(simd_gt(a, b), a, b))
1750    }
1751}
1752
1753/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1754///
1755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu16&expand=3607)
1756#[inline]
1757#[target_feature(enable = "avx512bw")]
1758#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1759#[cfg_attr(test, assert_instr(vpmaxuw))]
1760pub fn _mm512_mask_max_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1761    unsafe {
1762        let max = _mm512_max_epu16(a, b).as_u16x32();
1763        transmute(simd_select_bitmask(k, max, src.as_u16x32()))
1764    }
1765}
1766
1767/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1768///
1769/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu16&expand=3608)
1770#[inline]
1771#[target_feature(enable = "avx512bw")]
1772#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1773#[cfg_attr(test, assert_instr(vpmaxuw))]
1774pub fn _mm512_maskz_max_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1775    unsafe {
1776        let max = _mm512_max_epu16(a, b).as_u16x32();
1777        transmute(simd_select_bitmask(k, max, u16x32::ZERO))
1778    }
1779}
1780
1781/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1782///
1783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu16&expand=3604)
1784#[inline]
1785#[target_feature(enable = "avx512bw,avx512vl")]
1786#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1787#[cfg_attr(test, assert_instr(vpmaxuw))]
1788pub fn _mm256_mask_max_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1789    unsafe {
1790        let max = _mm256_max_epu16(a, b).as_u16x16();
1791        transmute(simd_select_bitmask(k, max, src.as_u16x16()))
1792    }
1793}
1794
1795/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1796///
1797/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu16&expand=3605)
1798#[inline]
1799#[target_feature(enable = "avx512bw,avx512vl")]
1800#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1801#[cfg_attr(test, assert_instr(vpmaxuw))]
1802pub fn _mm256_maskz_max_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1803    unsafe {
1804        let max = _mm256_max_epu16(a, b).as_u16x16();
1805        transmute(simd_select_bitmask(k, max, u16x16::ZERO))
1806    }
1807}
1808
1809/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1810///
1811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu16&expand=3601)
1812#[inline]
1813#[target_feature(enable = "avx512bw,avx512vl")]
1814#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1815#[cfg_attr(test, assert_instr(vpmaxuw))]
1816pub fn _mm_mask_max_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1817    unsafe {
1818        let max = _mm_max_epu16(a, b).as_u16x8();
1819        transmute(simd_select_bitmask(k, max, src.as_u16x8()))
1820    }
1821}
1822
1823/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1824///
1825/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu16&expand=3602)
1826#[inline]
1827#[target_feature(enable = "avx512bw,avx512vl")]
1828#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1829#[cfg_attr(test, assert_instr(vpmaxuw))]
1830pub fn _mm_maskz_max_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1831    unsafe {
1832        let max = _mm_max_epu16(a, b).as_u16x8();
1833        transmute(simd_select_bitmask(k, max, u16x8::ZERO))
1834    }
1835}
1836
1837/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst.
1838///
1839/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu8&expand=3636)
1840#[inline]
1841#[target_feature(enable = "avx512bw")]
1842#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1843#[cfg_attr(test, assert_instr(vpmaxub))]
1844pub fn _mm512_max_epu8(a: __m512i, b: __m512i) -> __m512i {
1845    unsafe {
1846        let a = a.as_u8x64();
1847        let b = b.as_u8x64();
1848        transmute(simd_select::<i8x64, _>(simd_gt(a, b), a, b))
1849    }
1850}
1851
1852/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1853///
1854/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu8&expand=3634)
1855#[inline]
1856#[target_feature(enable = "avx512bw")]
1857#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1858#[cfg_attr(test, assert_instr(vpmaxub))]
1859pub fn _mm512_mask_max_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
1860    unsafe {
1861        let max = _mm512_max_epu8(a, b).as_u8x64();
1862        transmute(simd_select_bitmask(k, max, src.as_u8x64()))
1863    }
1864}
1865
1866/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1867///
1868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu8&expand=3635)
1869#[inline]
1870#[target_feature(enable = "avx512bw")]
1871#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1872#[cfg_attr(test, assert_instr(vpmaxub))]
1873pub fn _mm512_maskz_max_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
1874    unsafe {
1875        let max = _mm512_max_epu8(a, b).as_u8x64();
1876        transmute(simd_select_bitmask(k, max, u8x64::ZERO))
1877    }
1878}
1879
1880/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1881///
1882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu8&expand=3631)
1883#[inline]
1884#[target_feature(enable = "avx512bw,avx512vl")]
1885#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1886#[cfg_attr(test, assert_instr(vpmaxub))]
1887pub fn _mm256_mask_max_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
1888    unsafe {
1889        let max = _mm256_max_epu8(a, b).as_u8x32();
1890        transmute(simd_select_bitmask(k, max, src.as_u8x32()))
1891    }
1892}
1893
1894/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1895///
1896/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu8&expand=3632)
1897#[inline]
1898#[target_feature(enable = "avx512bw,avx512vl")]
1899#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1900#[cfg_attr(test, assert_instr(vpmaxub))]
1901pub fn _mm256_maskz_max_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
1902    unsafe {
1903        let max = _mm256_max_epu8(a, b).as_u8x32();
1904        transmute(simd_select_bitmask(k, max, u8x32::ZERO))
1905    }
1906}
1907
1908/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1909///
1910/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu8&expand=3628)
1911#[inline]
1912#[target_feature(enable = "avx512bw,avx512vl")]
1913#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1914#[cfg_attr(test, assert_instr(vpmaxub))]
1915pub fn _mm_mask_max_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
1916    unsafe {
1917        let max = _mm_max_epu8(a, b).as_u8x16();
1918        transmute(simd_select_bitmask(k, max, src.as_u8x16()))
1919    }
1920}
1921
1922/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1923///
1924/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu8&expand=3629)
1925#[inline]
1926#[target_feature(enable = "avx512bw,avx512vl")]
1927#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1928#[cfg_attr(test, assert_instr(vpmaxub))]
1929pub fn _mm_maskz_max_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
1930    unsafe {
1931        let max = _mm_max_epu8(a, b).as_u8x16();
1932        transmute(simd_select_bitmask(k, max, u8x16::ZERO))
1933    }
1934}
1935
1936/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst.
1937///
1938/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi16&expand=3573)
1939#[inline]
1940#[target_feature(enable = "avx512bw")]
1941#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1942#[cfg_attr(test, assert_instr(vpmaxsw))]
1943pub fn _mm512_max_epi16(a: __m512i, b: __m512i) -> __m512i {
1944    unsafe {
1945        let a = a.as_i16x32();
1946        let b = b.as_i16x32();
1947        transmute(simd_select::<i16x32, _>(simd_gt(a, b), a, b))
1948    }
1949}
1950
1951/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1952///
1953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi16&expand=3571)
1954#[inline]
1955#[target_feature(enable = "avx512bw")]
1956#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1957#[cfg_attr(test, assert_instr(vpmaxsw))]
1958pub fn _mm512_mask_max_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1959    unsafe {
1960        let max = _mm512_max_epi16(a, b).as_i16x32();
1961        transmute(simd_select_bitmask(k, max, src.as_i16x32()))
1962    }
1963}
1964
1965/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1966///
1967/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi16&expand=3572)
1968#[inline]
1969#[target_feature(enable = "avx512bw")]
1970#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1971#[cfg_attr(test, assert_instr(vpmaxsw))]
1972pub fn _mm512_maskz_max_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1973    unsafe {
1974        let max = _mm512_max_epi16(a, b).as_i16x32();
1975        transmute(simd_select_bitmask(k, max, i16x32::ZERO))
1976    }
1977}
1978
1979/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1980///
1981/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi16&expand=3568)
1982#[inline]
1983#[target_feature(enable = "avx512bw,avx512vl")]
1984#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1985#[cfg_attr(test, assert_instr(vpmaxsw))]
1986pub fn _mm256_mask_max_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1987    unsafe {
1988        let max = _mm256_max_epi16(a, b).as_i16x16();
1989        transmute(simd_select_bitmask(k, max, src.as_i16x16()))
1990    }
1991}
1992
1993/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1994///
1995/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi16&expand=3569)
1996#[inline]
1997#[target_feature(enable = "avx512bw,avx512vl")]
1998#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1999#[cfg_attr(test, assert_instr(vpmaxsw))]
2000pub fn _mm256_maskz_max_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
2001    unsafe {
2002        let max = _mm256_max_epi16(a, b).as_i16x16();
2003        transmute(simd_select_bitmask(k, max, i16x16::ZERO))
2004    }
2005}
2006
2007/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2008///
2009/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi16&expand=3565)
2010#[inline]
2011#[target_feature(enable = "avx512bw,avx512vl")]
2012#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2013#[cfg_attr(test, assert_instr(vpmaxsw))]
2014pub fn _mm_mask_max_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2015    unsafe {
2016        let max = _mm_max_epi16(a, b).as_i16x8();
2017        transmute(simd_select_bitmask(k, max, src.as_i16x8()))
2018    }
2019}
2020
2021/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2022///
2023/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi16&expand=3566)
2024#[inline]
2025#[target_feature(enable = "avx512bw,avx512vl")]
2026#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2027#[cfg_attr(test, assert_instr(vpmaxsw))]
2028pub fn _mm_maskz_max_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2029    unsafe {
2030        let max = _mm_max_epi16(a, b).as_i16x8();
2031        transmute(simd_select_bitmask(k, max, i16x8::ZERO))
2032    }
2033}
2034
2035/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst.
2036///
2037/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi8&expand=3600)
2038#[inline]
2039#[target_feature(enable = "avx512bw")]
2040#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2041#[cfg_attr(test, assert_instr(vpmaxsb))]
2042pub fn _mm512_max_epi8(a: __m512i, b: __m512i) -> __m512i {
2043    unsafe {
2044        let a = a.as_i8x64();
2045        let b = b.as_i8x64();
2046        transmute(simd_select::<i8x64, _>(simd_gt(a, b), a, b))
2047    }
2048}
2049
2050/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2051///
2052/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi8&expand=3598)
2053#[inline]
2054#[target_feature(enable = "avx512bw")]
2055#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2056#[cfg_attr(test, assert_instr(vpmaxsb))]
2057pub fn _mm512_mask_max_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
2058    unsafe {
2059        let max = _mm512_max_epi8(a, b).as_i8x64();
2060        transmute(simd_select_bitmask(k, max, src.as_i8x64()))
2061    }
2062}
2063
2064/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2065///
2066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi8&expand=3599)
2067#[inline]
2068#[target_feature(enable = "avx512bw")]
2069#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2070#[cfg_attr(test, assert_instr(vpmaxsb))]
2071pub fn _mm512_maskz_max_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
2072    unsafe {
2073        let max = _mm512_max_epi8(a, b).as_i8x64();
2074        transmute(simd_select_bitmask(k, max, i8x64::ZERO))
2075    }
2076}
2077
2078/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2079///
2080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi8&expand=3595)
2081#[inline]
2082#[target_feature(enable = "avx512bw,avx512vl")]
2083#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2084#[cfg_attr(test, assert_instr(vpmaxsb))]
2085pub fn _mm256_mask_max_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
2086    unsafe {
2087        let max = _mm256_max_epi8(a, b).as_i8x32();
2088        transmute(simd_select_bitmask(k, max, src.as_i8x32()))
2089    }
2090}
2091
2092/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2093///
2094/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi8&expand=3596)
2095#[inline]
2096#[target_feature(enable = "avx512bw,avx512vl")]
2097#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2098#[cfg_attr(test, assert_instr(vpmaxsb))]
2099pub fn _mm256_maskz_max_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
2100    unsafe {
2101        let max = _mm256_max_epi8(a, b).as_i8x32();
2102        transmute(simd_select_bitmask(k, max, i8x32::ZERO))
2103    }
2104}
2105
2106/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2107///
2108/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi8&expand=3592)
2109#[inline]
2110#[target_feature(enable = "avx512bw,avx512vl")]
2111#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2112#[cfg_attr(test, assert_instr(vpmaxsb))]
2113pub fn _mm_mask_max_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
2114    unsafe {
2115        let max = _mm_max_epi8(a, b).as_i8x16();
2116        transmute(simd_select_bitmask(k, max, src.as_i8x16()))
2117    }
2118}
2119
2120/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2121///
2122/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi8&expand=3593)
2123#[inline]
2124#[target_feature(enable = "avx512bw,avx512vl")]
2125#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2126#[cfg_attr(test, assert_instr(vpmaxsb))]
2127pub fn _mm_maskz_max_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
2128    unsafe {
2129        let max = _mm_max_epi8(a, b).as_i8x16();
2130        transmute(simd_select_bitmask(k, max, i8x16::ZERO))
2131    }
2132}
2133
2134/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst.
2135///
2136/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu16&expand=3723)
2137#[inline]
2138#[target_feature(enable = "avx512bw")]
2139#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2140#[cfg_attr(test, assert_instr(vpminuw))]
2141pub fn _mm512_min_epu16(a: __m512i, b: __m512i) -> __m512i {
2142    unsafe {
2143        let a = a.as_u16x32();
2144        let b = b.as_u16x32();
2145        transmute(simd_select::<i16x32, _>(simd_lt(a, b), a, b))
2146    }
2147}
2148
2149/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2150///
2151/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu16&expand=3721)
2152#[inline]
2153#[target_feature(enable = "avx512bw")]
2154#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2155#[cfg_attr(test, assert_instr(vpminuw))]
2156pub fn _mm512_mask_min_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
2157    unsafe {
2158        let min = _mm512_min_epu16(a, b).as_u16x32();
2159        transmute(simd_select_bitmask(k, min, src.as_u16x32()))
2160    }
2161}
2162
2163/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2164///
2165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu16&expand=3722)
2166#[inline]
2167#[target_feature(enable = "avx512bw")]
2168#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2169#[cfg_attr(test, assert_instr(vpminuw))]
2170pub fn _mm512_maskz_min_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
2171    unsafe {
2172        let min = _mm512_min_epu16(a, b).as_u16x32();
2173        transmute(simd_select_bitmask(k, min, u16x32::ZERO))
2174    }
2175}
2176
2177/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2178///
2179/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu16&expand=3718)
2180#[inline]
2181#[target_feature(enable = "avx512bw,avx512vl")]
2182#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2183#[cfg_attr(test, assert_instr(vpminuw))]
2184pub fn _mm256_mask_min_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
2185    unsafe {
2186        let min = _mm256_min_epu16(a, b).as_u16x16();
2187        transmute(simd_select_bitmask(k, min, src.as_u16x16()))
2188    }
2189}
2190
2191/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2192///
2193/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu16&expand=3719)
2194#[inline]
2195#[target_feature(enable = "avx512bw,avx512vl")]
2196#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2197#[cfg_attr(test, assert_instr(vpminuw))]
2198pub fn _mm256_maskz_min_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
2199    unsafe {
2200        let min = _mm256_min_epu16(a, b).as_u16x16();
2201        transmute(simd_select_bitmask(k, min, u16x16::ZERO))
2202    }
2203}
2204
2205/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2206///
2207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu16&expand=3715)
2208#[inline]
2209#[target_feature(enable = "avx512bw,avx512vl")]
2210#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2211#[cfg_attr(test, assert_instr(vpminuw))]
2212pub fn _mm_mask_min_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2213    unsafe {
2214        let min = _mm_min_epu16(a, b).as_u16x8();
2215        transmute(simd_select_bitmask(k, min, src.as_u16x8()))
2216    }
2217}
2218
2219/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2220///
2221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu16&expand=3716)
2222#[inline]
2223#[target_feature(enable = "avx512bw,avx512vl")]
2224#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2225#[cfg_attr(test, assert_instr(vpminuw))]
2226pub fn _mm_maskz_min_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2227    unsafe {
2228        let min = _mm_min_epu16(a, b).as_u16x8();
2229        transmute(simd_select_bitmask(k, min, u16x8::ZERO))
2230    }
2231}
2232
2233/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst.
2234///
2235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu8&expand=3750)
2236#[inline]
2237#[target_feature(enable = "avx512bw")]
2238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2239#[cfg_attr(test, assert_instr(vpminub))]
2240pub fn _mm512_min_epu8(a: __m512i, b: __m512i) -> __m512i {
2241    unsafe {
2242        let a = a.as_u8x64();
2243        let b = b.as_u8x64();
2244        transmute(simd_select::<i8x64, _>(simd_lt(a, b), a, b))
2245    }
2246}
2247
2248/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2249///
2250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu8&expand=3748)
2251#[inline]
2252#[target_feature(enable = "avx512bw")]
2253#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2254#[cfg_attr(test, assert_instr(vpminub))]
2255pub fn _mm512_mask_min_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
2256    unsafe {
2257        let min = _mm512_min_epu8(a, b).as_u8x64();
2258        transmute(simd_select_bitmask(k, min, src.as_u8x64()))
2259    }
2260}
2261
2262/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2263///
2264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu8&expand=3749)
2265#[inline]
2266#[target_feature(enable = "avx512bw")]
2267#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2268#[cfg_attr(test, assert_instr(vpminub))]
2269pub fn _mm512_maskz_min_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
2270    unsafe {
2271        let min = _mm512_min_epu8(a, b).as_u8x64();
2272        transmute(simd_select_bitmask(k, min, u8x64::ZERO))
2273    }
2274}
2275
2276/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2277///
2278/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu8&expand=3745)
2279#[inline]
2280#[target_feature(enable = "avx512bw,avx512vl")]
2281#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2282#[cfg_attr(test, assert_instr(vpminub))]
2283pub fn _mm256_mask_min_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
2284    unsafe {
2285        let min = _mm256_min_epu8(a, b).as_u8x32();
2286        transmute(simd_select_bitmask(k, min, src.as_u8x32()))
2287    }
2288}
2289
2290/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2291///
2292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu8&expand=3746)
2293#[inline]
2294#[target_feature(enable = "avx512bw,avx512vl")]
2295#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2296#[cfg_attr(test, assert_instr(vpminub))]
2297pub fn _mm256_maskz_min_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
2298    unsafe {
2299        let min = _mm256_min_epu8(a, b).as_u8x32();
2300        transmute(simd_select_bitmask(k, min, u8x32::ZERO))
2301    }
2302}
2303
2304/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2305///
2306/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu8&expand=3742)
2307#[inline]
2308#[target_feature(enable = "avx512bw,avx512vl")]
2309#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2310#[cfg_attr(test, assert_instr(vpminub))]
2311pub fn _mm_mask_min_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
2312    unsafe {
2313        let min = _mm_min_epu8(a, b).as_u8x16();
2314        transmute(simd_select_bitmask(k, min, src.as_u8x16()))
2315    }
2316}
2317
2318/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2319///
2320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu8&expand=3743)
2321#[inline]
2322#[target_feature(enable = "avx512bw,avx512vl")]
2323#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2324#[cfg_attr(test, assert_instr(vpminub))]
2325pub fn _mm_maskz_min_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
2326    unsafe {
2327        let min = _mm_min_epu8(a, b).as_u8x16();
2328        transmute(simd_select_bitmask(k, min, u8x16::ZERO))
2329    }
2330}
2331
2332/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst.
2333///
2334/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi16&expand=3687)
2335#[inline]
2336#[target_feature(enable = "avx512bw")]
2337#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2338#[cfg_attr(test, assert_instr(vpminsw))]
2339pub fn _mm512_min_epi16(a: __m512i, b: __m512i) -> __m512i {
2340    unsafe {
2341        let a = a.as_i16x32();
2342        let b = b.as_i16x32();
2343        transmute(simd_select::<i16x32, _>(simd_lt(a, b), a, b))
2344    }
2345}
2346
2347/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2348///
2349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi16&expand=3685)
2350#[inline]
2351#[target_feature(enable = "avx512bw")]
2352#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2353#[cfg_attr(test, assert_instr(vpminsw))]
2354pub fn _mm512_mask_min_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
2355    unsafe {
2356        let min = _mm512_min_epi16(a, b).as_i16x32();
2357        transmute(simd_select_bitmask(k, min, src.as_i16x32()))
2358    }
2359}
2360
/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi16&expand=3686)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub fn _mm512_maskz_min_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epi16(a, b).as_i16x32();
        // Bit i of k set: take min lane i; bit clear: zero lane i.
        transmute(simd_select_bitmask(k, min, i16x32::ZERO))
    }
}
2374
/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi16&expand=3682)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub fn _mm256_mask_min_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Reuse the AVX2 unmasked min, then blend per mask bit.
        let min = _mm256_min_epi16(a, b).as_i16x16();
        // Bit i of k set: take min lane i; bit clear: copy lane i from src.
        transmute(simd_select_bitmask(k, min, src.as_i16x16()))
    }
}
2388
/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi16&expand=3683)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub fn _mm256_maskz_min_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epi16(a, b).as_i16x16();
        // Bit i of k set: take min lane i; bit clear: zero lane i.
        transmute(simd_select_bitmask(k, min, i16x16::ZERO))
    }
}
2402
/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi16&expand=3679)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub fn _mm_mask_min_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // Reuse the SSE2 unmasked min, then blend per mask bit.
        let min = _mm_min_epi16(a, b).as_i16x8();
        // Bit i of k set: take min lane i; bit clear: copy lane i from src.
        transmute(simd_select_bitmask(k, min, src.as_i16x8()))
    }
}
2416
/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi16&expand=3680)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub fn _mm_maskz_min_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epi16(a, b).as_i16x8();
        // Bit i of k set: take min lane i; bit clear: zero lane i.
        transmute(simd_select_bitmask(k, min, i16x8::ZERO))
    }
}
2430
/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi8&expand=3714)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub fn _mm512_min_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i8x64();
        let b = b.as_i8x64();
        // Lane-wise select: where a < b (signed) keep a, otherwise b.
        // LLVM pattern-matches this into a single `vpminsb` (checked by assert_instr).
        transmute(simd_select::<i8x64, _>(simd_lt(a, b), a, b))
    }
}
2445
/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi8&expand=3712)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub fn _mm512_mask_min_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epi8(a, b).as_i8x64();
        // Bit i of k set: take min lane i; bit clear: copy lane i from src.
        transmute(simd_select_bitmask(k, min, src.as_i8x64()))
    }
}
2459
/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi8&expand=3713)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub fn _mm512_maskz_min_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epi8(a, b).as_i8x64();
        // Bit i of k set: take min lane i; bit clear: zero lane i.
        transmute(simd_select_bitmask(k, min, i8x64::ZERO))
    }
}
2473
/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi8&expand=3709)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub fn _mm256_mask_min_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Reuse the AVX2 unmasked min, then blend per mask bit.
        let min = _mm256_min_epi8(a, b).as_i8x32();
        // Bit i of k set: take min lane i; bit clear: copy lane i from src.
        transmute(simd_select_bitmask(k, min, src.as_i8x32()))
    }
}
2487
/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi8&expand=3710)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub fn _mm256_maskz_min_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epi8(a, b).as_i8x32();
        // Bit i of k set: take min lane i; bit clear: zero lane i.
        transmute(simd_select_bitmask(k, min, i8x32::ZERO))
    }
}
2501
/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi8&expand=3706)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub fn _mm_mask_min_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // Reuse the SSE4.1 unmasked min, then blend per mask bit.
        let min = _mm_min_epi8(a, b).as_i8x16();
        // Bit i of k set: take min lane i; bit clear: copy lane i from src.
        transmute(simd_select_bitmask(k, min, src.as_i8x16()))
    }
}
2515
/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi8&expand=3707)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub fn _mm_maskz_min_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epi8(a, b).as_i8x16();
        // Bit i of k set: take min lane i; bit clear: zero lane i.
        transmute(simd_select_bitmask(k, min, i8x16::ZERO))
    }
}
2529
/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu16_mask&expand=1050)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmplt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    // Per-lane unsigned a < b, packed into one bit per lane of the 32-bit mask.
    unsafe { simd_bitmask::<u16x32, _>(simd_lt(a.as_u16x32(), b.as_u16x32())) }
}
2540
/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu16_mask&expand=1051)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmplt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Delegate to the generic masked compare with the less-than predicate.
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2551
/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu16_mask&expand=1050)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmplt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    // Per-lane unsigned a < b, packed into one bit per lane of the 16-bit mask.
    unsafe { simd_bitmask::<u16x16, _>(simd_lt(a.as_u16x16(), b.as_u16x16())) }
}
2562
/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu16_mask&expand=1049)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmplt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // Delegate to the generic masked compare with the less-than predicate.
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2573
/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu16_mask&expand=1018)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmplt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Per-lane unsigned a < b, packed into one bit per lane of the 8-bit mask.
    unsafe { simd_bitmask::<u16x8, _>(simd_lt(a.as_u16x8(), b.as_u16x8())) }
}
2584
/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu16_mask&expand=1019)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmplt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegate to the generic masked compare with the less-than predicate.
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2595
/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu8_mask&expand=1068)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmplt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    // Per-lane unsigned a < b, packed into one bit per lane of the 64-bit mask.
    unsafe { simd_bitmask::<u8x64, _>(simd_lt(a.as_u8x64(), b.as_u8x64())) }
}
2606
/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu8_mask&expand=1069)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmplt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // Delegate to the generic masked compare with the less-than predicate.
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2617
/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu8_mask&expand=1066)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmplt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    // Per-lane unsigned a < b, packed into one bit per lane of the 32-bit mask.
    unsafe { simd_bitmask::<u8x32, _>(simd_lt(a.as_u8x32(), b.as_u8x32())) }
}
2628
/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu8_mask&expand=1067)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmplt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // Delegate to the generic masked compare with the less-than predicate.
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2639
/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu8_mask&expand=1064)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmplt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    // Per-lane unsigned a < b, packed into one bit per lane of the 16-bit mask.
    unsafe { simd_bitmask::<u8x16, _>(simd_lt(a.as_u8x16(), b.as_u8x16())) }
}
2650
/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu8_mask&expand=1065)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmplt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Delegate to the generic masked compare with the less-than predicate.
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2661
/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi16_mask&expand=1022)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmplt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    // Per-lane signed a < b, packed into one bit per lane of the 32-bit mask.
    unsafe { simd_bitmask::<i16x32, _>(simd_lt(a.as_i16x32(), b.as_i16x32())) }
}
2672
/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi16_mask&expand=1023)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmplt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Delegate to the generic masked compare with the less-than predicate.
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2683
/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi16_mask&expand=1020)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmplt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    // Per-lane signed a < b, packed into one bit per lane of the 16-bit mask.
    unsafe { simd_bitmask::<i16x16, _>(simd_lt(a.as_i16x16(), b.as_i16x16())) }
}
2694
/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi16_mask&expand=1021)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmplt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // Delegate to the generic masked compare with the less-than predicate.
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2705
/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi16_mask&expand=1018)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmplt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Per-lane signed a < b, packed into one bit per lane of the 8-bit mask.
    unsafe { simd_bitmask::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8())) }
}
2716
/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi16_mask&expand=1019)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmplt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegate to the generic masked compare with the less-than predicate.
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2727
/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi8_mask&expand=1044)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmplt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    // Per-lane signed a < b, packed into one bit per lane of the 64-bit mask.
    unsafe { simd_bitmask::<i8x64, _>(simd_lt(a.as_i8x64(), b.as_i8x64())) }
}
2738
/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi8_mask&expand=1045)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmplt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // Delegate to the generic masked compare with the less-than predicate.
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2749
/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi8_mask&expand=1042)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmplt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    // Per-lane signed a < b, packed into one bit per lane of the 32-bit mask.
    unsafe { simd_bitmask::<i8x32, _>(simd_lt(a.as_i8x32(), b.as_i8x32())) }
}
2760
/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi8_mask&expand=1043)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmplt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // Delegate to the generic masked compare with the less-than predicate.
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2771
/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi8_mask&expand=1040)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmplt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    // Per-lane signed a < b, packed into one bit per lane of the 16-bit mask.
    unsafe { simd_bitmask::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16())) }
}
2782
/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi8_mask&expand=1041)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmplt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Delegate to the generic masked compare with the less-than predicate.
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2793
/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu16_mask&expand=927)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpgt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    // Per-lane unsigned a > b, packed into one bit per lane of the 32-bit mask.
    unsafe { simd_bitmask::<u16x32, _>(simd_gt(a.as_u16x32(), b.as_u16x32())) }
}
2804
/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu16_mask&expand=928)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpgt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // NLE ("not less-or-equal") is the vpcmp encoding of greater-than.
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
2815
/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu16_mask&expand=925)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpgt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    // Per-lane unsigned a > b, packed into one bit per lane of the 16-bit mask.
    unsafe { simd_bitmask::<u16x16, _>(simd_gt(a.as_u16x16(), b.as_u16x16())) }
}
2826
/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu16_mask&expand=926)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpgt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // NLE ("not less-or-equal") is the vpcmp encoding of greater-than.
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
2837
/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu16_mask&expand=923)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpgt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Per-lane unsigned a > b, packed into one bit per lane of the 8-bit mask.
    unsafe { simd_bitmask::<u16x8, _>(simd_gt(a.as_u16x8(), b.as_u16x8())) }
}
2848
/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu16_mask&expand=924)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpgt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // NLE ("not less-or-equal") is the vpcmp encoding of greater-than.
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
2859
/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu8_mask&expand=945)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpgt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    // Per-lane unsigned a > b, packed into one bit per lane of the 64-bit mask.
    unsafe { simd_bitmask::<u8x64, _>(simd_gt(a.as_u8x64(), b.as_u8x64())) }
}
2870
/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu8_mask&expand=946)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpgt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // NLE ("not less-or-equal") is the vpcmp encoding of greater-than.
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
2881
/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu8_mask&expand=943)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpgt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    // Per-lane unsigned a > b, packed into one bit per lane of the 32-bit mask.
    unsafe { simd_bitmask::<u8x32, _>(simd_gt(a.as_u8x32(), b.as_u8x32())) }
}
2892
/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu8_mask&expand=944)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpgt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // NLE ("not less-or-equal") is the vpcmp encoding of greater-than.
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
2903
/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu8_mask&expand=941)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpgt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    // Per-lane unsigned a > b, packed into one bit per lane of the 16-bit mask.
    unsafe { simd_bitmask::<u8x16, _>(simd_gt(a.as_u8x16(), b.as_u8x16())) }
}
2914
/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu8_mask&expand=942)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpgt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // NLE ("not less-or-equal") is the vpcmp encoding of greater-than.
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
2925
2926/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k.
2927///
2928/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi16_mask&expand=897)
2929#[inline]
2930#[target_feature(enable = "avx512bw")]
2931#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2932#[cfg_attr(test, assert_instr(vpcmp))]
2933pub fn _mm512_cmpgt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
2934    unsafe { simd_bitmask::<i16x32, _>(simd_gt(a.as_i16x32(), b.as_i16x32())) }
2935}
2936
/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi16_mask&expand=898)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpgt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Greater-than is encoded as "not less-than-or-equal" (_MM_CMPINT_NLE);
    // the generic masked compare zeroes result bits where `k1` is 0.
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
2947
2948/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k.
2949///
2950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi16_mask&expand=895)
2951#[inline]
2952#[target_feature(enable = "avx512bw,avx512vl")]
2953#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2954#[cfg_attr(test, assert_instr(vpcmp))]
2955pub fn _mm256_cmpgt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
2956    unsafe { simd_bitmask::<i16x16, _>(simd_gt(a.as_i16x16(), b.as_i16x16())) }
2957}
2958
/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi16_mask&expand=896)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpgt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // Greater-than is encoded as "not less-than-or-equal" (_MM_CMPINT_NLE);
    // the generic masked compare zeroes result bits where `k1` is 0.
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
2969
2970/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k.
2971///
2972/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi16_mask&expand=893)
2973#[inline]
2974#[target_feature(enable = "avx512bw,avx512vl")]
2975#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2976#[cfg_attr(test, assert_instr(vpcmp))]
2977pub fn _mm_cmpgt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
2978    unsafe { simd_bitmask::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8())) }
2979}
2980
/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi16_mask&expand=894)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpgt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Greater-than is encoded as "not less-than-or-equal" (_MM_CMPINT_NLE);
    // the generic masked compare zeroes result bits where `k1` is 0.
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
2991
2992/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k.
2993///
2994/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi8_mask&expand=921)
2995#[inline]
2996#[target_feature(enable = "avx512bw")]
2997#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2998#[cfg_attr(test, assert_instr(vpcmp))]
2999pub fn _mm512_cmpgt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
3000    unsafe { simd_bitmask::<i8x64, _>(simd_gt(a.as_i8x64(), b.as_i8x64())) }
3001}
3002
/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi8_mask&expand=922)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpgt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // Greater-than is encoded as "not less-than-or-equal" (_MM_CMPINT_NLE);
    // the generic masked compare zeroes result bits where `k1` is 0.
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
3013
3014/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k.
3015///
3016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi8_mask&expand=919)
3017#[inline]
3018#[target_feature(enable = "avx512bw,avx512vl")]
3019#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3020#[cfg_attr(test, assert_instr(vpcmp))]
3021pub fn _mm256_cmpgt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
3022    unsafe { simd_bitmask::<i8x32, _>(simd_gt(a.as_i8x32(), b.as_i8x32())) }
3023}
3024
/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi8_mask&expand=920)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpgt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // Greater-than is encoded as "not less-than-or-equal" (_MM_CMPINT_NLE);
    // the generic masked compare zeroes result bits where `k1` is 0.
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
3035
3036/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k.
3037///
3038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi8_mask&expand=917)
3039#[inline]
3040#[target_feature(enable = "avx512bw,avx512vl")]
3041#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3042#[cfg_attr(test, assert_instr(vpcmp))]
3043pub fn _mm_cmpgt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
3044    unsafe { simd_bitmask::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16())) }
3045}
3046
/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi8_mask&expand=918)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpgt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Greater-than is encoded as "not less-than-or-equal" (_MM_CMPINT_NLE);
    // the generic masked compare zeroes result bits where `k1` is 0.
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
3057
3058/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3059///
3060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu16_mask&expand=989)
3061#[inline]
3062#[target_feature(enable = "avx512bw")]
3063#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3064#[cfg_attr(test, assert_instr(vpcmp))]
3065pub fn _mm512_cmple_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
3066    unsafe { simd_bitmask::<u16x32, _>(simd_le(a.as_u16x32(), b.as_u16x32())) }
3067}
3068
/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu16_mask&expand=990)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmple_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Delegates to the generic masked compare with the _MM_CMPINT_LE
    // (less-than-or-equal) predicate; result bits where `k1` is 0 are zeroed.
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3079
3080/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3081///
3082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu16_mask&expand=987)
3083#[inline]
3084#[target_feature(enable = "avx512bw,avx512vl")]
3085#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3086#[cfg_attr(test, assert_instr(vpcmp))]
3087pub fn _mm256_cmple_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
3088    unsafe { simd_bitmask::<u16x16, _>(simd_le(a.as_u16x16(), b.as_u16x16())) }
3089}
3090
/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu16_mask&expand=988)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmple_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // Delegates to the generic masked compare with the _MM_CMPINT_LE
    // (less-than-or-equal) predicate; result bits where `k1` is 0 are zeroed.
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3101
3102/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3103///
3104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu16_mask&expand=985)
3105#[inline]
3106#[target_feature(enable = "avx512bw,avx512vl")]
3107#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3108#[cfg_attr(test, assert_instr(vpcmp))]
3109pub fn _mm_cmple_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
3110    unsafe { simd_bitmask::<u16x8, _>(simd_le(a.as_u16x8(), b.as_u16x8())) }
3111}
3112
/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu16_mask&expand=986)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmple_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegates to the generic masked compare with the _MM_CMPINT_LE
    // (less-than-or-equal) predicate; result bits where `k1` is 0 are zeroed.
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3123
3124/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.   
3125///
3126/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu8_mask&expand=1007)
3127#[inline]
3128#[target_feature(enable = "avx512bw")]
3129#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3130#[cfg_attr(test, assert_instr(vpcmp))]
3131pub fn _mm512_cmple_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
3132    unsafe { simd_bitmask::<u8x64, _>(simd_le(a.as_u8x64(), b.as_u8x64())) }
3133}
3134
/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu8_mask&expand=1008)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmple_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // Delegates to the generic masked compare with the _MM_CMPINT_LE
    // (less-than-or-equal) predicate; result bits where `k1` is 0 are zeroed.
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3145
3146/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.   
3147///
3148/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu8_mask&expand=1005)
3149#[inline]
3150#[target_feature(enable = "avx512bw,avx512vl")]
3151#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3152#[cfg_attr(test, assert_instr(vpcmp))]
3153pub fn _mm256_cmple_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
3154    unsafe { simd_bitmask::<u8x32, _>(simd_le(a.as_u8x32(), b.as_u8x32())) }
3155}
3156
/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu8_mask&expand=1006)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmple_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // Delegates to the generic masked compare with the _MM_CMPINT_LE
    // (less-than-or-equal) predicate; result bits where `k1` is 0 are zeroed.
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3167
3168/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.   
3169///
3170/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu8_mask&expand=1003)
3171#[inline]
3172#[target_feature(enable = "avx512bw,avx512vl")]
3173#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3174#[cfg_attr(test, assert_instr(vpcmp))]
3175pub fn _mm_cmple_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
3176    unsafe { simd_bitmask::<u8x16, _>(simd_le(a.as_u8x16(), b.as_u8x16())) }
3177}
3178
/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu8_mask&expand=1004)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmple_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Delegates to the generic masked compare with the _MM_CMPINT_LE
    // (less-than-or-equal) predicate; result bits where `k1` is 0 are zeroed.
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3189
3190/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3191///
3192/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi16_mask&expand=965)
3193#[inline]
3194#[target_feature(enable = "avx512bw")]
3195#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3196#[cfg_attr(test, assert_instr(vpcmp))]
3197pub fn _mm512_cmple_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
3198    unsafe { simd_bitmask::<i16x32, _>(simd_le(a.as_i16x32(), b.as_i16x32())) }
3199}
3200
/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi16_mask&expand=966)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmple_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Delegates to the generic masked compare with the _MM_CMPINT_LE
    // (less-than-or-equal) predicate; result bits where `k1` is 0 are zeroed.
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3211
3212/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3213///
3214/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi16_mask&expand=963)
3215#[inline]
3216#[target_feature(enable = "avx512bw,avx512vl")]
3217#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3218#[cfg_attr(test, assert_instr(vpcmp))]
3219pub fn _mm256_cmple_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
3220    unsafe { simd_bitmask::<i16x16, _>(simd_le(a.as_i16x16(), b.as_i16x16())) }
3221}
3222
/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi16_mask&expand=964)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmple_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // Delegates to the generic masked compare with the _MM_CMPINT_LE
    // (less-than-or-equal) predicate; result bits where `k1` is 0 are zeroed.
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3233
3234/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3235///
3236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi16_mask&expand=961)
3237#[inline]
3238#[target_feature(enable = "avx512bw,avx512vl")]
3239#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3240#[cfg_attr(test, assert_instr(vpcmp))]
3241pub fn _mm_cmple_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
3242    unsafe { simd_bitmask::<i16x8, _>(simd_le(a.as_i16x8(), b.as_i16x8())) }
3243}
3244
/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi16_mask&expand=962)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmple_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegates to the generic masked compare with the _MM_CMPINT_LE
    // (less-than-or-equal) predicate; result bits where `k1` is 0 are zeroed.
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3255
3256/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3257///
3258/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi8_mask&expand=983)
3259#[inline]
3260#[target_feature(enable = "avx512bw")]
3261#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3262#[cfg_attr(test, assert_instr(vpcmp))]
3263pub fn _mm512_cmple_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
3264    unsafe { simd_bitmask::<i8x64, _>(simd_le(a.as_i8x64(), b.as_i8x64())) }
3265}
3266
/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi8_mask&expand=984)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmple_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // Delegates to the generic masked compare with the _MM_CMPINT_LE
    // (less-than-or-equal) predicate; result bits where `k1` is 0 are zeroed.
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3277
3278/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3279///
3280/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi8_mask&expand=981)
3281#[inline]
3282#[target_feature(enable = "avx512bw,avx512vl")]
3283#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3284#[cfg_attr(test, assert_instr(vpcmp))]
3285pub fn _mm256_cmple_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
3286    unsafe { simd_bitmask::<i8x32, _>(simd_le(a.as_i8x32(), b.as_i8x32())) }
3287}
3288
/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi8_mask&expand=982)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmple_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // Delegates to the generic masked compare with the _MM_CMPINT_LE
    // (less-than-or-equal) predicate; result bits where `k1` is 0 are zeroed.
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3299
3300/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3301///
3302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi8_mask&expand=979)
3303#[inline]
3304#[target_feature(enable = "avx512bw,avx512vl")]
3305#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3306#[cfg_attr(test, assert_instr(vpcmp))]
3307pub fn _mm_cmple_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
3308    unsafe { simd_bitmask::<i8x16, _>(simd_le(a.as_i8x16(), b.as_i8x16())) }
3309}
3310
/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi8_mask&expand=980)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmple_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Delegates to the generic masked compare with the _MM_CMPINT_LE
    // (less-than-or-equal) predicate; result bits where `k1` is 0 are zeroed.
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3321
3322/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3323///
3324/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu16_mask&expand=867)
3325#[inline]
3326#[target_feature(enable = "avx512bw")]
3327#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3328#[cfg_attr(test, assert_instr(vpcmp))]
3329pub fn _mm512_cmpge_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
3330    unsafe { simd_bitmask::<u16x32, _>(simd_ge(a.as_u16x32(), b.as_u16x32())) }
3331}
3332
/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu16_mask&expand=868)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpge_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Greater-than-or-equal is encoded as "not less-than" (_MM_CMPINT_NLT);
    // the generic masked compare zeroes result bits where `k1` is 0.
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3343
3344/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3345///
3346/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu16_mask&expand=865)
3347#[inline]
3348#[target_feature(enable = "avx512bw,avx512vl")]
3349#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3350#[cfg_attr(test, assert_instr(vpcmp))]
3351pub fn _mm256_cmpge_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
3352    unsafe { simd_bitmask::<u16x16, _>(simd_ge(a.as_u16x16(), b.as_u16x16())) }
3353}
3354
/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu16_mask&expand=866)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpge_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // Greater-than-or-equal is encoded as "not less-than" (_MM_CMPINT_NLT);
    // the generic masked compare zeroes result bits where `k1` is 0.
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3365
3366/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3367///
3368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu16_mask&expand=863)
3369#[inline]
3370#[target_feature(enable = "avx512bw,avx512vl")]
3371#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3372#[cfg_attr(test, assert_instr(vpcmp))]
3373pub fn _mm_cmpge_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
3374    unsafe { simd_bitmask::<u16x8, _>(simd_ge(a.as_u16x8(), b.as_u16x8())) }
3375}
3376
/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu16_mask&expand=864)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpge_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Greater-than-or-equal is encoded as "not less-than" (_MM_CMPINT_NLT);
    // the generic masked compare zeroes result bits where `k1` is 0.
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3387
3388/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3389///
3390/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu8_mask&expand=885)
3391#[inline]
3392#[target_feature(enable = "avx512bw")]
3393#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3394#[cfg_attr(test, assert_instr(vpcmp))]
3395pub fn _mm512_cmpge_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
3396    unsafe { simd_bitmask::<u8x64, _>(simd_ge(a.as_u8x64(), b.as_u8x64())) }
3397}
3398
/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu8_mask&expand=886)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpge_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // Greater-than-or-equal is encoded as "not less-than" (_MM_CMPINT_NLT);
    // the generic masked compare zeroes result bits where `k1` is 0.
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3409
3410/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3411///
3412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu8_mask&expand=883)
3413#[inline]
3414#[target_feature(enable = "avx512bw,avx512vl")]
3415#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3416#[cfg_attr(test, assert_instr(vpcmp))]
3417pub fn _mm256_cmpge_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
3418    unsafe { simd_bitmask::<u8x32, _>(simd_ge(a.as_u8x32(), b.as_u8x32())) }
3419}
3420
/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu8_mask&expand=884)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpge_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // Greater-than-or-equal is encoded as "not less-than" (_MM_CMPINT_NLT);
    // the generic masked compare zeroes result bits where `k1` is 0.
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3431
3432/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3433///
3434/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu8_mask&expand=881)
3435#[inline]
3436#[target_feature(enable = "avx512bw,avx512vl")]
3437#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3438#[cfg_attr(test, assert_instr(vpcmp))]
3439pub fn _mm_cmpge_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
3440    unsafe { simd_bitmask::<u8x16, _>(simd_ge(a.as_u8x16(), b.as_u8x16())) }
3441}
3442
/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu8_mask&expand=882)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpge_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Greater-than-or-equal is encoded as "not less-than" (_MM_CMPINT_NLT);
    // the generic masked compare zeroes result bits where `k1` is 0.
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3453
3454/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3455///
3456/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi16_mask&expand=843)
3457#[inline]
3458#[target_feature(enable = "avx512bw")]
3459#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3460#[cfg_attr(test, assert_instr(vpcmp))]
3461pub fn _mm512_cmpge_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
3462    unsafe { simd_bitmask::<i16x32, _>(simd_ge(a.as_i16x32(), b.as_i16x32())) }
3463}
3464
/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi16_mask&expand=844)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpge_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Greater-than-or-equal is encoded as "not less-than" (_MM_CMPINT_NLT);
    // the generic masked compare zeroes result bits where `k1` is 0.
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3475
3476/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3477///
3478/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi16_mask&expand=841)
3479#[inline]
3480#[target_feature(enable = "avx512bw,avx512vl")]
3481#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3482#[cfg_attr(test, assert_instr(vpcmp))]
3483pub fn _mm256_cmpge_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
3484    unsafe { simd_bitmask::<i16x16, _>(simd_ge(a.as_i16x16(), b.as_i16x16())) }
3485}
3486
/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi16_mask&expand=842)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpge_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // NLT ("not less-than") encodes >=; k1 is applied as a zeromask by the helper.
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3497
3498/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3499///
3500/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi16_mask&expand=839)
3501#[inline]
3502#[target_feature(enable = "avx512bw,avx512vl")]
3503#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3504#[cfg_attr(test, assert_instr(vpcmp))]
3505pub fn _mm_cmpge_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
3506    unsafe { simd_bitmask::<i16x8, _>(simd_ge(a.as_i16x8(), b.as_i16x8())) }
3507}
3508
/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi16_mask&expand=840)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpge_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // NLT ("not less-than") encodes >=; k1 is applied as a zeromask by the helper.
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3519
3520/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3521///
3522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi8_mask&expand=861)
3523#[inline]
3524#[target_feature(enable = "avx512bw")]
3525#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3526#[cfg_attr(test, assert_instr(vpcmp))]
3527pub fn _mm512_cmpge_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
3528    unsafe { simd_bitmask::<i8x64, _>(simd_ge(a.as_i8x64(), b.as_i8x64())) }
3529}
3530
/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi8_mask&expand=862)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpge_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // NLT ("not less-than") encodes >=; k1 is applied as a zeromask by the helper.
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3541
3542/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3543///
3544/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi8_mask&expand=859)
3545#[inline]
3546#[target_feature(enable = "avx512bw,avx512vl")]
3547#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3548#[cfg_attr(test, assert_instr(vpcmp))]
3549pub fn _mm256_cmpge_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
3550    unsafe { simd_bitmask::<i8x32, _>(simd_ge(a.as_i8x32(), b.as_i8x32())) }
3551}
3552
/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi8_mask&expand=860)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpge_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // NLT ("not less-than") encodes >=; k1 is applied as a zeromask by the helper.
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3563
3564/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3565///
3566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi8_mask&expand=857)
3567#[inline]
3568#[target_feature(enable = "avx512bw,avx512vl")]
3569#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3570#[cfg_attr(test, assert_instr(vpcmp))]
3571pub fn _mm_cmpge_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
3572    unsafe { simd_bitmask::<i8x16, _>(simd_ge(a.as_i8x16(), b.as_i8x16())) }
3573}
3574
/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi8_mask&expand=858)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpge_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // NLT ("not less-than") encodes >=; k1 is applied as a zeromask by the helper.
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3585
3586/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k.
3587///
3588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu16_mask&expand=801)
3589#[inline]
3590#[target_feature(enable = "avx512bw")]
3591#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3592#[cfg_attr(test, assert_instr(vpcmp))]
3593pub fn _mm512_cmpeq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
3594    unsafe { simd_bitmask::<u16x32, _>(simd_eq(a.as_u16x32(), b.as_u16x32())) }
3595}
3596
/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu16_mask&expand=802)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpeq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Generic masked compare with the EQ predicate; k1 zeroes lanes whose bit is clear.
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
3607
3608/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k.
3609///
3610/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu16_mask&expand=799)
3611#[inline]
3612#[target_feature(enable = "avx512bw,avx512vl")]
3613#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3614#[cfg_attr(test, assert_instr(vpcmp))]
3615pub fn _mm256_cmpeq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
3616    unsafe { simd_bitmask::<u16x16, _>(simd_eq(a.as_u16x16(), b.as_u16x16())) }
3617}
3618
/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu16_mask&expand=800)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpeq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // Generic masked compare with the EQ predicate; k1 zeroes lanes whose bit is clear.
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
3629
3630/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k.
3631///
3632/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu16_mask&expand=797)
3633#[inline]
3634#[target_feature(enable = "avx512bw,avx512vl")]
3635#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3636#[cfg_attr(test, assert_instr(vpcmp))]
3637pub fn _mm_cmpeq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
3638    unsafe { simd_bitmask::<u16x8, _>(simd_eq(a.as_u16x8(), b.as_u16x8())) }
3639}
3640
/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu16_mask&expand=798)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpeq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Generic masked compare with the EQ predicate; k1 zeroes lanes whose bit is clear.
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
3651
3652/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k.
3653///
3654/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu8_mask&expand=819)
3655#[inline]
3656#[target_feature(enable = "avx512bw")]
3657#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3658#[cfg_attr(test, assert_instr(vpcmp))]
3659pub fn _mm512_cmpeq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
3660    unsafe { simd_bitmask::<u8x64, _>(simd_eq(a.as_u8x64(), b.as_u8x64())) }
3661}
3662
/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu8_mask&expand=820)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpeq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // Generic masked compare with the EQ predicate; k1 zeroes lanes whose bit is clear.
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
3673
3674/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k.
3675///
3676/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu8_mask&expand=817)
3677#[inline]
3678#[target_feature(enable = "avx512bw,avx512vl")]
3679#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3680#[cfg_attr(test, assert_instr(vpcmp))]
3681pub fn _mm256_cmpeq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
3682    unsafe { simd_bitmask::<u8x32, _>(simd_eq(a.as_u8x32(), b.as_u8x32())) }
3683}
3684
/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu8_mask&expand=818)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpeq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // Generic masked compare with the EQ predicate; k1 zeroes lanes whose bit is clear.
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
3695
3696/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k.
3697///
3698/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu8_mask&expand=815)
3699#[inline]
3700#[target_feature(enable = "avx512bw,avx512vl")]
3701#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3702#[cfg_attr(test, assert_instr(vpcmp))]
3703pub fn _mm_cmpeq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
3704    unsafe { simd_bitmask::<u8x16, _>(simd_eq(a.as_u8x16(), b.as_u8x16())) }
3705}
3706
/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu8_mask&expand=816)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpeq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Generic masked compare with the EQ predicate; k1 zeroes lanes whose bit is clear.
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
3717
3718/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k.
3719///
3720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi16_mask&expand=771)
3721#[inline]
3722#[target_feature(enable = "avx512bw")]
3723#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3724#[cfg_attr(test, assert_instr(vpcmp))]
3725pub fn _mm512_cmpeq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
3726    unsafe { simd_bitmask::<i16x32, _>(simd_eq(a.as_i16x32(), b.as_i16x32())) }
3727}
3728
/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi16_mask&expand=772)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpeq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Generic masked compare with the EQ predicate; k1 zeroes lanes whose bit is clear.
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
3739
3740/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k.
3741///
3742/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi16_mask&expand=769)
3743#[inline]
3744#[target_feature(enable = "avx512bw,avx512vl")]
3745#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3746#[cfg_attr(test, assert_instr(vpcmp))]
3747pub fn _mm256_cmpeq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
3748    unsafe { simd_bitmask::<i16x16, _>(simd_eq(a.as_i16x16(), b.as_i16x16())) }
3749}
3750
/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi16_mask&expand=770)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpeq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // Generic masked compare with the EQ predicate; k1 zeroes lanes whose bit is clear.
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
3761
3762/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k.
3763///
3764/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi16_mask&expand=767)
3765#[inline]
3766#[target_feature(enable = "avx512bw,avx512vl")]
3767#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3768#[cfg_attr(test, assert_instr(vpcmp))]
3769pub fn _mm_cmpeq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
3770    unsafe { simd_bitmask::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8())) }
3771}
3772
/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi16_mask&expand=768)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpeq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Generic masked compare with the EQ predicate; k1 zeroes lanes whose bit is clear.
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
3783
3784/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k.
3785///
3786/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi8_mask&expand=795)
3787#[inline]
3788#[target_feature(enable = "avx512bw")]
3789#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3790#[cfg_attr(test, assert_instr(vpcmp))]
3791pub fn _mm512_cmpeq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
3792    unsafe { simd_bitmask::<i8x64, _>(simd_eq(a.as_i8x64(), b.as_i8x64())) }
3793}
3794
/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi8_mask&expand=796)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpeq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // Generic masked compare with the EQ predicate; k1 zeroes lanes whose bit is clear.
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
3805
3806/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k.
3807///
3808/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi8_mask&expand=793)
3809#[inline]
3810#[target_feature(enable = "avx512bw,avx512vl")]
3811#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3812#[cfg_attr(test, assert_instr(vpcmp))]
3813pub fn _mm256_cmpeq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
3814    unsafe { simd_bitmask::<i8x32, _>(simd_eq(a.as_i8x32(), b.as_i8x32())) }
3815}
3816
/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi8_mask&expand=794)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpeq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // Generic masked compare with the EQ predicate; k1 zeroes lanes whose bit is clear.
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
3827
3828/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k.
3829///
3830/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi8_mask&expand=791)
3831#[inline]
3832#[target_feature(enable = "avx512bw,avx512vl")]
3833#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3834#[cfg_attr(test, assert_instr(vpcmp))]
3835pub fn _mm_cmpeq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
3836    unsafe { simd_bitmask::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16())) }
3837}
3838
/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi8_mask&expand=792)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpeq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Generic masked compare with the EQ predicate; k1 zeroes lanes whose bit is clear.
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
3849
3850/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k.
3851///
3852/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu16_mask&expand=1106)
3853#[inline]
3854#[target_feature(enable = "avx512bw")]
3855#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3856#[cfg_attr(test, assert_instr(vpcmp))]
3857pub fn _mm512_cmpneq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
3858    unsafe { simd_bitmask::<u16x32, _>(simd_ne(a.as_u16x32(), b.as_u16x32())) }
3859}
3860
/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu16_mask&expand=1107)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpneq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Generic masked compare with the NE predicate; k1 zeroes lanes whose bit is clear.
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b)
}
3871
3872/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k.
3873///
3874/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu16_mask&expand=1104)
3875#[inline]
3876#[target_feature(enable = "avx512bw,avx512vl")]
3877#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3878#[cfg_attr(test, assert_instr(vpcmp))]
3879pub fn _mm256_cmpneq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
3880    unsafe { simd_bitmask::<u16x16, _>(simd_ne(a.as_u16x16(), b.as_u16x16())) }
3881}
3882
/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu16_mask&expand=1105)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpneq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // Generic masked compare with the NE predicate; k1 zeroes lanes whose bit is clear.
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b)
}
3893
3894/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k.
3895///
3896/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu16_mask&expand=1102)
3897#[inline]
3898#[target_feature(enable = "avx512bw,avx512vl")]
3899#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3900#[cfg_attr(test, assert_instr(vpcmp))]
3901pub fn _mm_cmpneq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
3902    unsafe { simd_bitmask::<u16x8, _>(simd_ne(a.as_u16x8(), b.as_u16x8())) }
3903}
3904
/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu16_mask&expand=1103)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpneq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Generic masked compare with the NE predicate; k1 zeroes lanes whose bit is clear.
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b)
}
3915
3916/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k.
3917///
3918/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu8_mask&expand=1124)
3919#[inline]
3920#[target_feature(enable = "avx512bw")]
3921#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3922#[cfg_attr(test, assert_instr(vpcmp))]
3923pub fn _mm512_cmpneq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
3924    unsafe { simd_bitmask::<u8x64, _>(simd_ne(a.as_u8x64(), b.as_u8x64())) }
3925}
3926
/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu8_mask&expand=1125)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpneq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // Generic masked compare with the NE predicate; k1 zeroes lanes whose bit is clear.
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b)
}
3937
3938/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k.
3939///
3940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu8_mask&expand=1122)
3941#[inline]
3942#[target_feature(enable = "avx512bw,avx512vl")]
3943#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3944#[cfg_attr(test, assert_instr(vpcmp))]
3945pub fn _mm256_cmpneq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
3946    unsafe { simd_bitmask::<u8x32, _>(simd_ne(a.as_u8x32(), b.as_u8x32())) }
3947}
3948
/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu8_mask&expand=1123)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpneq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // Generic masked compare with the NE predicate; k1 zeroes lanes whose bit is clear.
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b)
}
3959
3960/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k.
3961///
3962/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu8_mask&expand=1120)
3963#[inline]
3964#[target_feature(enable = "avx512bw,avx512vl")]
3965#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3966#[cfg_attr(test, assert_instr(vpcmp))]
3967pub fn _mm_cmpneq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
3968    unsafe { simd_bitmask::<u8x16, _>(simd_ne(a.as_u8x16(), b.as_u8x16())) }
3969}
3970
/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu8_mask&expand=1121)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpneq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Generic masked compare with the NE predicate; k1 zeroes lanes whose bit is clear.
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b)
}
3981
3982/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k.
3983///
3984/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi16_mask&expand=1082)
3985#[inline]
3986#[target_feature(enable = "avx512bw")]
3987#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3988#[cfg_attr(test, assert_instr(vpcmp))]
3989pub fn _mm512_cmpneq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
3990    unsafe { simd_bitmask::<i16x32, _>(simd_ne(a.as_i16x32(), b.as_i16x32())) }
3991}
3992
/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi16_mask&expand=1083)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpneq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Generic masked compare with the NE predicate; k1 zeroes lanes whose bit is clear.
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b)
}
4003
/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi16_mask&expand=1080)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpneq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    // Lane-wise `a != b` over 16 i16 lanes, packed into a 16-bit mask (bit i = lane i).
    unsafe { simd_bitmask::<i16x16, _>(simd_ne(a.as_i16x16(), b.as_i16x16())) }
}
4014
/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi16_mask&expand=1081)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpneq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // Delegate to the generic IMM8 compare with the not-equal predicate;
    // lanes whose k1 bit is clear produce a 0 bit in the result.
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b)
}
4025
/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi16_mask&expand=1078)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpneq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Lane-wise `a != b` over 8 i16 lanes, packed into an 8-bit mask (bit i = lane i).
    unsafe { simd_bitmask::<i16x8, _>(simd_ne(a.as_i16x8(), b.as_i16x8())) }
}
4036
/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi16_mask&expand=1079)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpneq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegate to the generic IMM8 compare with the not-equal predicate;
    // lanes whose k1 bit is clear produce a 0 bit in the result.
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b)
}
4047
/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi8_mask&expand=1100)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpneq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    // Lane-wise `a != b` over 64 i8 lanes, packed into a 64-bit mask (bit i = lane i).
    unsafe { simd_bitmask::<i8x64, _>(simd_ne(a.as_i8x64(), b.as_i8x64())) }
}
4058
/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi8_mask&expand=1101)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpneq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // Delegate to the generic IMM8 compare with the not-equal predicate;
    // lanes whose k1 bit is clear produce a 0 bit in the result.
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b)
}
4069
/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi8_mask&expand=1098)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpneq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    // Lane-wise `a != b` over 32 i8 lanes, packed into a 32-bit mask (bit i = lane i).
    unsafe { simd_bitmask::<i8x32, _>(simd_ne(a.as_i8x32(), b.as_i8x32())) }
}
4080
/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi8_mask&expand=1099)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpneq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // Delegate to the generic IMM8 compare with the not-equal predicate;
    // lanes whose k1 bit is clear produce a 0 bit in the result.
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b)
}
4091
/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi8_mask&expand=1096)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpneq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    // Lane-wise `a != b` over 16 i8 lanes, packed into a 16-bit mask (bit i = lane i).
    unsafe { simd_bitmask::<i8x16, _>(simd_ne(a.as_i8x16(), b.as_i8x16())) }
}
4102
/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi8_mask&expand=1097)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpneq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Delegate to the generic IMM8 compare with the not-equal predicate;
    // lanes whose k1 bit is clear produce a 0 bit in the result.
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b)
}
4113
/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu16_mask&expand=715)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm512_cmp_epu16_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe {
        // IMM8 must be a 3-bit immediate (the _MM_CMPINT_* encoding).
        static_assert_uimm_bits!(IMM8, 3);
        // Unsigned views so `lt`/`le`/`ge`/`gt` compare without sign.
        let a = a.as_u16x32();
        let b = b.as_u16x32();
        // Predicate table: 0=EQ, 1=LT, 2=LE, 3=FALSE (always 0),
        // 4=NE, 5=NLT (GE), 6=NLE (GT), 7=TRUE (all ones).
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i16x32::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i16x32::splat(-1),
        };
        // Pack lane i's truth value into bit i of the 32-bit mask.
        simd_bitmask(r)
    }
}
4140
/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu16_mask&expand=716)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm512_mask_cmp_epu16_mask<const IMM8: i32>(
    k1: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __mmask32 {
    unsafe {
        // IMM8 must be a 3-bit immediate (the _MM_CMPINT_* encoding).
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_u16x32();
        let b = b.as_u16x32();
        // Expand the bitmask k1 into a per-lane vector mask (-1/0) so it can
        // be ANDed lane-wise with each predicate result below.
        let k1 = simd_select_bitmask(k1, i16x32::splat(-1), i16x32::ZERO);
        // Predicate table (0=EQ, 1=LT, 2=LE, 3=FALSE, 4=NE, 5=GE, 6=GT, 7=TRUE);
        // FALSE is 0 regardless of k1, TRUE is just k1.
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i16x32::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}
4172
/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu16_mask&expand=713)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm256_cmp_epu16_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe {
        // IMM8 must be a 3-bit immediate (the _MM_CMPINT_* encoding).
        static_assert_uimm_bits!(IMM8, 3);
        // Unsigned views so the ordered predicates compare without sign.
        let a = a.as_u16x16();
        let b = b.as_u16x16();
        // 0=EQ, 1=LT, 2=LE, 3=FALSE, 4=NE, 5=GE, 6=GT, 7=TRUE.
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i16x16::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i16x16::splat(-1),
        };
        simd_bitmask(r)
    }
}
4199
/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu16_mask&expand=714)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm256_mask_cmp_epu16_mask<const IMM8: i32>(
    k1: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __mmask16 {
    unsafe {
        // IMM8 must be a 3-bit immediate (the _MM_CMPINT_* encoding).
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_u16x16();
        let b = b.as_u16x16();
        // Expand k1 to a per-lane vector mask (-1/0) for lane-wise AND.
        let k1 = simd_select_bitmask(k1, i16x16::splat(-1), i16x16::ZERO);
        // 0=EQ, 1=LT, 2=LE, 3=FALSE (0 regardless of k1), 4=NE, 5=GE, 6=GT, 7=TRUE (k1).
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i16x16::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}
4231
/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu16_mask&expand=711)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm_cmp_epu16_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe {
        // IMM8 must be a 3-bit immediate (the _MM_CMPINT_* encoding).
        static_assert_uimm_bits!(IMM8, 3);
        // Unsigned views so the ordered predicates compare without sign.
        let a = a.as_u16x8();
        let b = b.as_u16x8();
        // 0=EQ, 1=LT, 2=LE, 3=FALSE, 4=NE, 5=GE, 6=GT, 7=TRUE.
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i16x8::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i16x8::splat(-1),
        };
        simd_bitmask(r)
    }
}
4258
/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu16_mask&expand=712)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm_mask_cmp_epu16_mask<const IMM8: i32>(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    unsafe {
        // IMM8 must be a 3-bit immediate (the _MM_CMPINT_* encoding).
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_u16x8();
        let b = b.as_u16x8();
        // Expand k1 to a per-lane vector mask (-1/0) for lane-wise AND.
        let k1 = simd_select_bitmask(k1, i16x8::splat(-1), i16x8::ZERO);
        // 0=EQ, 1=LT, 2=LE, 3=FALSE (0 regardless of k1), 4=NE, 5=GE, 6=GT, 7=TRUE (k1).
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i16x8::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}
4286
/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu8_mask&expand=733)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm512_cmp_epu8_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe {
        // IMM8 must be a 3-bit immediate (the _MM_CMPINT_* encoding).
        static_assert_uimm_bits!(IMM8, 3);
        // Unsigned views so the ordered predicates compare without sign.
        let a = a.as_u8x64();
        let b = b.as_u8x64();
        // 0=EQ, 1=LT, 2=LE, 3=FALSE, 4=NE, 5=GE, 6=GT, 7=TRUE.
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i8x64::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i8x64::splat(-1),
        };
        simd_bitmask(r)
    }
}
4313
/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu8_mask&expand=734)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm512_mask_cmp_epu8_mask<const IMM8: i32>(
    k1: __mmask64,
    a: __m512i,
    b: __m512i,
) -> __mmask64 {
    unsafe {
        // IMM8 must be a 3-bit immediate (the _MM_CMPINT_* encoding).
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_u8x64();
        let b = b.as_u8x64();
        // Expand k1 to a per-lane vector mask (-1/0) for lane-wise AND.
        let k1 = simd_select_bitmask(k1, i8x64::splat(-1), i8x64::ZERO);
        // 0=EQ, 1=LT, 2=LE, 3=FALSE (0 regardless of k1), 4=NE, 5=GE, 6=GT, 7=TRUE (k1).
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i8x64::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}
4345
/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu8_mask&expand=731)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm256_cmp_epu8_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe {
        // IMM8 must be a 3-bit immediate (the _MM_CMPINT_* encoding).
        static_assert_uimm_bits!(IMM8, 3);
        // Unsigned views so the ordered predicates compare without sign.
        let a = a.as_u8x32();
        let b = b.as_u8x32();
        // 0=EQ, 1=LT, 2=LE, 3=FALSE, 4=NE, 5=GE, 6=GT, 7=TRUE.
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i8x32::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i8x32::splat(-1),
        };
        simd_bitmask(r)
    }
}
4372
/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu8_mask&expand=732)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm256_mask_cmp_epu8_mask<const IMM8: i32>(
    k1: __mmask32,
    a: __m256i,
    b: __m256i,
) -> __mmask32 {
    unsafe {
        // IMM8 must be a 3-bit immediate (the _MM_CMPINT_* encoding).
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_u8x32();
        let b = b.as_u8x32();
        // Expand k1 to a per-lane vector mask (-1/0) for lane-wise AND.
        let k1 = simd_select_bitmask(k1, i8x32::splat(-1), i8x32::ZERO);
        // 0=EQ, 1=LT, 2=LE, 3=FALSE (0 regardless of k1), 4=NE, 5=GE, 6=GT, 7=TRUE (k1).
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i8x32::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}
4404
/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu8_mask&expand=729)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm_cmp_epu8_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe {
        // IMM8 must be a 3-bit immediate (the _MM_CMPINT_* encoding).
        static_assert_uimm_bits!(IMM8, 3);
        // Unsigned views so the ordered predicates compare without sign.
        let a = a.as_u8x16();
        let b = b.as_u8x16();
        // 0=EQ, 1=LT, 2=LE, 3=FALSE, 4=NE, 5=GE, 6=GT, 7=TRUE.
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i8x16::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i8x16::splat(-1),
        };
        simd_bitmask(r)
    }
}
4431
/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu8_mask&expand=730)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm_mask_cmp_epu8_mask<const IMM8: i32>(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    unsafe {
        // IMM8 must be a 3-bit immediate (the _MM_CMPINT_* encoding).
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_u8x16();
        let b = b.as_u8x16();
        // Expand k1 to a per-lane vector mask (-1/0) for lane-wise AND.
        let k1 = simd_select_bitmask(k1, i8x16::splat(-1), i8x16::ZERO);
        // 0=EQ, 1=LT, 2=LE, 3=FALSE (0 regardless of k1), 4=NE, 5=GE, 6=GT, 7=TRUE (k1).
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i8x16::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}
4459
/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi16_mask&expand=691)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm512_cmp_epi16_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe {
        // IMM8 must be a 3-bit immediate (the _MM_CMPINT_* encoding).
        static_assert_uimm_bits!(IMM8, 3);
        // Signed views: the ordered predicates compare as two's-complement i16.
        let a = a.as_i16x32();
        let b = b.as_i16x32();
        // 0=EQ, 1=LT, 2=LE, 3=FALSE, 4=NE, 5=GE, 6=GT, 7=TRUE.
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i16x32::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i16x32::splat(-1),
        };
        simd_bitmask(r)
    }
}
4486
/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi16_mask&expand=692)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm512_mask_cmp_epi16_mask<const IMM8: i32>(
    k1: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __mmask32 {
    unsafe {
        // IMM8 must be a 3-bit immediate (the _MM_CMPINT_* encoding).
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i16x32();
        let b = b.as_i16x32();
        // Expand k1 to a per-lane vector mask (-1/0) for lane-wise AND.
        let k1 = simd_select_bitmask(k1, i16x32::splat(-1), i16x32::ZERO);
        // 0=EQ, 1=LT, 2=LE, 3=FALSE (0 regardless of k1), 4=NE, 5=GE, 6=GT, 7=TRUE (k1).
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i16x32::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}
4518
/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi16_mask&expand=689)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm256_cmp_epi16_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe {
        // IMM8 must be a 3-bit immediate (the _MM_CMPINT_* encoding).
        static_assert_uimm_bits!(IMM8, 3);
        // Signed views: the ordered predicates compare as two's-complement i16.
        let a = a.as_i16x16();
        let b = b.as_i16x16();
        // 0=EQ, 1=LT, 2=LE, 3=FALSE, 4=NE, 5=GE, 6=GT, 7=TRUE.
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i16x16::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i16x16::splat(-1),
        };
        simd_bitmask(r)
    }
}
4545
/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi16_mask&expand=690)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm256_mask_cmp_epi16_mask<const IMM8: i32>(
    k1: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __mmask16 {
    unsafe {
        // IMM8 must be a 3-bit immediate (the _MM_CMPINT_* encoding).
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i16x16();
        let b = b.as_i16x16();
        // Expand k1 to a per-lane vector mask (-1/0) for lane-wise AND.
        let k1 = simd_select_bitmask(k1, i16x16::splat(-1), i16x16::ZERO);
        // 0=EQ, 1=LT, 2=LE, 3=FALSE (0 regardless of k1), 4=NE, 5=GE, 6=GT, 7=TRUE (k1).
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i16x16::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}
4577
/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi16_mask&expand=687)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm_cmp_epi16_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe {
        // IMM8 must be a 3-bit immediate (the _MM_CMPINT_* encoding).
        static_assert_uimm_bits!(IMM8, 3);
        // Signed views: the ordered predicates compare as two's-complement i16.
        let a = a.as_i16x8();
        let b = b.as_i16x8();
        // 0=EQ, 1=LT, 2=LE, 3=FALSE, 4=NE, 5=GE, 6=GT, 7=TRUE.
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i16x8::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i16x8::splat(-1),
        };
        simd_bitmask(r)
    }
}
4604
/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi16_mask&expand=688)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm_mask_cmp_epi16_mask<const IMM8: i32>(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    unsafe {
        // IMM8 must be a 3-bit immediate (the _MM_CMPINT_* encoding).
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i16x8();
        let b = b.as_i16x8();
        // Expand k1 to a per-lane vector mask (-1/0) for lane-wise AND.
        let k1 = simd_select_bitmask(k1, i16x8::splat(-1), i16x8::ZERO);
        // 0=EQ, 1=LT, 2=LE, 3=FALSE (0 regardless of k1), 4=NE, 5=GE, 6=GT, 7=TRUE (k1).
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i16x8::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}
4632
/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi8_mask&expand=709)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm512_cmp_epi8_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe {
        // IMM8 must be a 3-bit immediate (the _MM_CMPINT_* encoding).
        static_assert_uimm_bits!(IMM8, 3);
        // Signed views: the ordered predicates compare as two's-complement i8.
        let a = a.as_i8x64();
        let b = b.as_i8x64();
        // 0=EQ, 1=LT, 2=LE, 3=FALSE, 4=NE, 5=GE, 6=GT, 7=TRUE.
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i8x64::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i8x64::splat(-1),
        };
        simd_bitmask(r)
    }
}
4659
/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi8_mask&expand=710)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm512_mask_cmp_epi8_mask<const IMM8: i32>(
    k1: __mmask64,
    a: __m512i,
    b: __m512i,
) -> __mmask64 {
    unsafe {
        // IMM8 must be a 3-bit immediate (the _MM_CMPINT_* encoding).
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i8x64();
        let b = b.as_i8x64();
        // Expand k1 to a per-lane vector mask (-1/0) for lane-wise AND.
        let k1 = simd_select_bitmask(k1, i8x64::splat(-1), i8x64::ZERO);
        // 0=EQ, 1=LT, 2=LE, 3=FALSE (0 regardless of k1), 4=NE, 5=GE, 6=GT, 7=TRUE (k1).
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i8x64::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}
4691
/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi8_mask&expand=707)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm256_cmp_epi8_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe {
        // IMM8 must be a 3-bit immediate (the _MM_CMPINT_* encoding).
        static_assert_uimm_bits!(IMM8, 3);
        // Signed views: the ordered predicates compare as two's-complement i8.
        let a = a.as_i8x32();
        let b = b.as_i8x32();
        // 0=EQ, 1=LT, 2=LE, 3=FALSE, 4=NE, 5=GE, 6=GT, 7=TRUE.
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i8x32::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i8x32::splat(-1),
        };
        simd_bitmask(r)
    }
}
4718
4719/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4720///
4721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi8_mask&expand=708)
4722#[inline]
4723#[target_feature(enable = "avx512bw,avx512vl")]
4724#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4725#[rustc_legacy_const_generics(3)]
4726#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4727pub fn _mm256_mask_cmp_epi8_mask<const IMM8: i32>(
4728    k1: __mmask32,
4729    a: __m256i,
4730    b: __m256i,
4731) -> __mmask32 {
4732    unsafe {
4733        static_assert_uimm_bits!(IMM8, 3);
4734        let a = a.as_i8x32();
4735        let b = b.as_i8x32();
4736        let k1 = simd_select_bitmask(k1, i8x32::splat(-1), i8x32::ZERO);
4737        let r = match IMM8 {
4738            0 => simd_and(k1, simd_eq(a, b)),
4739            1 => simd_and(k1, simd_lt(a, b)),
4740            2 => simd_and(k1, simd_le(a, b)),
4741            3 => i8x32::ZERO,
4742            4 => simd_and(k1, simd_ne(a, b)),
4743            5 => simd_and(k1, simd_ge(a, b)),
4744            6 => simd_and(k1, simd_gt(a, b)),
4745            _ => k1,
4746        };
4747        simd_bitmask(r)
4748    }
4749}
4750
4751/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4752///
4753/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi8_mask&expand=705)
4754#[inline]
4755#[target_feature(enable = "avx512bw,avx512vl")]
4756#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4757#[rustc_legacy_const_generics(2)]
4758#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4759pub fn _mm_cmp_epi8_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask16 {
4760    unsafe {
4761        static_assert_uimm_bits!(IMM8, 3);
4762        let a = a.as_i8x16();
4763        let b = b.as_i8x16();
4764        let r = match IMM8 {
4765            0 => simd_eq(a, b),
4766            1 => simd_lt(a, b),
4767            2 => simd_le(a, b),
4768            3 => i8x16::ZERO,
4769            4 => simd_ne(a, b),
4770            5 => simd_ge(a, b),
4771            6 => simd_gt(a, b),
4772            _ => i8x16::splat(-1),
4773        };
4774        simd_bitmask(r)
4775    }
4776}
4777
4778/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4779///
4780/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi8_mask&expand=706)
4781#[inline]
4782#[target_feature(enable = "avx512bw,avx512vl")]
4783#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4784#[rustc_legacy_const_generics(3)]
4785#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4786pub fn _mm_mask_cmp_epi8_mask<const IMM8: i32>(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
4787    unsafe {
4788        static_assert_uimm_bits!(IMM8, 3);
4789        let a = a.as_i8x16();
4790        let b = b.as_i8x16();
4791        let k1 = simd_select_bitmask(k1, i8x16::splat(-1), i8x16::ZERO);
4792        let r = match IMM8 {
4793            0 => simd_and(k1, simd_eq(a, b)),
4794            1 => simd_and(k1, simd_lt(a, b)),
4795            2 => simd_and(k1, simd_le(a, b)),
4796            3 => i8x16::ZERO,
4797            4 => simd_and(k1, simd_ne(a, b)),
4798            5 => simd_and(k1, simd_ge(a, b)),
4799            6 => simd_and(k1, simd_gt(a, b)),
4800            _ => k1,
4801        };
4802        simd_bitmask(r)
4803    }
4804}
4805
4806/// Reduce the packed 16-bit integers in a by addition. Returns the sum of all elements in a.
4807///
4808/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_add_epi16)
4809#[inline]
4810#[target_feature(enable = "avx512bw,avx512vl")]
4811#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4812pub fn _mm256_reduce_add_epi16(a: __m256i) -> i16 {
4813    unsafe { simd_reduce_add_unordered(a.as_i16x16()) }
4814}
4815
4816/// Reduce the packed 16-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
4817///
4818/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_add_epi16)
4819#[inline]
4820#[target_feature(enable = "avx512bw,avx512vl")]
4821#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4822pub fn _mm256_mask_reduce_add_epi16(k: __mmask16, a: __m256i) -> i16 {
4823    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::ZERO)) }
4824}
4825
4826/// Reduce the packed 16-bit integers in a by addition. Returns the sum of all elements in a.
4827///
4828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_add_epi16)
4829#[inline]
4830#[target_feature(enable = "avx512bw,avx512vl")]
4831#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4832pub fn _mm_reduce_add_epi16(a: __m128i) -> i16 {
4833    unsafe { simd_reduce_add_unordered(a.as_i16x8()) }
4834}
4835
4836/// Reduce the packed 16-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
4837///
4838/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_add_epi16)
4839#[inline]
4840#[target_feature(enable = "avx512bw,avx512vl")]
4841#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4842pub fn _mm_mask_reduce_add_epi16(k: __mmask8, a: __m128i) -> i16 {
4843    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::ZERO)) }
4844}
4845
4846/// Reduce the packed 8-bit integers in a by addition. Returns the sum of all elements in a.
4847///
4848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_add_epi8)
4849#[inline]
4850#[target_feature(enable = "avx512bw,avx512vl")]
4851#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4852pub fn _mm256_reduce_add_epi8(a: __m256i) -> i8 {
4853    unsafe { simd_reduce_add_unordered(a.as_i8x32()) }
4854}
4855
4856/// Reduce the packed 8-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
4857///
4858/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_add_epi8)
4859#[inline]
4860#[target_feature(enable = "avx512bw,avx512vl")]
4861#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4862pub fn _mm256_mask_reduce_add_epi8(k: __mmask32, a: __m256i) -> i8 {
4863    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::ZERO)) }
4864}
4865
4866/// Reduce the packed 8-bit integers in a by addition. Returns the sum of all elements in a.
4867///
4868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_add_epi8)
4869#[inline]
4870#[target_feature(enable = "avx512bw,avx512vl")]
4871#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4872pub fn _mm_reduce_add_epi8(a: __m128i) -> i8 {
4873    unsafe { simd_reduce_add_unordered(a.as_i8x16()) }
4874}
4875
4876/// Reduce the packed 8-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
4877///
4878/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_add_epi8)
4879#[inline]
4880#[target_feature(enable = "avx512bw,avx512vl")]
4881#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4882pub fn _mm_mask_reduce_add_epi8(k: __mmask16, a: __m128i) -> i8 {
4883    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::ZERO)) }
4884}
4885
4886/// Reduce the packed 16-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
4887///
4888/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_and_epi16)
4889#[inline]
4890#[target_feature(enable = "avx512bw,avx512vl")]
4891#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4892pub fn _mm256_reduce_and_epi16(a: __m256i) -> i16 {
4893    unsafe { simd_reduce_and(a.as_i16x16()) }
4894}
4895
4896/// Reduce the packed 16-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
4897///
4898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_and_epi16)
4899#[inline]
4900#[target_feature(enable = "avx512bw,avx512vl")]
4901#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4902pub fn _mm256_mask_reduce_and_epi16(k: __mmask16, a: __m256i) -> i16 {
4903    unsafe {
4904        simd_reduce_and(simd_select_bitmask(
4905            k,
4906            a.as_i16x16(),
4907            _mm256_set1_epi64x(-1).as_i16x16(),
4908        ))
4909    }
4910}
4911
4912/// Reduce the packed 16-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
4913///
4914/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_and_epi16)
4915#[inline]
4916#[target_feature(enable = "avx512bw,avx512vl")]
4917#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4918pub fn _mm_reduce_and_epi16(a: __m128i) -> i16 {
4919    unsafe { simd_reduce_and(a.as_i16x8()) }
4920}
4921
4922/// Reduce the packed 16-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
4923///
4924/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_and_epi16)
4925#[inline]
4926#[target_feature(enable = "avx512bw,avx512vl")]
4927#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4928pub fn _mm_mask_reduce_and_epi16(k: __mmask8, a: __m128i) -> i16 {
4929    unsafe {
4930        simd_reduce_and(simd_select_bitmask(
4931            k,
4932            a.as_i16x8(),
4933            _mm_set1_epi64x(-1).as_i16x8(),
4934        ))
4935    }
4936}
4937
4938/// Reduce the packed 8-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
4939///
4940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_and_epi8)
4941#[inline]
4942#[target_feature(enable = "avx512bw,avx512vl")]
4943#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4944pub fn _mm256_reduce_and_epi8(a: __m256i) -> i8 {
4945    unsafe { simd_reduce_and(a.as_i8x32()) }
4946}
4947
4948/// Reduce the packed 8-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
4949///
4950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_and_epi8)
4951#[inline]
4952#[target_feature(enable = "avx512bw,avx512vl")]
4953#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4954pub fn _mm256_mask_reduce_and_epi8(k: __mmask32, a: __m256i) -> i8 {
4955    unsafe {
4956        simd_reduce_and(simd_select_bitmask(
4957            k,
4958            a.as_i8x32(),
4959            _mm256_set1_epi64x(-1).as_i8x32(),
4960        ))
4961    }
4962}
4963
4964/// Reduce the packed 8-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
4965///
4966/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_and_epi8)
4967#[inline]
4968#[target_feature(enable = "avx512bw,avx512vl")]
4969#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4970pub fn _mm_reduce_and_epi8(a: __m128i) -> i8 {
4971    unsafe { simd_reduce_and(a.as_i8x16()) }
4972}
4973
4974/// Reduce the packed 8-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
4975///
4976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_and_epi8)
4977#[inline]
4978#[target_feature(enable = "avx512bw,avx512vl")]
4979#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4980pub fn _mm_mask_reduce_and_epi8(k: __mmask16, a: __m128i) -> i8 {
4981    unsafe {
4982        simd_reduce_and(simd_select_bitmask(
4983            k,
4984            a.as_i8x16(),
4985            _mm_set1_epi64x(-1).as_i8x16(),
4986        ))
4987    }
4988}
4989
4990/// Reduce the packed 16-bit integers in a by maximum. Returns the maximum of all elements in a.
4991///
4992/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epi16)
4993#[inline]
4994#[target_feature(enable = "avx512bw,avx512vl")]
4995#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4996pub fn _mm256_reduce_max_epi16(a: __m256i) -> i16 {
4997    unsafe { simd_reduce_max(a.as_i16x16()) }
4998}
4999
5000/// Reduce the packed 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5001///
5002/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epi16)
5003#[inline]
5004#[target_feature(enable = "avx512bw,avx512vl")]
5005#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5006pub fn _mm256_mask_reduce_max_epi16(k: __mmask16, a: __m256i) -> i16 {
5007    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(-32768))) }
5008}
5009
5010/// Reduce the packed 16-bit integers in a by maximum. Returns the maximum of all elements in a.
5011///
5012/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epi16)
5013#[inline]
5014#[target_feature(enable = "avx512bw,avx512vl")]
5015#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5016pub fn _mm_reduce_max_epi16(a: __m128i) -> i16 {
5017    unsafe { simd_reduce_max(a.as_i16x8()) }
5018}
5019
5020/// Reduce the packed 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5021///
5022/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epi16)
5023#[inline]
5024#[target_feature(enable = "avx512bw,avx512vl")]
5025#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5026pub fn _mm_mask_reduce_max_epi16(k: __mmask8, a: __m128i) -> i16 {
5027    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(-32768))) }
5028}
5029
5030/// Reduce the packed 8-bit integers in a by maximum. Returns the maximum of all elements in a.
5031///
5032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epi8)
5033#[inline]
5034#[target_feature(enable = "avx512bw,avx512vl")]
5035#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5036pub fn _mm256_reduce_max_epi8(a: __m256i) -> i8 {
5037    unsafe { simd_reduce_max(a.as_i8x32()) }
5038}
5039
5040/// Reduce the packed 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5041///
5042/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epi8)
5043#[inline]
5044#[target_feature(enable = "avx512bw,avx512vl")]
5045#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5046pub fn _mm256_mask_reduce_max_epi8(k: __mmask32, a: __m256i) -> i8 {
5047    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(-128))) }
5048}
5049
5050/// Reduce the packed 8-bit integers in a by maximum. Returns the maximum of all elements in a.
5051///
5052/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epi8)
5053#[inline]
5054#[target_feature(enable = "avx512bw,avx512vl")]
5055#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5056pub fn _mm_reduce_max_epi8(a: __m128i) -> i8 {
5057    unsafe { simd_reduce_max(a.as_i8x16()) }
5058}
5059
5060/// Reduce the packed 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5061///
5062/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epi8)
5063#[inline]
5064#[target_feature(enable = "avx512bw,avx512vl")]
5065#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5066pub fn _mm_mask_reduce_max_epi8(k: __mmask16, a: __m128i) -> i8 {
5067    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(-128))) }
5068}
5069
5070/// Reduce the packed unsigned 16-bit integers in a by maximum. Returns the maximum of all elements in a.
5071///
5072/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epu16)
5073#[inline]
5074#[target_feature(enable = "avx512bw,avx512vl")]
5075#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5076pub fn _mm256_reduce_max_epu16(a: __m256i) -> u16 {
5077    unsafe { simd_reduce_max(a.as_u16x16()) }
5078}
5079
5080/// Reduce the packed unsigned 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5081///
5082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epu16)
5083#[inline]
5084#[target_feature(enable = "avx512bw,avx512vl")]
5085#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5086pub fn _mm256_mask_reduce_max_epu16(k: __mmask16, a: __m256i) -> u16 {
5087    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u16x16(), u16x16::ZERO)) }
5088}
5089
5090/// Reduce the packed unsigned 16-bit integers in a by maximum. Returns the maximum of all elements in a.
5091///
5092/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epu16)
5093#[inline]
5094#[target_feature(enable = "avx512bw,avx512vl")]
5095#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5096pub fn _mm_reduce_max_epu16(a: __m128i) -> u16 {
5097    unsafe { simd_reduce_max(a.as_u16x8()) }
5098}
5099
5100/// Reduce the packed unsigned 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5101///
5102/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epu16)
5103#[inline]
5104#[target_feature(enable = "avx512bw,avx512vl")]
5105#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5106pub fn _mm_mask_reduce_max_epu16(k: __mmask8, a: __m128i) -> u16 {
5107    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u16x8(), u16x8::ZERO)) }
5108}
5109
5110/// Reduce the packed unsigned 8-bit integers in a by maximum. Returns the maximum of all elements in a.
5111///
5112/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epu8)
5113#[inline]
5114#[target_feature(enable = "avx512bw,avx512vl")]
5115#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5116pub fn _mm256_reduce_max_epu8(a: __m256i) -> u8 {
5117    unsafe { simd_reduce_max(a.as_u8x32()) }
5118}
5119
5120/// Reduce the packed unsigned 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5121///
5122/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epu8)
5123#[inline]
5124#[target_feature(enable = "avx512bw,avx512vl")]
5125#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5126pub fn _mm256_mask_reduce_max_epu8(k: __mmask32, a: __m256i) -> u8 {
5127    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u8x32(), u8x32::ZERO)) }
5128}
5129
5130/// Reduce the packed unsigned 8-bit integers in a by maximum. Returns the maximum of all elements in a.
5131///
5132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epu8)
5133#[inline]
5134#[target_feature(enable = "avx512bw,avx512vl")]
5135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5136pub fn _mm_reduce_max_epu8(a: __m128i) -> u8 {
5137    unsafe { simd_reduce_max(a.as_u8x16()) }
5138}
5139
5140/// Reduce the packed unsigned 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5141///
5142/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epu8)
5143#[inline]
5144#[target_feature(enable = "avx512bw,avx512vl")]
5145#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5146pub fn _mm_mask_reduce_max_epu8(k: __mmask16, a: __m128i) -> u8 {
5147    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u8x16(), u8x16::ZERO)) }
5148}
5149
5150/// Reduce the packed 16-bit integers in a by minimum. Returns the minimum of all elements in a.
5151///
5152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epi16)
5153#[inline]
5154#[target_feature(enable = "avx512bw,avx512vl")]
5155#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5156pub fn _mm256_reduce_min_epi16(a: __m256i) -> i16 {
5157    unsafe { simd_reduce_min(a.as_i16x16()) }
5158}
5159
5160/// Reduce the packed 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5161///
5162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epi16)
5163#[inline]
5164#[target_feature(enable = "avx512bw,avx512vl")]
5165#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5166pub fn _mm256_mask_reduce_min_epi16(k: __mmask16, a: __m256i) -> i16 {
5167    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(0x7fff))) }
5168}
5169
5170/// Reduce the packed 16-bit integers in a by minimum. Returns the minimum of all elements in a.
5171///
5172/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epi16)
5173#[inline]
5174#[target_feature(enable = "avx512bw,avx512vl")]
5175#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5176pub fn _mm_reduce_min_epi16(a: __m128i) -> i16 {
5177    unsafe { simd_reduce_min(a.as_i16x8()) }
5178}
5179
5180/// Reduce the packed 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5181///
5182/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epi16)
5183#[inline]
5184#[target_feature(enable = "avx512bw,avx512vl")]
5185#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5186pub fn _mm_mask_reduce_min_epi16(k: __mmask8, a: __m128i) -> i16 {
5187    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(0x7fff))) }
5188}
5189
5190/// Reduce the packed 8-bit integers in a by minimum. Returns the minimum of all elements in a.
5191///
5192/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epi8)
5193#[inline]
5194#[target_feature(enable = "avx512bw,avx512vl")]
5195#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5196pub fn _mm256_reduce_min_epi8(a: __m256i) -> i8 {
5197    unsafe { simd_reduce_min(a.as_i8x32()) }
5198}
5199
5200/// Reduce the packed 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5201///
5202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epi8)
5203#[inline]
5204#[target_feature(enable = "avx512bw,avx512vl")]
5205#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5206pub fn _mm256_mask_reduce_min_epi8(k: __mmask32, a: __m256i) -> i8 {
5207    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(0x7f))) }
5208}
5209
5210/// Reduce the packed 8-bit integers in a by minimum. Returns the minimum of all elements in a.
5211///
5212/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epi8)
5213#[inline]
5214#[target_feature(enable = "avx512bw,avx512vl")]
5215#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5216pub fn _mm_reduce_min_epi8(a: __m128i) -> i8 {
5217    unsafe { simd_reduce_min(a.as_i8x16()) }
5218}
5219
5220/// Reduce the packed 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5221///
5222/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epi8)
5223#[inline]
5224#[target_feature(enable = "avx512bw,avx512vl")]
5225#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5226pub fn _mm_mask_reduce_min_epi8(k: __mmask16, a: __m128i) -> i8 {
5227    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(0x7f))) }
5228}
5229
5230/// Reduce the packed unsigned 16-bit integers in a by minimum. Returns the minimum of all elements in a.
5231///
5232/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epu16)
5233#[inline]
5234#[target_feature(enable = "avx512bw,avx512vl")]
5235#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5236pub fn _mm256_reduce_min_epu16(a: __m256i) -> u16 {
5237    unsafe { simd_reduce_min(a.as_u16x16()) }
5238}
5239
5240/// Reduce the packed unsigned 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5241///
5242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epu16)
5243#[inline]
5244#[target_feature(enable = "avx512bw,avx512vl")]
5245#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5246pub fn _mm256_mask_reduce_min_epu16(k: __mmask16, a: __m256i) -> u16 {
5247    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u16x16(), u16x16::splat(0xffff))) }
5248}
5249
5250/// Reduce the packed unsigned 16-bit integers in a by minimum. Returns the minimum of all elements in a.
5251///
5252/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epu16)
5253#[inline]
5254#[target_feature(enable = "avx512bw,avx512vl")]
5255#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5256pub fn _mm_reduce_min_epu16(a: __m128i) -> u16 {
5257    unsafe { simd_reduce_min(a.as_u16x8()) }
5258}
5259
5260/// Reduce the packed unsigned 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5261///
5262/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epu16)
5263#[inline]
5264#[target_feature(enable = "avx512bw,avx512vl")]
5265#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5266pub fn _mm_mask_reduce_min_epu16(k: __mmask8, a: __m128i) -> u16 {
5267    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u16x8(), u16x8::splat(0xffff))) }
5268}
5269
5270/// Reduce the packed unsigned 8-bit integers in a by minimum. Returns the minimum of all elements in a.
5271///
5272/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epu8)
5273#[inline]
5274#[target_feature(enable = "avx512bw,avx512vl")]
5275#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5276pub fn _mm256_reduce_min_epu8(a: __m256i) -> u8 {
5277    unsafe { simd_reduce_min(a.as_u8x32()) }
5278}
5279
5280/// Reduce the packed unsigned 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5281///
5282/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epu8)
5283#[inline]
5284#[target_feature(enable = "avx512bw,avx512vl")]
5285#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5286pub fn _mm256_mask_reduce_min_epu8(k: __mmask32, a: __m256i) -> u8 {
5287    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u8x32(), u8x32::splat(0xff))) }
5288}
5289
5290/// Reduce the packed unsigned 8-bit integers in a by minimum. Returns the minimum of all elements in a.
5291///
5292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epu8)
5293#[inline]
5294#[target_feature(enable = "avx512bw,avx512vl")]
5295#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5296pub fn _mm_reduce_min_epu8(a: __m128i) -> u8 {
5297    unsafe { simd_reduce_min(a.as_u8x16()) }
5298}
5299
5300/// Reduce the packed unsigned 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5301///
5302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epu8)
5303#[inline]
5304#[target_feature(enable = "avx512bw,avx512vl")]
5305#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5306pub fn _mm_mask_reduce_min_epu8(k: __mmask16, a: __m128i) -> u8 {
5307    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u8x16(), u8x16::splat(0xff))) }
5308}
5309
5310/// Reduce the packed 16-bit integers in a by multiplication. Returns the product of all elements in a.
5311///
5312/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_mul_epi16)
5313#[inline]
5314#[target_feature(enable = "avx512bw,avx512vl")]
5315#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5316pub fn _mm256_reduce_mul_epi16(a: __m256i) -> i16 {
5317    unsafe { simd_reduce_mul_unordered(a.as_i16x16()) }
5318}
5319
5320/// Reduce the packed 16-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
5321///
5322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_mul_epi16)
5323#[inline]
5324#[target_feature(enable = "avx512bw,avx512vl")]
5325#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5326pub fn _mm256_mask_reduce_mul_epi16(k: __mmask16, a: __m256i) -> i16 {
5327    unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(1))) }
5328}
5329
5330/// Reduce the packed 16-bit integers in a by multiplication. Returns the product of all elements in a.
5331///
5332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_mul_epi16)
5333#[inline]
5334#[target_feature(enable = "avx512bw,avx512vl")]
5335#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5336pub fn _mm_reduce_mul_epi16(a: __m128i) -> i16 {
5337    unsafe { simd_reduce_mul_unordered(a.as_i16x8()) }
5338}
5339
5340/// Reduce the packed 16-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
5341///
5342/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_mul_epi16)
5343#[inline]
5344#[target_feature(enable = "avx512bw,avx512vl")]
5345#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5346pub fn _mm_mask_reduce_mul_epi16(k: __mmask8, a: __m128i) -> i16 {
5347    unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(1))) }
5348}
5349
5350/// Reduce the packed 8-bit integers in a by multiplication. Returns the product of all elements in a.
5351///
5352/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_mul_epi8)
5353#[inline]
5354#[target_feature(enable = "avx512bw,avx512vl")]
5355#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5356pub fn _mm256_reduce_mul_epi8(a: __m256i) -> i8 {
5357    unsafe { simd_reduce_mul_unordered(a.as_i8x32()) }
5358}
5359
5360/// Reduce the packed 8-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
5361///
5362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_mul_epi8)
5363#[inline]
5364#[target_feature(enable = "avx512bw,avx512vl")]
5365#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5366pub fn _mm256_mask_reduce_mul_epi8(k: __mmask32, a: __m256i) -> i8 {
5367    unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(1))) }
5368}
5369
5370/// Reduce the packed 8-bit integers in a by multiplication. Returns the product of all elements in a.
5371///
5372/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_mul_epi8)
5373#[inline]
5374#[target_feature(enable = "avx512bw,avx512vl")]
5375#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5376pub fn _mm_reduce_mul_epi8(a: __m128i) -> i8 {
5377    unsafe { simd_reduce_mul_unordered(a.as_i8x16()) }
5378}
5379
5380/// Reduce the packed 8-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
5381///
5382/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_mul_epi8)
5383#[inline]
5384#[target_feature(enable = "avx512bw,avx512vl")]
5385#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5386pub fn _mm_mask_reduce_mul_epi8(k: __mmask16, a: __m128i) -> i8 {
5387    unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(1))) }
5388}
5389
5390/// Reduce the packed 16-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
5391///
5392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_or_epi16)
5393#[inline]
5394#[target_feature(enable = "avx512bw,avx512vl")]
5395#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5396pub fn _mm256_reduce_or_epi16(a: __m256i) -> i16 {
5397    unsafe { simd_reduce_or(a.as_i16x16()) }
5398}
5399
5400/// Reduce the packed 16-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
5401///
5402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_or_epi16)
5403#[inline]
5404#[target_feature(enable = "avx512bw,avx512vl")]
5405#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5406pub fn _mm256_mask_reduce_or_epi16(k: __mmask16, a: __m256i) -> i16 {
5407    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i16x16(), i16x16::ZERO)) }
5408}
5409
5410/// Reduce the packed 16-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
5411///
5412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_or_epi16)
5413#[inline]
5414#[target_feature(enable = "avx512bw,avx512vl")]
5415#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5416pub fn _mm_reduce_or_epi16(a: __m128i) -> i16 {
5417    unsafe { simd_reduce_or(a.as_i16x8()) }
5418}
5419
5420/// Reduce the packed 16-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
5421///
5422/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_or_epi16)
5423#[inline]
5424#[target_feature(enable = "avx512bw,avx512vl")]
5425#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5426pub fn _mm_mask_reduce_or_epi16(k: __mmask8, a: __m128i) -> i16 {
5427    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i16x8(), i16x8::ZERO)) }
5428}
5429
5430/// Reduce the packed 8-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
5431///
5432/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_or_epi8)
5433#[inline]
5434#[target_feature(enable = "avx512bw,avx512vl")]
5435#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5436pub fn _mm256_reduce_or_epi8(a: __m256i) -> i8 {
5437    unsafe { simd_reduce_or(a.as_i8x32()) }
5438}
5439
5440/// Reduce the packed 8-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
5441///
5442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_or_epi8)
5443#[inline]
5444#[target_feature(enable = "avx512bw,avx512vl")]
5445#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5446pub fn _mm256_mask_reduce_or_epi8(k: __mmask32, a: __m256i) -> i8 {
5447    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i8x32(), i8x32::ZERO)) }
5448}
5449
5450/// Reduce the packed 8-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
5451///
5452/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_or_epi8)
5453#[inline]
5454#[target_feature(enable = "avx512bw,avx512vl")]
5455#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5456pub fn _mm_reduce_or_epi8(a: __m128i) -> i8 {
5457    unsafe { simd_reduce_or(a.as_i8x16()) }
5458}
5459
5460/// Reduce the packed 8-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
5461///
5462/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_or_epi8)
5463#[inline]
5464#[target_feature(enable = "avx512bw,avx512vl")]
5465#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5466pub fn _mm_mask_reduce_or_epi8(k: __mmask16, a: __m128i) -> i8 {
5467    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i8x16(), i8x16::ZERO)) }
5468}
5469
5470/// Load 512-bits (composed of 32 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
5471///
5472/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi16&expand=3368)
5473#[inline]
5474#[target_feature(enable = "avx512bw")]
5475#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5476#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
5477pub unsafe fn _mm512_loadu_epi16(mem_addr: *const i16) -> __m512i {
5478    ptr::read_unaligned(mem_addr as *const __m512i)
5479}
5480
5481/// Load 256-bits (composed of 16 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
5482///
5483/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi16&expand=3365)
5484#[inline]
5485#[target_feature(enable = "avx512bw,avx512vl")]
5486#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5487#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
5488pub unsafe fn _mm256_loadu_epi16(mem_addr: *const i16) -> __m256i {
5489    ptr::read_unaligned(mem_addr as *const __m256i)
5490}
5491
5492/// Load 128-bits (composed of 8 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
5493///
5494/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi16&expand=3362)
5495#[inline]
5496#[target_feature(enable = "avx512bw,avx512vl")]
5497#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5498#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
5499pub unsafe fn _mm_loadu_epi16(mem_addr: *const i16) -> __m128i {
5500    ptr::read_unaligned(mem_addr as *const __m128i)
5501}
5502
5503/// Load 512-bits (composed of 64 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
5504///
5505/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi8&expand=3395)
5506#[inline]
5507#[target_feature(enable = "avx512bw")]
5508#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5509#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
5510pub unsafe fn _mm512_loadu_epi8(mem_addr: *const i8) -> __m512i {
5511    ptr::read_unaligned(mem_addr as *const __m512i)
5512}
5513
5514/// Load 256-bits (composed of 32 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
5515///
5516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi8&expand=3392)
5517#[inline]
5518#[target_feature(enable = "avx512bw,avx512vl")]
5519#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5520#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
5521pub unsafe fn _mm256_loadu_epi8(mem_addr: *const i8) -> __m256i {
5522    ptr::read_unaligned(mem_addr as *const __m256i)
5523}
5524
5525/// Load 128-bits (composed of 16 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
5526///
5527/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi8&expand=3389)
5528#[inline]
5529#[target_feature(enable = "avx512bw,avx512vl")]
5530#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5531#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
5532pub unsafe fn _mm_loadu_epi8(mem_addr: *const i8) -> __m128i {
5533    ptr::read_unaligned(mem_addr as *const __m128i)
5534}
5535
5536/// Store 512-bits (composed of 32 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
5537///
5538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi16&expand=5622)
5539#[inline]
5540#[target_feature(enable = "avx512bw")]
5541#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5542#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
5543pub unsafe fn _mm512_storeu_epi16(mem_addr: *mut i16, a: __m512i) {
5544    ptr::write_unaligned(mem_addr as *mut __m512i, a);
5545}
5546
5547/// Store 256-bits (composed of 16 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
5548///
5549/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi16&expand=5620)
5550#[inline]
5551#[target_feature(enable = "avx512bw,avx512vl")]
5552#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5553#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
5554pub unsafe fn _mm256_storeu_epi16(mem_addr: *mut i16, a: __m256i) {
5555    ptr::write_unaligned(mem_addr as *mut __m256i, a);
5556}
5557
5558/// Store 128-bits (composed of 8 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
5559///
5560/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi16&expand=5618)
5561#[inline]
5562#[target_feature(enable = "avx512bw,avx512vl")]
5563#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5564#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
5565pub unsafe fn _mm_storeu_epi16(mem_addr: *mut i16, a: __m128i) {
5566    ptr::write_unaligned(mem_addr as *mut __m128i, a);
5567}
5568
5569/// Store 512-bits (composed of 64 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
5570///
5571/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi8&expand=5640)
5572#[inline]
5573#[target_feature(enable = "avx512bw")]
5574#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5575#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
5576pub unsafe fn _mm512_storeu_epi8(mem_addr: *mut i8, a: __m512i) {
5577    ptr::write_unaligned(mem_addr as *mut __m512i, a);
5578}
5579
5580/// Store 256-bits (composed of 32 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
5581///
5582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi8&expand=5638)
5583#[inline]
5584#[target_feature(enable = "avx512bw,avx512vl")]
5585#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5586#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
5587pub unsafe fn _mm256_storeu_epi8(mem_addr: *mut i8, a: __m256i) {
5588    ptr::write_unaligned(mem_addr as *mut __m256i, a);
5589}
5590
5591/// Store 128-bits (composed of 16 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
5592///
5593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi8&expand=5636)
5594#[inline]
5595#[target_feature(enable = "avx512bw,avx512vl")]
5596#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5597#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
5598pub unsafe fn _mm_storeu_epi8(mem_addr: *mut i8, a: __m128i) {
5599    ptr::write_unaligned(mem_addr as *mut __m128i, a);
5600}
5601
/// Load packed 16-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_loadu_epi16(src: __m512i, k: __mmask32, mem_addr: *const i16) -> __m512i {
    // Expand bitmask k into a per-lane vector mask (!0 = load, 0 = keep src).
    let mask = simd_select_bitmask(k, i16x32::splat(!0), i16x32::ZERO);
    // Masked-off lanes are not read from memory; their value comes from src.
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i16x32()).as_m512i()
}
5615
/// Load packed 16-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_maskz_loadu_epi16(k: __mmask32, mem_addr: *const i16) -> __m512i {
    // Zero-masking is the masked load with an all-zero src vector.
    _mm512_mask_loadu_epi16(_mm512_setzero_si512(), k, mem_addr)
}
5628
/// Load packed 8-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_loadu_epi8(src: __m512i, k: __mmask64, mem_addr: *const i8) -> __m512i {
    // Expand bitmask k into a per-lane vector mask (!0 = load, 0 = keep src).
    let mask = simd_select_bitmask(k, i8x64::splat(!0), i8x64::ZERO);
    // Masked-off lanes are not read from memory; their value comes from src.
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i8x64()).as_m512i()
}
5642
/// Load packed 8-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_maskz_loadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m512i {
    // Zero-masking is the masked load with an all-zero src vector.
    _mm512_mask_loadu_epi8(_mm512_setzero_si512(), k, mem_addr)
}
5655
/// Load packed 16-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_loadu_epi16(src: __m256i, k: __mmask16, mem_addr: *const i16) -> __m256i {
    // Expand bitmask k into a per-lane vector mask (!0 = load, 0 = keep src).
    let mask = simd_select_bitmask(k, i16x16::splat(!0), i16x16::ZERO);
    // Masked-off lanes are not read from memory; their value comes from src.
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i16x16()).as_m256i()
}
5669
/// Load packed 16-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_maskz_loadu_epi16(k: __mmask16, mem_addr: *const i16) -> __m256i {
    // Zero-masking is the masked load with an all-zero src vector.
    _mm256_mask_loadu_epi16(_mm256_setzero_si256(), k, mem_addr)
}
5682
/// Load packed 8-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_loadu_epi8(src: __m256i, k: __mmask32, mem_addr: *const i8) -> __m256i {
    // Expand bitmask k into a per-lane vector mask (!0 = load, 0 = keep src).
    let mask = simd_select_bitmask(k, i8x32::splat(!0), i8x32::ZERO);
    // Masked-off lanes are not read from memory; their value comes from src.
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i8x32()).as_m256i()
}
5696
/// Load packed 8-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_maskz_loadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m256i {
    // Zero-masking is the masked load with an all-zero src vector.
    _mm256_mask_loadu_epi8(_mm256_setzero_si256(), k, mem_addr)
}
5709
/// Load packed 16-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_loadu_epi16(src: __m128i, k: __mmask8, mem_addr: *const i16) -> __m128i {
    // Expand bitmask k into a per-lane vector mask (!0 = load, 0 = keep src).
    let mask = simd_select_bitmask(k, i16x8::splat(!0), i16x8::ZERO);
    // Masked-off lanes are not read from memory; their value comes from src.
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i16x8()).as_m128i()
}
5723
/// Load packed 16-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_loadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128i {
    // Zero-masking is the masked load with an all-zero src vector.
    _mm_mask_loadu_epi16(_mm_setzero_si128(), k, mem_addr)
}
5736
/// Load packed 8-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_loadu_epi8(src: __m128i, k: __mmask16, mem_addr: *const i8) -> __m128i {
    // Expand bitmask k into a per-lane vector mask (!0 = load, 0 = keep src).
    let mask = simd_select_bitmask(k, i8x16::splat(!0), i8x16::ZERO);
    // Masked-off lanes are not read from memory; their value comes from src.
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i8x16()).as_m128i()
}
5750
/// Load packed 8-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_loadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i {
    // Zero-masking is the masked load with an all-zero src vector.
    _mm_mask_loadu_epi8(_mm_setzero_si128(), k, mem_addr)
}
5763
/// Store packed 16-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi16)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask32, a: __m512i) {
    // Expand the bitmask into a per-lane vector mask (!0 = store, 0 = skip).
    let mask = simd_select_bitmask(mask, i16x32::splat(!0), i16x32::ZERO);
    // Masked-off lanes leave the corresponding memory untouched.
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i16x32());
}
5776
/// Store packed 8-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask64, a: __m512i) {
    // Expand the bitmask into a per-lane vector mask (!0 = store, 0 = skip).
    let mask = simd_select_bitmask(mask, i8x64::splat(!0), i8x64::ZERO);
    // Masked-off lanes leave the corresponding memory untouched.
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i8x64());
}
5789
/// Store packed 16-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask16, a: __m256i) {
    // Expand the bitmask into a per-lane vector mask (!0 = store, 0 = skip).
    let mask = simd_select_bitmask(mask, i16x16::splat(!0), i16x16::ZERO);
    // Masked-off lanes leave the corresponding memory untouched.
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i16x16());
}
5802
/// Store packed 8-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask32, a: __m256i) {
    // Expand the bitmask into a per-lane vector mask (!0 = store, 0 = skip).
    let mask = simd_select_bitmask(mask, i8x32::splat(!0), i8x32::ZERO);
    // Masked-off lanes leave the corresponding memory untouched.
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i8x32());
}
5815
/// Store packed 16-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask8, a: __m128i) {
    // Expand the bitmask into a per-lane vector mask (!0 = store, 0 = skip).
    let mask = simd_select_bitmask(mask, i16x8::splat(!0), i16x8::ZERO);
    // Masked-off lanes leave the corresponding memory untouched.
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i16x8());
}
5828
/// Store packed 8-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask16, a: __m128i) {
    // Expand the bitmask into a per-lane vector mask (!0 = store, 0 = skip).
    let mask = simd_select_bitmask(mask, i8x16::splat(!0), i8x16::ZERO);
    // Masked-off lanes leave the corresponding memory untouched.
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i8x16());
}
5841
/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_madd_epi16&expand=3511)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub fn _mm512_madd_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Sign-extend both operands to i32 before multiplying, so each of the
        // 32 lane products is exact (no 16-bit overflow).
        let r: i32x32 = simd_mul(simd_cast(a.as_i16x32()), simd_cast(b.as_i16x32()));
        // Deinterleave the products: even-indexed lanes...
        let even: i32x16 = simd_shuffle!(
            r,
            r,
            [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
        );
        // ...and odd-indexed lanes.
        let odd: i32x16 = simd_shuffle!(
            r,
            r,
            [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]
        );
        // dst[i] = a[2i]*b[2i] + a[2i+1]*b[2i+1], matching vpmaddwd.
        simd_add(even, odd).as_m512i()
    }
}
5865
/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_madd_epi16&expand=3512)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub fn _mm512_mask_madd_epi16(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let madd = _mm512_madd_epi16(a, b).as_i32x16();
        // Per 32-bit result lane: keep madd where the mask bit is set, else src.
        transmute(simd_select_bitmask(k, madd, src.as_i32x16()))
    }
}
5879
/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_madd_epi16&expand=3513)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub fn _mm512_maskz_madd_epi16(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let madd = _mm512_madd_epi16(a, b).as_i32x16();
        // Per 32-bit result lane: keep madd where the mask bit is set, else 0.
        transmute(simd_select_bitmask(k, madd, i32x16::ZERO))
    }
}
5893
/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_madd_epi16&expand=3509)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub fn _mm256_mask_madd_epi16(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Delegate to the AVX2 intrinsic, then apply the writemask.
        let madd = _mm256_madd_epi16(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, madd, src.as_i32x8()))
    }
}
5907
/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_madd_epi16&expand=3510)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub fn _mm256_maskz_madd_epi16(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Delegate to the AVX2 intrinsic, then apply the zeromask.
        let madd = _mm256_madd_epi16(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, madd, i32x8::ZERO))
    }
}
5921
5922/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5923///
5924/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_madd_epi16&expand=3506)
5925#[inline]
5926#[target_feature(enable = "avx512bw,avx512vl")]
5927#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5928#[cfg_attr(test, assert_instr(vpmaddwd))]
5929pub fn _mm_mask_madd_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
5930    unsafe {
5931        let madd = _mm_madd_epi16(a, b).as_i32x4();
5932        transmute(simd_select_bitmask(k, madd, src.as_i32x4()))
5933    }
5934}
5935
5936/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5937///
5938/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_madd_epi16&expand=3507)
5939#[inline]
5940#[target_feature(enable = "avx512bw,avx512vl")]
5941#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5942#[cfg_attr(test, assert_instr(vpmaddwd))]
5943pub fn _mm_maskz_madd_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
5944    unsafe {
5945        let madd = _mm_madd_epi16(a, b).as_i32x4();
5946        transmute(simd_select_bitmask(k, madd, i32x4::ZERO))
5947    }
5948}
5949
5950/// Vertically multiply each unsigned 8-bit integer from a with the corresponding signed 8-bit integer from b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst.
5951///
5952/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maddubs_epi16&expand=3539)
5953#[inline]
5954#[target_feature(enable = "avx512bw")]
5955#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5956#[cfg_attr(test, assert_instr(vpmaddubsw))]
5957pub fn _mm512_maddubs_epi16(a: __m512i, b: __m512i) -> __m512i {
5958    unsafe { transmute(vpmaddubsw(a.as_u8x64(), b.as_i8x64())) }
5959}
5960
5961/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5962///
5963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_maddubs_epi16&expand=3540)
5964#[inline]
5965#[target_feature(enable = "avx512bw")]
5966#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5967#[cfg_attr(test, assert_instr(vpmaddubsw))]
5968pub fn _mm512_mask_maddubs_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
5969    unsafe {
5970        let madd = _mm512_maddubs_epi16(a, b).as_i16x32();
5971        transmute(simd_select_bitmask(k, madd, src.as_i16x32()))
5972    }
5973}
5974
5975/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5976///
5977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_maddubs_epi16&expand=3541)
5978#[inline]
5979#[target_feature(enable = "avx512bw")]
5980#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5981#[cfg_attr(test, assert_instr(vpmaddubsw))]
5982pub fn _mm512_maskz_maddubs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
5983    unsafe {
5984        let madd = _mm512_maddubs_epi16(a, b).as_i16x32();
5985        transmute(simd_select_bitmask(k, madd, i16x32::ZERO))
5986    }
5987}
5988
5989/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5990///
5991/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_maddubs_epi16&expand=3537)
5992#[inline]
5993#[target_feature(enable = "avx512bw,avx512vl")]
5994#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5995#[cfg_attr(test, assert_instr(vpmaddubsw))]
5996pub fn _mm256_mask_maddubs_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
5997    unsafe {
5998        let madd = _mm256_maddubs_epi16(a, b).as_i16x16();
5999        transmute(simd_select_bitmask(k, madd, src.as_i16x16()))
6000    }
6001}
6002
6003/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6004///
6005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_maddubs_epi16&expand=3538)
6006#[inline]
6007#[target_feature(enable = "avx512bw,avx512vl")]
6008#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6009#[cfg_attr(test, assert_instr(vpmaddubsw))]
6010pub fn _mm256_maskz_maddubs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
6011    unsafe {
6012        let madd = _mm256_maddubs_epi16(a, b).as_i16x16();
6013        transmute(simd_select_bitmask(k, madd, i16x16::ZERO))
6014    }
6015}
6016
6017/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6018///
6019/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_maddubs_epi16&expand=3534)
6020#[inline]
6021#[target_feature(enable = "avx512bw,avx512vl")]
6022#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6023#[cfg_attr(test, assert_instr(vpmaddubsw))]
6024pub fn _mm_mask_maddubs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6025    unsafe {
6026        let madd = _mm_maddubs_epi16(a, b).as_i16x8();
6027        transmute(simd_select_bitmask(k, madd, src.as_i16x8()))
6028    }
6029}
6030
6031/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6032///
6033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_maddubs_epi16&expand=3535)
6034#[inline]
6035#[target_feature(enable = "avx512bw,avx512vl")]
6036#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6037#[cfg_attr(test, assert_instr(vpmaddubsw))]
6038pub fn _mm_maskz_maddubs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6039    unsafe {
6040        let madd = _mm_maddubs_epi16(a, b).as_i16x8();
6041        transmute(simd_select_bitmask(k, madd, i16x8::ZERO))
6042    }
6043}
6044
6045/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst.
6046///
6047/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packs_epi32&expand=4091)
6048#[inline]
6049#[target_feature(enable = "avx512bw")]
6050#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6051#[cfg_attr(test, assert_instr(vpackssdw))]
6052pub fn _mm512_packs_epi32(a: __m512i, b: __m512i) -> __m512i {
6053    unsafe { transmute(vpackssdw(a.as_i32x16(), b.as_i32x16())) }
6054}
6055
6056/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6057///
6058/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packs_epi32&expand=4089)
6059#[inline]
6060#[target_feature(enable = "avx512bw")]
6061#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6062#[cfg_attr(test, assert_instr(vpackssdw))]
6063pub fn _mm512_mask_packs_epi32(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
6064    unsafe {
6065        let pack = _mm512_packs_epi32(a, b).as_i16x32();
6066        transmute(simd_select_bitmask(k, pack, src.as_i16x32()))
6067    }
6068}
6069
6070/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6071///
6072/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packs_epi32&expand=4090)
6073#[inline]
6074#[target_feature(enable = "avx512bw")]
6075#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6076#[cfg_attr(test, assert_instr(vpackssdw))]
6077pub fn _mm512_maskz_packs_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
6078    unsafe {
6079        let pack = _mm512_packs_epi32(a, b).as_i16x32();
6080        transmute(simd_select_bitmask(k, pack, i16x32::ZERO))
6081    }
6082}
6083
6084/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6085///
6086/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packs_epi32&expand=4086)
6087#[inline]
6088#[target_feature(enable = "avx512bw,avx512vl")]
6089#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6090#[cfg_attr(test, assert_instr(vpackssdw))]
6091pub fn _mm256_mask_packs_epi32(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
6092    unsafe {
6093        let pack = _mm256_packs_epi32(a, b).as_i16x16();
6094        transmute(simd_select_bitmask(k, pack, src.as_i16x16()))
6095    }
6096}
6097
6098/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6099///
6100/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packs_epi32&expand=4087)
6101#[inline]
6102#[target_feature(enable = "avx512bw,avx512vl")]
6103#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6104#[cfg_attr(test, assert_instr(vpackssdw))]
6105pub fn _mm256_maskz_packs_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
6106    unsafe {
6107        let pack = _mm256_packs_epi32(a, b).as_i16x16();
6108        transmute(simd_select_bitmask(k, pack, i16x16::ZERO))
6109    }
6110}
6111
6112/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6113///
6114/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packs_epi32&expand=4083)
6115#[inline]
6116#[target_feature(enable = "avx512bw,avx512vl")]
6117#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6118#[cfg_attr(test, assert_instr(vpackssdw))]
6119pub fn _mm_mask_packs_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6120    unsafe {
6121        let pack = _mm_packs_epi32(a, b).as_i16x8();
6122        transmute(simd_select_bitmask(k, pack, src.as_i16x8()))
6123    }
6124}
6125
6126/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6127///
6128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packs_epi32&expand=4084)
6129#[inline]
6130#[target_feature(enable = "avx512bw,avx512vl")]
6131#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6132#[cfg_attr(test, assert_instr(vpackssdw))]
6133pub fn _mm_maskz_packs_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6134    unsafe {
6135        let pack = _mm_packs_epi32(a, b).as_i16x8();
6136        transmute(simd_select_bitmask(k, pack, i16x8::ZERO))
6137    }
6138}
6139
6140/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst.
6141///
6142/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packs_epi16&expand=4082)
6143#[inline]
6144#[target_feature(enable = "avx512bw")]
6145#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6146#[cfg_attr(test, assert_instr(vpacksswb))]
6147pub fn _mm512_packs_epi16(a: __m512i, b: __m512i) -> __m512i {
6148    unsafe { transmute(vpacksswb(a.as_i16x32(), b.as_i16x32())) }
6149}
6150
6151/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6152///
6153/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packs_epi16&expand=4080)
6154#[inline]
6155#[target_feature(enable = "avx512bw")]
6156#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6157#[cfg_attr(test, assert_instr(vpacksswb))]
6158pub fn _mm512_mask_packs_epi16(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
6159    unsafe {
6160        let pack = _mm512_packs_epi16(a, b).as_i8x64();
6161        transmute(simd_select_bitmask(k, pack, src.as_i8x64()))
6162    }
6163}
6164
6165/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6166///
6167/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packs_epi16&expand=4081)
6168#[inline]
6169#[target_feature(enable = "avx512bw")]
6170#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6171#[cfg_attr(test, assert_instr(vpacksswb))]
6172pub fn _mm512_maskz_packs_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
6173    unsafe {
6174        let pack = _mm512_packs_epi16(a, b).as_i8x64();
6175        transmute(simd_select_bitmask(k, pack, i8x64::ZERO))
6176    }
6177}
6178
6179/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6180///
6181/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packs_epi16&expand=4077)
6182#[inline]
6183#[target_feature(enable = "avx512bw,avx512vl")]
6184#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6185#[cfg_attr(test, assert_instr(vpacksswb))]
6186pub fn _mm256_mask_packs_epi16(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
6187    unsafe {
6188        let pack = _mm256_packs_epi16(a, b).as_i8x32();
6189        transmute(simd_select_bitmask(k, pack, src.as_i8x32()))
6190    }
6191}
6192
6193/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6194///
6195/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=#text=_mm256_maskz_packs_epi16&expand=4078)
6196#[inline]
6197#[target_feature(enable = "avx512bw,avx512vl")]
6198#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6199#[cfg_attr(test, assert_instr(vpacksswb))]
6200pub fn _mm256_maskz_packs_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
6201    unsafe {
6202        let pack = _mm256_packs_epi16(a, b).as_i8x32();
6203        transmute(simd_select_bitmask(k, pack, i8x32::ZERO))
6204    }
6205}
6206
6207/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6208///
6209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packs_epi16&expand=4074)
6210#[inline]
6211#[target_feature(enable = "avx512bw,avx512vl")]
6212#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6213#[cfg_attr(test, assert_instr(vpacksswb))]
6214pub fn _mm_mask_packs_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
6215    unsafe {
6216        let pack = _mm_packs_epi16(a, b).as_i8x16();
6217        transmute(simd_select_bitmask(k, pack, src.as_i8x16()))
6218    }
6219}
6220
6221/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6222///
6223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packs_epi16&expand=4075)
6224#[inline]
6225#[target_feature(enable = "avx512bw,avx512vl")]
6226#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6227#[cfg_attr(test, assert_instr(vpacksswb))]
6228pub fn _mm_maskz_packs_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
6229    unsafe {
6230        let pack = _mm_packs_epi16(a, b).as_i8x16();
6231        transmute(simd_select_bitmask(k, pack, i8x16::ZERO))
6232    }
6233}
6234
6235/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst.
6236///
6237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packus_epi32&expand=4130)
6238#[inline]
6239#[target_feature(enable = "avx512bw")]
6240#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6241#[cfg_attr(test, assert_instr(vpackusdw))]
6242pub fn _mm512_packus_epi32(a: __m512i, b: __m512i) -> __m512i {
6243    unsafe { transmute(vpackusdw(a.as_i32x16(), b.as_i32x16())) }
6244}
6245
6246/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6247///
6248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packus_epi32&expand=4128)
6249#[inline]
6250#[target_feature(enable = "avx512bw")]
6251#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6252#[cfg_attr(test, assert_instr(vpackusdw))]
6253pub fn _mm512_mask_packus_epi32(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
6254    unsafe {
6255        let pack = _mm512_packus_epi32(a, b).as_i16x32();
6256        transmute(simd_select_bitmask(k, pack, src.as_i16x32()))
6257    }
6258}
6259
6260/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6261///
6262/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packus_epi32&expand=4129)
6263#[inline]
6264#[target_feature(enable = "avx512bw")]
6265#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6266#[cfg_attr(test, assert_instr(vpackusdw))]
6267pub fn _mm512_maskz_packus_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
6268    unsafe {
6269        let pack = _mm512_packus_epi32(a, b).as_i16x32();
6270        transmute(simd_select_bitmask(k, pack, i16x32::ZERO))
6271    }
6272}
6273
6274/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6275///
6276/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packus_epi32&expand=4125)
6277#[inline]
6278#[target_feature(enable = "avx512bw,avx512vl")]
6279#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6280#[cfg_attr(test, assert_instr(vpackusdw))]
6281pub fn _mm256_mask_packus_epi32(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
6282    unsafe {
6283        let pack = _mm256_packus_epi32(a, b).as_i16x16();
6284        transmute(simd_select_bitmask(k, pack, src.as_i16x16()))
6285    }
6286}
6287
6288/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6289///
6290/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packus_epi32&expand=4126)
6291#[inline]
6292#[target_feature(enable = "avx512bw,avx512vl")]
6293#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6294#[cfg_attr(test, assert_instr(vpackusdw))]
6295pub fn _mm256_maskz_packus_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
6296    unsafe {
6297        let pack = _mm256_packus_epi32(a, b).as_i16x16();
6298        transmute(simd_select_bitmask(k, pack, i16x16::ZERO))
6299    }
6300}
6301
6302/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6303///
6304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packus_epi32&expand=4122)
6305#[inline]
6306#[target_feature(enable = "avx512bw,avx512vl")]
6307#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6308#[cfg_attr(test, assert_instr(vpackusdw))]
6309pub fn _mm_mask_packus_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6310    unsafe {
6311        let pack = _mm_packus_epi32(a, b).as_i16x8();
6312        transmute(simd_select_bitmask(k, pack, src.as_i16x8()))
6313    }
6314}
6315
6316/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6317///
6318/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packus_epi32&expand=4123)
6319#[inline]
6320#[target_feature(enable = "avx512bw,avx512vl")]
6321#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6322#[cfg_attr(test, assert_instr(vpackusdw))]
6323pub fn _mm_maskz_packus_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6324    unsafe {
6325        let pack = _mm_packus_epi32(a, b).as_i16x8();
6326        transmute(simd_select_bitmask(k, pack, i16x8::ZERO))
6327    }
6328}
6329
6330/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst.
6331///
6332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packus_epi16&expand=4121)
6333#[inline]
6334#[target_feature(enable = "avx512bw")]
6335#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6336#[cfg_attr(test, assert_instr(vpackuswb))]
6337pub fn _mm512_packus_epi16(a: __m512i, b: __m512i) -> __m512i {
6338    unsafe { transmute(vpackuswb(a.as_i16x32(), b.as_i16x32())) }
6339}
6340
6341/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6342///
6343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packus_epi16&expand=4119)
6344#[inline]
6345#[target_feature(enable = "avx512bw")]
6346#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6347#[cfg_attr(test, assert_instr(vpackuswb))]
6348pub fn _mm512_mask_packus_epi16(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
6349    unsafe {
6350        let pack = _mm512_packus_epi16(a, b).as_i8x64();
6351        transmute(simd_select_bitmask(k, pack, src.as_i8x64()))
6352    }
6353}
6354
6355/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6356///
6357/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packus_epi16&expand=4120)
6358#[inline]
6359#[target_feature(enable = "avx512bw")]
6360#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6361#[cfg_attr(test, assert_instr(vpackuswb))]
6362pub fn _mm512_maskz_packus_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
6363    unsafe {
6364        let pack = _mm512_packus_epi16(a, b).as_i8x64();
6365        transmute(simd_select_bitmask(k, pack, i8x64::ZERO))
6366    }
6367}
6368
6369/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6370///
6371/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packus_epi16&expand=4116)
6372#[inline]
6373#[target_feature(enable = "avx512bw,avx512vl")]
6374#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6375#[cfg_attr(test, assert_instr(vpackuswb))]
6376pub fn _mm256_mask_packus_epi16(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
6377    unsafe {
6378        let pack = _mm256_packus_epi16(a, b).as_i8x32();
6379        transmute(simd_select_bitmask(k, pack, src.as_i8x32()))
6380    }
6381}
6382
6383/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6384///
6385/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packus_epi16&expand=4117)
6386#[inline]
6387#[target_feature(enable = "avx512bw,avx512vl")]
6388#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6389#[cfg_attr(test, assert_instr(vpackuswb))]
6390pub fn _mm256_maskz_packus_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
6391    unsafe {
6392        let pack = _mm256_packus_epi16(a, b).as_i8x32();
6393        transmute(simd_select_bitmask(k, pack, i8x32::ZERO))
6394    }
6395}
6396
6397/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6398///
6399/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packus_epi16&expand=4113)
6400#[inline]
6401#[target_feature(enable = "avx512bw,avx512vl")]
6402#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6403#[cfg_attr(test, assert_instr(vpackuswb))]
6404pub fn _mm_mask_packus_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
6405    unsafe {
6406        let pack = _mm_packus_epi16(a, b).as_i8x16();
6407        transmute(simd_select_bitmask(k, pack, src.as_i8x16()))
6408    }
6409}
6410
6411/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6412///
6413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packus_epi16&expand=4114)
6414#[inline]
6415#[target_feature(enable = "avx512bw,avx512vl")]
6416#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6417#[cfg_attr(test, assert_instr(vpackuswb))]
6418pub fn _mm_maskz_packus_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
6419    unsafe {
6420        let pack = _mm_packus_epi16(a, b).as_i8x16();
6421        transmute(simd_select_bitmask(k, pack, i8x16::ZERO))
6422    }
6423}
6424
6425/// Average packed unsigned 16-bit integers in a and b, and store the results in dst.
6426///
6427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_avg_epu16&expand=388)
6428#[inline]
6429#[target_feature(enable = "avx512bw")]
6430#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6431#[cfg_attr(test, assert_instr(vpavgw))]
6432pub fn _mm512_avg_epu16(a: __m512i, b: __m512i) -> __m512i {
6433    unsafe {
6434        let a = simd_cast::<_, u32x32>(a.as_u16x32());
6435        let b = simd_cast::<_, u32x32>(b.as_u16x32());
6436        let r = simd_shr(simd_add(simd_add(a, b), u32x32::splat(1)), u32x32::splat(1));
6437        transmute(simd_cast::<_, u16x32>(r))
6438    }
6439}
6440
6441/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6442///
6443/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_avg_epu16&expand=389)
6444#[inline]
6445#[target_feature(enable = "avx512bw")]
6446#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6447#[cfg_attr(test, assert_instr(vpavgw))]
6448pub fn _mm512_mask_avg_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
6449    unsafe {
6450        let avg = _mm512_avg_epu16(a, b).as_u16x32();
6451        transmute(simd_select_bitmask(k, avg, src.as_u16x32()))
6452    }
6453}
6454
6455/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6456///
6457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_avg_epu16&expand=390)
6458#[inline]
6459#[target_feature(enable = "avx512bw")]
6460#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6461#[cfg_attr(test, assert_instr(vpavgw))]
6462pub fn _mm512_maskz_avg_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
6463    unsafe {
6464        let avg = _mm512_avg_epu16(a, b).as_u16x32();
6465        transmute(simd_select_bitmask(k, avg, u16x32::ZERO))
6466    }
6467}
6468
6469/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6470///
6471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_avg_epu16&expand=386)
6472#[inline]
6473#[target_feature(enable = "avx512bw,avx512vl")]
6474#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6475#[cfg_attr(test, assert_instr(vpavgw))]
6476pub fn _mm256_mask_avg_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
6477    unsafe {
6478        let avg = _mm256_avg_epu16(a, b).as_u16x16();
6479        transmute(simd_select_bitmask(k, avg, src.as_u16x16()))
6480    }
6481}
6482
6483/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6484///
6485/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_avg_epu16&expand=387)
6486#[inline]
6487#[target_feature(enable = "avx512bw,avx512vl")]
6488#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6489#[cfg_attr(test, assert_instr(vpavgw))]
6490pub fn _mm256_maskz_avg_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
6491    unsafe {
6492        let avg = _mm256_avg_epu16(a, b).as_u16x16();
6493        transmute(simd_select_bitmask(k, avg, u16x16::ZERO))
6494    }
6495}
6496
6497/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6498///
6499/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_avg_epu16&expand=383)
6500#[inline]
6501#[target_feature(enable = "avx512bw,avx512vl")]
6502#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6503#[cfg_attr(test, assert_instr(vpavgw))]
6504pub fn _mm_mask_avg_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6505    unsafe {
6506        let avg = _mm_avg_epu16(a, b).as_u16x8();
6507        transmute(simd_select_bitmask(k, avg, src.as_u16x8()))
6508    }
6509}
6510
6511/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6512///
6513/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_avg_epu16&expand=384)
6514#[inline]
6515#[target_feature(enable = "avx512bw,avx512vl")]
6516#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6517#[cfg_attr(test, assert_instr(vpavgw))]
6518pub fn _mm_maskz_avg_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6519    unsafe {
6520        let avg = _mm_avg_epu16(a, b).as_u16x8();
6521        transmute(simd_select_bitmask(k, avg, u16x8::ZERO))
6522    }
6523}
6524
/// Average packed unsigned 8-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_avg_epu8&expand=397)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgb))]
pub fn _mm512_avg_epu8(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Widen each u8 lane to u16 so the sum `a + b + 1` cannot overflow.
        let a = simd_cast::<_, u16x64>(a.as_u8x64());
        let b = simd_cast::<_, u16x64>(b.as_u8x64());
        // Rounding average per lane: (a + b + 1) >> 1.
        let r = simd_shr(simd_add(simd_add(a, b), u16x64::splat(1)), u16x64::splat(1));
        // Narrow back to u8 lanes; the averaged value always fits in 8 bits.
        transmute(simd_cast::<_, u8x64>(r))
    }
}
6540
6541/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6542///
6543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_avg_epu8&expand=398)
6544#[inline]
6545#[target_feature(enable = "avx512bw")]
6546#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6547#[cfg_attr(test, assert_instr(vpavgb))]
6548pub fn _mm512_mask_avg_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
6549    unsafe {
6550        let avg = _mm512_avg_epu8(a, b).as_u8x64();
6551        transmute(simd_select_bitmask(k, avg, src.as_u8x64()))
6552    }
6553}
6554
6555/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6556///
6557/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_avg_epu8&expand=399)
6558#[inline]
6559#[target_feature(enable = "avx512bw")]
6560#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6561#[cfg_attr(test, assert_instr(vpavgb))]
6562pub fn _mm512_maskz_avg_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
6563    unsafe {
6564        let avg = _mm512_avg_epu8(a, b).as_u8x64();
6565        transmute(simd_select_bitmask(k, avg, u8x64::ZERO))
6566    }
6567}
6568
6569/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6570///
6571/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_avg_epu8&expand=395)
6572#[inline]
6573#[target_feature(enable = "avx512bw,avx512vl")]
6574#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6575#[cfg_attr(test, assert_instr(vpavgb))]
6576pub fn _mm256_mask_avg_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
6577    unsafe {
6578        let avg = _mm256_avg_epu8(a, b).as_u8x32();
6579        transmute(simd_select_bitmask(k, avg, src.as_u8x32()))
6580    }
6581}
6582
6583/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6584///
6585/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_avg_epu8&expand=396)
6586#[inline]
6587#[target_feature(enable = "avx512bw,avx512vl")]
6588#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6589#[cfg_attr(test, assert_instr(vpavgb))]
6590pub fn _mm256_maskz_avg_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
6591    unsafe {
6592        let avg = _mm256_avg_epu8(a, b).as_u8x32();
6593        transmute(simd_select_bitmask(k, avg, u8x32::ZERO))
6594    }
6595}
6596
6597/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6598///
6599/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_avg_epu8&expand=392)
6600#[inline]
6601#[target_feature(enable = "avx512bw,avx512vl")]
6602#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6603#[cfg_attr(test, assert_instr(vpavgb))]
6604pub fn _mm_mask_avg_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
6605    unsafe {
6606        let avg = _mm_avg_epu8(a, b).as_u8x16();
6607        transmute(simd_select_bitmask(k, avg, src.as_u8x16()))
6608    }
6609}
6610
6611/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6612///
6613/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_avg_epu8&expand=393)
6614#[inline]
6615#[target_feature(enable = "avx512bw,avx512vl")]
6616#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6617#[cfg_attr(test, assert_instr(vpavgb))]
6618pub fn _mm_maskz_avg_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
6619    unsafe {
6620        let avg = _mm_avg_epu8(a, b).as_u8x16();
6621        transmute(simd_select_bitmask(k, avg, u8x16::ZERO))
6622    }
6623}
6624
/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi16&expand=5271)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub fn _mm512_sll_epi16(a: __m512i, count: __m128i) -> __m512i {
    // Delegates directly to the `vpsllw` LLVM intrinsic, which shifts every
    // 16-bit lane of `a` by the scalar amount held in `count`.
    unsafe { transmute(vpsllw(a.as_i16x32(), count.as_i16x8())) }
}
6635
6636/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6637///
6638/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi16&expand=5269)
6639#[inline]
6640#[target_feature(enable = "avx512bw")]
6641#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6642#[cfg_attr(test, assert_instr(vpsllw))]
6643pub fn _mm512_mask_sll_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
6644    unsafe {
6645        let shf = _mm512_sll_epi16(a, count).as_i16x32();
6646        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
6647    }
6648}
6649
6650/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6651///
6652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi16&expand=5270)
6653#[inline]
6654#[target_feature(enable = "avx512bw")]
6655#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6656#[cfg_attr(test, assert_instr(vpsllw))]
6657pub fn _mm512_maskz_sll_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
6658    unsafe {
6659        let shf = _mm512_sll_epi16(a, count).as_i16x32();
6660        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
6661    }
6662}
6663
6664/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6665///
6666/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi16&expand=5266)
6667#[inline]
6668#[target_feature(enable = "avx512bw,avx512vl")]
6669#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6670#[cfg_attr(test, assert_instr(vpsllw))]
6671pub fn _mm256_mask_sll_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
6672    unsafe {
6673        let shf = _mm256_sll_epi16(a, count).as_i16x16();
6674        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
6675    }
6676}
6677
6678/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6679///
6680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi16&expand=5267)
6681#[inline]
6682#[target_feature(enable = "avx512bw,avx512vl")]
6683#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6684#[cfg_attr(test, assert_instr(vpsllw))]
6685pub fn _mm256_maskz_sll_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
6686    unsafe {
6687        let shf = _mm256_sll_epi16(a, count).as_i16x16();
6688        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
6689    }
6690}
6691
6692/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6693///
6694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi16&expand=5263)
6695#[inline]
6696#[target_feature(enable = "avx512bw,avx512vl")]
6697#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6698#[cfg_attr(test, assert_instr(vpsllw))]
6699pub fn _mm_mask_sll_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
6700    unsafe {
6701        let shf = _mm_sll_epi16(a, count).as_i16x8();
6702        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
6703    }
6704}
6705
6706/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6707///
6708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi16&expand=5264)
6709#[inline]
6710#[target_feature(enable = "avx512bw,avx512vl")]
6711#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6712#[cfg_attr(test, assert_instr(vpsllw))]
6713pub fn _mm_maskz_sll_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
6714    unsafe {
6715        let shf = _mm_sll_epi16(a, count).as_i16x8();
6716        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
6717    }
6718}
6719
6720/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
6721///
6722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi16&expand=5301)
6723#[inline]
6724#[target_feature(enable = "avx512bw")]
6725#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6726#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
6727#[rustc_legacy_const_generics(1)]
6728pub fn _mm512_slli_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
6729    unsafe {
6730        static_assert_uimm_bits!(IMM8, 8);
6731        if IMM8 >= 16 {
6732            _mm512_setzero_si512()
6733        } else {
6734            transmute(simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16)))
6735        }
6736    }
6737}
6738
6739/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6740///
6741/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi16&expand=5299)
6742#[inline]
6743#[target_feature(enable = "avx512bw")]
6744#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6745#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
6746#[rustc_legacy_const_generics(3)]
6747pub fn _mm512_mask_slli_epi16<const IMM8: u32>(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
6748    unsafe {
6749        static_assert_uimm_bits!(IMM8, 8);
6750        let shf = if IMM8 >= 16 {
6751            u16x32::ZERO
6752        } else {
6753            simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16))
6754        };
6755        transmute(simd_select_bitmask(k, shf, src.as_u16x32()))
6756    }
6757}
6758
6759/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6760///
6761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi16&expand=5300)
6762#[inline]
6763#[target_feature(enable = "avx512bw")]
6764#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6765#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
6766#[rustc_legacy_const_generics(2)]
6767pub fn _mm512_maskz_slli_epi16<const IMM8: u32>(k: __mmask32, a: __m512i) -> __m512i {
6768    unsafe {
6769        static_assert_uimm_bits!(IMM8, 8);
6770        if IMM8 >= 16 {
6771            _mm512_setzero_si512()
6772        } else {
6773            let shf = simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16));
6774            transmute(simd_select_bitmask(k, shf, u16x32::ZERO))
6775        }
6776    }
6777}
6778
6779/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6780///
6781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi16&expand=5296)
6782#[inline]
6783#[target_feature(enable = "avx512bw,avx512vl")]
6784#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6785#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
6786#[rustc_legacy_const_generics(3)]
6787pub fn _mm256_mask_slli_epi16<const IMM8: u32>(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
6788    unsafe {
6789        static_assert_uimm_bits!(IMM8, 8);
6790        let shf = if IMM8 >= 16 {
6791            u16x16::ZERO
6792        } else {
6793            simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16))
6794        };
6795        transmute(simd_select_bitmask(k, shf, src.as_u16x16()))
6796    }
6797}
6798
6799/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6800///
6801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi16&expand=5297)
6802#[inline]
6803#[target_feature(enable = "avx512bw,avx512vl")]
6804#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6805#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
6806#[rustc_legacy_const_generics(2)]
6807pub fn _mm256_maskz_slli_epi16<const IMM8: u32>(k: __mmask16, a: __m256i) -> __m256i {
6808    unsafe {
6809        static_assert_uimm_bits!(IMM8, 8);
6810        if IMM8 >= 16 {
6811            _mm256_setzero_si256()
6812        } else {
6813            let shf = simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16));
6814            transmute(simd_select_bitmask(k, shf, u16x16::ZERO))
6815        }
6816    }
6817}
6818
6819/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6820///
6821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi16&expand=5293)
6822#[inline]
6823#[target_feature(enable = "avx512bw,avx512vl")]
6824#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6825#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
6826#[rustc_legacy_const_generics(3)]
6827pub fn _mm_mask_slli_epi16<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
6828    unsafe {
6829        static_assert_uimm_bits!(IMM8, 8);
6830        let shf = if IMM8 >= 16 {
6831            u16x8::ZERO
6832        } else {
6833            simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16))
6834        };
6835        transmute(simd_select_bitmask(k, shf, src.as_u16x8()))
6836    }
6837}
6838
6839/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6840///
6841/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi16&expand=5294)
6842#[inline]
6843#[target_feature(enable = "avx512bw,avx512vl")]
6844#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6845#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
6846#[rustc_legacy_const_generics(2)]
6847pub fn _mm_maskz_slli_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
6848    unsafe {
6849        static_assert_uimm_bits!(IMM8, 8);
6850        if IMM8 >= 16 {
6851            _mm_setzero_si128()
6852        } else {
6853            let shf = simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16));
6854            transmute(simd_select_bitmask(k, shf, u16x8::ZERO))
6855        }
6856    }
6857}
6858
/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi16&expand=5333)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub fn _mm512_sllv_epi16(a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let count = count.as_u16x32();
        // Per-lane flag: is the shift amount within the defined range 0..16?
        let no_overflow: u16x32 = simd_lt(count, u16x32::splat(u16::BITS as u16));
        // Clamp out-of-range counts to 0 *before* shifting: `simd_shl` by an
        // amount >= the lane width is undefined behavior, so it must never
        // observe such a count.
        let count = simd_select(no_overflow, count, u16x32::ZERO);
        // Shift, then zero the lanes whose original count was out of range.
        simd_select(no_overflow, simd_shl(a.as_u16x32(), count), u16x32::ZERO).as_m512i()
    }
}
6874
6875/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6876///
6877/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi16&expand=5331)
6878#[inline]
6879#[target_feature(enable = "avx512bw")]
6880#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6881#[cfg_attr(test, assert_instr(vpsllvw))]
6882pub fn _mm512_mask_sllv_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
6883    unsafe {
6884        let shf = _mm512_sllv_epi16(a, count).as_i16x32();
6885        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
6886    }
6887}
6888
6889/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6890///
6891/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi16&expand=5332)
6892#[inline]
6893#[target_feature(enable = "avx512bw")]
6894#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6895#[cfg_attr(test, assert_instr(vpsllvw))]
6896pub fn _mm512_maskz_sllv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
6897    unsafe {
6898        let shf = _mm512_sllv_epi16(a, count).as_i16x32();
6899        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
6900    }
6901}
6902
/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sllv_epi16&expand=5330)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub fn _mm256_sllv_epi16(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u16x16();
        // Per-lane flag: is the shift amount within the defined range 0..16?
        let no_overflow: u16x16 = simd_lt(count, u16x16::splat(u16::BITS as u16));
        // Clamp out-of-range counts to 0 *before* shifting: `simd_shl` by an
        // amount >= the lane width is undefined behavior, so it must never
        // observe such a count.
        let count = simd_select(no_overflow, count, u16x16::ZERO);
        // Shift, then zero the lanes whose original count was out of range.
        simd_select(no_overflow, simd_shl(a.as_u16x16(), count), u16x16::ZERO).as_m256i()
    }
}
6918
6919/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6920///
6921/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi16&expand=5328)
6922#[inline]
6923#[target_feature(enable = "avx512bw,avx512vl")]
6924#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6925#[cfg_attr(test, assert_instr(vpsllvw))]
6926pub fn _mm256_mask_sllv_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
6927    unsafe {
6928        let shf = _mm256_sllv_epi16(a, count).as_i16x16();
6929        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
6930    }
6931}
6932
6933/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6934///
6935/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi16&expand=5329)
6936#[inline]
6937#[target_feature(enable = "avx512bw,avx512vl")]
6938#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6939#[cfg_attr(test, assert_instr(vpsllvw))]
6940pub fn _mm256_maskz_sllv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
6941    unsafe {
6942        let shf = _mm256_sllv_epi16(a, count).as_i16x16();
6943        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
6944    }
6945}
6946
/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sllv_epi16&expand=5327)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub fn _mm_sllv_epi16(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u16x8();
        // Per-lane flag: is the shift amount within the defined range 0..16?
        let no_overflow: u16x8 = simd_lt(count, u16x8::splat(u16::BITS as u16));
        // Clamp out-of-range counts to 0 *before* shifting: `simd_shl` by an
        // amount >= the lane width is undefined behavior, so it must never
        // observe such a count.
        let count = simd_select(no_overflow, count, u16x8::ZERO);
        // Shift, then zero the lanes whose original count was out of range.
        simd_select(no_overflow, simd_shl(a.as_u16x8(), count), u16x8::ZERO).as_m128i()
    }
}
6962
6963/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6964///
6965/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi16&expand=5325)
6966#[inline]
6967#[target_feature(enable = "avx512bw,avx512vl")]
6968#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6969#[cfg_attr(test, assert_instr(vpsllvw))]
6970pub fn _mm_mask_sllv_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
6971    unsafe {
6972        let shf = _mm_sllv_epi16(a, count).as_i16x8();
6973        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
6974    }
6975}
6976
6977/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6978///
6979/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi16&expand=5326)
6980#[inline]
6981#[target_feature(enable = "avx512bw,avx512vl")]
6982#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6983#[cfg_attr(test, assert_instr(vpsllvw))]
6984pub fn _mm_maskz_sllv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
6985    unsafe {
6986        let shf = _mm_sllv_epi16(a, count).as_i16x8();
6987        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
6988    }
6989}
6990
/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi16&expand=5483)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub fn _mm512_srl_epi16(a: __m512i, count: __m128i) -> __m512i {
    // Delegates directly to the `vpsrlw` LLVM intrinsic, which shifts every
    // 16-bit lane of `a` by the scalar amount held in `count`.
    unsafe { transmute(vpsrlw(a.as_i16x32(), count.as_i16x8())) }
}
7001
7002/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7003///
7004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi16&expand=5481)
7005#[inline]
7006#[target_feature(enable = "avx512bw")]
7007#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7008#[cfg_attr(test, assert_instr(vpsrlw))]
7009pub fn _mm512_mask_srl_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
7010    unsafe {
7011        let shf = _mm512_srl_epi16(a, count).as_i16x32();
7012        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
7013    }
7014}
7015
7016/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7017///
7018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi16&expand=5482)
7019#[inline]
7020#[target_feature(enable = "avx512bw")]
7021#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7022#[cfg_attr(test, assert_instr(vpsrlw))]
7023pub fn _mm512_maskz_srl_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
7024    unsafe {
7025        let shf = _mm512_srl_epi16(a, count).as_i16x32();
7026        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
7027    }
7028}
7029
7030/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7031///
7032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi16&expand=5478)
7033#[inline]
7034#[target_feature(enable = "avx512bw,avx512vl")]
7035#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7036#[cfg_attr(test, assert_instr(vpsrlw))]
7037pub fn _mm256_mask_srl_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
7038    unsafe {
7039        let shf = _mm256_srl_epi16(a, count).as_i16x16();
7040        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
7041    }
7042}
7043
7044/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7045///
7046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi16&expand=5479)
7047#[inline]
7048#[target_feature(enable = "avx512bw,avx512vl")]
7049#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7050#[cfg_attr(test, assert_instr(vpsrlw))]
7051pub fn _mm256_maskz_srl_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
7052    unsafe {
7053        let shf = _mm256_srl_epi16(a, count).as_i16x16();
7054        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
7055    }
7056}
7057
7058/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7059///
7060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi16&expand=5475)
7061#[inline]
7062#[target_feature(enable = "avx512bw,avx512vl")]
7063#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7064#[cfg_attr(test, assert_instr(vpsrlw))]
7065pub fn _mm_mask_srl_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7066    unsafe {
7067        let shf = _mm_srl_epi16(a, count).as_i16x8();
7068        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
7069    }
7070}
7071
7072/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7073///
7074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi16&expand=5476)
7075#[inline]
7076#[target_feature(enable = "avx512bw,avx512vl")]
7077#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7078#[cfg_attr(test, assert_instr(vpsrlw))]
7079pub fn _mm_maskz_srl_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7080    unsafe {
7081        let shf = _mm_srl_epi16(a, count).as_i16x8();
7082        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
7083    }
7084}
7085
/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi16&expand=5513)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_srli_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // `simd_shr` with a count >= the element width is undefined, so shifts
        // of 16 or more (which logically zero every lane) are special-cased.
        if IMM8 >= 16 {
            _mm512_setzero_si512()
        } else {
            transmute(simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16)))
        }
    }
}
7104
/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi16&expand=5511)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_srli_epi16<const IMM8: u32>(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // `simd_shr` with a count >= the element width is undefined, so shifts
        // of 16 or more are materialized as an explicit zero vector before the
        // writemask blend with `src`.
        let shf = if IMM8 >= 16 {
            u16x32::ZERO
        } else {
            simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16))
        };
        transmute(simd_select_bitmask(k, shf, src.as_u16x32()))
    }
}
7124
/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi16&expand=5512)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_srli_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // NOTE: `IMM8` is declared `i32` here while the sibling
        // `_mm512_srli_epi16` uses `u32`; the signature is stabilized and
        // cannot be changed. Negative values are rejected at compile time by
        // `static_assert_uimm_bits!` above, so `IMM8 as u16` below is safe.
        if IMM8 >= 16 {
            // A shift of 16 or more zeroes every lane, masked or not, so the
            // zeromask blend can be skipped entirely.
            _mm512_setzero_si512()
        } else {
            let shf = simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16));
            transmute(simd_select_bitmask(k, shf, u16x32::ZERO))
        }
    }
}
7145
7146/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7147///
7148/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi16&expand=5508)
7149#[inline]
7150#[target_feature(enable = "avx512bw,avx512vl")]
7151#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7152#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
7153#[rustc_legacy_const_generics(3)]
7154pub fn _mm256_mask_srli_epi16<const IMM8: i32>(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
7155    unsafe {
7156        static_assert_uimm_bits!(IMM8, 8);
7157        let shf = _mm256_srli_epi16::<IMM8>(a);
7158        transmute(simd_select_bitmask(k, shf.as_i16x16(), src.as_i16x16()))
7159    }
7160}
7161
7162/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7163///
7164/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi16&expand=5509)
7165#[inline]
7166#[target_feature(enable = "avx512bw,avx512vl")]
7167#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7168#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
7169#[rustc_legacy_const_generics(2)]
7170pub fn _mm256_maskz_srli_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
7171    unsafe {
7172        static_assert_uimm_bits!(IMM8, 8);
7173        let shf = _mm256_srli_epi16::<IMM8>(a);
7174        transmute(simd_select_bitmask(k, shf.as_i16x16(), i16x16::ZERO))
7175    }
7176}
7177
7178/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7179///
7180/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi16&expand=5505)
7181#[inline]
7182#[target_feature(enable = "avx512bw,avx512vl")]
7183#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7184#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
7185#[rustc_legacy_const_generics(3)]
7186pub fn _mm_mask_srli_epi16<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
7187    unsafe {
7188        static_assert_uimm_bits!(IMM8, 8);
7189        let shf = _mm_srli_epi16::<IMM8>(a);
7190        transmute(simd_select_bitmask(k, shf.as_i16x8(), src.as_i16x8()))
7191    }
7192}
7193
7194/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7195///
7196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi16&expand=5506)
7197#[inline]
7198#[target_feature(enable = "avx512bw,avx512vl")]
7199#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7200#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
7201#[rustc_legacy_const_generics(2)]
7202pub fn _mm_maskz_srli_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
7203    unsafe {
7204        static_assert_uimm_bits!(IMM8, 8);
7205        let shf = _mm_srli_epi16::<IMM8>(a);
7206        transmute(simd_select_bitmask(k, shf.as_i16x8(), i16x8::ZERO))
7207    }
7208}
7209
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi16&expand=5545)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub fn _mm512_srlv_epi16(a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let count = count.as_u16x32();
        // Lanes with a count >= 16 must produce 0, but `simd_shr` with an
        // out-of-range count is undefined. Clamp those counts to 0 so the
        // shift itself is always defined, then zero the affected lanes in the
        // final select. This pattern is recognized and folded to `vpsrlvw`.
        let no_overflow: u16x32 = simd_lt(count, u16x32::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, count, u16x32::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u16x32(), count), u16x32::ZERO).as_m512i()
    }
}
7225
7226/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7227///
7228/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi16&expand=5543)
7229#[inline]
7230#[target_feature(enable = "avx512bw")]
7231#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7232#[cfg_attr(test, assert_instr(vpsrlvw))]
7233pub fn _mm512_mask_srlv_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
7234    unsafe {
7235        let shf = _mm512_srlv_epi16(a, count).as_i16x32();
7236        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
7237    }
7238}
7239
7240/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7241///
7242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi16&expand=5544)
7243#[inline]
7244#[target_feature(enable = "avx512bw")]
7245#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7246#[cfg_attr(test, assert_instr(vpsrlvw))]
7247pub fn _mm512_maskz_srlv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
7248    unsafe {
7249        let shf = _mm512_srlv_epi16(a, count).as_i16x32();
7250        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
7251    }
7252}
7253
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srlv_epi16&expand=5542)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub fn _mm256_srlv_epi16(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u16x16();
        // Lanes with a count >= 16 must produce 0, but `simd_shr` with an
        // out-of-range count is undefined. Clamp those counts to 0, then zero
        // the affected lanes in the final select (folds to `vpsrlvw`).
        let no_overflow: u16x16 = simd_lt(count, u16x16::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, count, u16x16::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u16x16(), count), u16x16::ZERO).as_m256i()
    }
}
7269
7270/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7271///
7272/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi16&expand=5540)
7273#[inline]
7274#[target_feature(enable = "avx512bw,avx512vl")]
7275#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7276#[cfg_attr(test, assert_instr(vpsrlvw))]
7277pub fn _mm256_mask_srlv_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
7278    unsafe {
7279        let shf = _mm256_srlv_epi16(a, count).as_i16x16();
7280        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
7281    }
7282}
7283
7284/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7285///
7286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi16&expand=5541)
7287#[inline]
7288#[target_feature(enable = "avx512bw,avx512vl")]
7289#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7290#[cfg_attr(test, assert_instr(vpsrlvw))]
7291pub fn _mm256_maskz_srlv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
7292    unsafe {
7293        let shf = _mm256_srlv_epi16(a, count).as_i16x16();
7294        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
7295    }
7296}
7297
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srlv_epi16&expand=5539)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub fn _mm_srlv_epi16(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u16x8();
        // Lanes with a count >= 16 must produce 0, but `simd_shr` with an
        // out-of-range count is undefined. Clamp those counts to 0, then zero
        // the affected lanes in the final select (folds to `vpsrlvw`).
        let no_overflow: u16x8 = simd_lt(count, u16x8::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, count, u16x8::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u16x8(), count), u16x8::ZERO).as_m128i()
    }
}
7313
7314/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7315///
7316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi16&expand=5537)
7317#[inline]
7318#[target_feature(enable = "avx512bw,avx512vl")]
7319#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7320#[cfg_attr(test, assert_instr(vpsrlvw))]
7321pub fn _mm_mask_srlv_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7322    unsafe {
7323        let shf = _mm_srlv_epi16(a, count).as_i16x8();
7324        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
7325    }
7326}
7327
7328/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7329///
7330/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi16&expand=5538)
7331#[inline]
7332#[target_feature(enable = "avx512bw,avx512vl")]
7333#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7334#[cfg_attr(test, assert_instr(vpsrlvw))]
7335pub fn _mm_maskz_srlv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7336    unsafe {
7337        let shf = _mm_srlv_epi16(a, count).as_i16x8();
7338        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
7339    }
7340}
7341
/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi16&expand=5398)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub fn _mm512_sra_epi16(a: __m512i, count: __m128i) -> __m512i {
    // Forwarded directly to the LLVM `vpsraw` intrinsic, which implements the
    // instruction's own count semantics (per Intel's documentation, oversized
    // counts shift in sign bits across the whole element), so no clamping is
    // done here.
    unsafe { transmute(vpsraw(a.as_i16x32(), count.as_i16x8())) }
}
7352
7353/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7354///
7355/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi16&expand=5396)
7356#[inline]
7357#[target_feature(enable = "avx512bw")]
7358#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7359#[cfg_attr(test, assert_instr(vpsraw))]
7360pub fn _mm512_mask_sra_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
7361    unsafe {
7362        let shf = _mm512_sra_epi16(a, count).as_i16x32();
7363        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
7364    }
7365}
7366
7367/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7368///
7369/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi16&expand=5397)
7370#[inline]
7371#[target_feature(enable = "avx512bw")]
7372#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7373#[cfg_attr(test, assert_instr(vpsraw))]
7374pub fn _mm512_maskz_sra_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
7375    unsafe {
7376        let shf = _mm512_sra_epi16(a, count).as_i16x32();
7377        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
7378    }
7379}
7380
7381/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7382///
7383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi16&expand=5393)
7384#[inline]
7385#[target_feature(enable = "avx512bw,avx512vl")]
7386#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7387#[cfg_attr(test, assert_instr(vpsraw))]
7388pub fn _mm256_mask_sra_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
7389    unsafe {
7390        let shf = _mm256_sra_epi16(a, count).as_i16x16();
7391        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
7392    }
7393}
7394
7395/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7396///
7397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi16&expand=5394)
7398#[inline]
7399#[target_feature(enable = "avx512bw,avx512vl")]
7400#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7401#[cfg_attr(test, assert_instr(vpsraw))]
7402pub fn _mm256_maskz_sra_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
7403    unsafe {
7404        let shf = _mm256_sra_epi16(a, count).as_i16x16();
7405        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
7406    }
7407}
7408
7409/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7410///
7411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi16&expand=5390)
7412#[inline]
7413#[target_feature(enable = "avx512bw,avx512vl")]
7414#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7415#[cfg_attr(test, assert_instr(vpsraw))]
7416pub fn _mm_mask_sra_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7417    unsafe {
7418        let shf = _mm_sra_epi16(a, count).as_i16x8();
7419        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
7420    }
7421}
7422
7423/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7424///
7425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi16&expand=5391)
7426#[inline]
7427#[target_feature(enable = "avx512bw,avx512vl")]
7428#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7429#[cfg_attr(test, assert_instr(vpsraw))]
7430pub fn _mm_maskz_sra_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7431    unsafe {
7432        let shf = _mm_sra_epi16(a, count).as_i16x8();
7433        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
7434    }
7435}
7436
/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi16&expand=5427)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_srai_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // An arithmetic right shift by >= 16 yields the same result as a shift
        // by 15 (every bit becomes the sign bit), so the count is clamped with
        // `.min(15)` to keep `simd_shr` within its defined range.
        transmute(simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16)))
    }
}
7451
/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi16&expand=5425)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_srai_epi16<const IMM8: u32>(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Clamp to 15: arithmetic shifts by >= 16 equal a shift by 15, and
        // `simd_shr` with an out-of-range count is undefined.
        let shf = simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16));
        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
    }
}
7467
/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi16&expand=5426)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_srai_epi16<const IMM8: u32>(k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Clamp to 15: arithmetic shifts by >= 16 equal a shift by 15, and
        // `simd_shr` with an out-of-range count is undefined.
        let shf = simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16));
        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
    }
}
7483
/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi16&expand=5422)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_srai_epi16<const IMM8: u32>(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Clamp to 15: arithmetic shifts by >= 16 equal a shift by 15, and
        // `simd_shr` with an out-of-range count is undefined.
        let r = simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16));
        transmute(simd_select_bitmask(k, r, src.as_i16x16()))
    }
}
7499
/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi16&expand=5423)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_srai_epi16<const IMM8: u32>(k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Clamp to 15: arithmetic shifts by >= 16 equal a shift by 15, and
        // `simd_shr` with an out-of-range count is undefined.
        let r = simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16));
        transmute(simd_select_bitmask(k, r, i16x16::ZERO))
    }
}
7515
/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi16&expand=5419)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_mask_srai_epi16<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Clamp to 15: arithmetic shifts by >= 16 equal a shift by 15, and
        // `simd_shr` with an out-of-range count is undefined.
        let r = simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16));
        transmute(simd_select_bitmask(k, r, src.as_i16x8()))
    }
}
7531
/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi16&expand=5420)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_maskz_srai_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Clamp to 15: arithmetic shifts by >= 16 equal a shift by 15, and
        // `simd_shr` with an out-of-range count is undefined.
        let r = simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16));
        transmute(simd_select_bitmask(k, r, i16x8::ZERO))
    }
}
7547
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi16&expand=5456)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm512_srav_epi16(a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let count = count.as_u16x32();
        // Lanes with a count >= 16 must shift in sign bits across the whole
        // element, which equals an arithmetic shift by 15. Clamp those lanes
        // to 15 so `simd_shr` never sees an out-of-range (undefined) count;
        // this pattern folds to `vpsravw`.
        let no_overflow: u16x32 = simd_lt(count, u16x32::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, transmute(count), i16x32::splat(15));
        simd_shr(a.as_i16x32(), count).as_m512i()
    }
}
7563
7564/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7565///
7566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi16&expand=5454)
7567#[inline]
7568#[target_feature(enable = "avx512bw")]
7569#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7570#[cfg_attr(test, assert_instr(vpsravw))]
7571pub fn _mm512_mask_srav_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
7572    unsafe {
7573        let shf = _mm512_srav_epi16(a, count).as_i16x32();
7574        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
7575    }
7576}
7577
7578/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7579///
7580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi16&expand=5455)
7581#[inline]
7582#[target_feature(enable = "avx512bw")]
7583#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7584#[cfg_attr(test, assert_instr(vpsravw))]
7585pub fn _mm512_maskz_srav_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
7586    unsafe {
7587        let shf = _mm512_srav_epi16(a, count).as_i16x32();
7588        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
7589    }
7590}
7591
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srav_epi16&expand=5453)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm256_srav_epi16(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u16x16();
        // Counts >= 16 behave like an arithmetic shift by 15 (sign fill);
        // clamp those lanes so `simd_shr` never sees an out-of-range
        // (undefined) count. Folds to `vpsravw`.
        let no_overflow: u16x16 = simd_lt(count, u16x16::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, transmute(count), i16x16::splat(15));
        simd_shr(a.as_i16x16(), count).as_m256i()
    }
}
7607
7608/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7609///
7610/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi16&expand=5451)
7611#[inline]
7612#[target_feature(enable = "avx512bw,avx512vl")]
7613#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7614#[cfg_attr(test, assert_instr(vpsravw))]
7615pub fn _mm256_mask_srav_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
7616    unsafe {
7617        let shf = _mm256_srav_epi16(a, count).as_i16x16();
7618        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
7619    }
7620}
7621
7622/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7623///
7624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi16&expand=5452)
7625#[inline]
7626#[target_feature(enable = "avx512bw,avx512vl")]
7627#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7628#[cfg_attr(test, assert_instr(vpsravw))]
7629pub fn _mm256_maskz_srav_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
7630    unsafe {
7631        let shf = _mm256_srav_epi16(a, count).as_i16x16();
7632        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
7633    }
7634}
7635
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srav_epi16&expand=5450)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm_srav_epi16(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u16x8();
        // Lanes with a shift amount >= 16 would be UB for `simd_shr`; clamp
        // them to 15, which for an arithmetic right shift produces the same
        // all-sign-bits result the hardware gives for oversized counts.
        let no_overflow: u16x8 = simd_lt(count, u16x8::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, transmute(count), i16x8::splat(15));
        simd_shr(a.as_i16x8(), count).as_m128i()
    }
}
7651
7652/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7653///
7654/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi16&expand=5448)
7655#[inline]
7656#[target_feature(enable = "avx512bw,avx512vl")]
7657#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7658#[cfg_attr(test, assert_instr(vpsravw))]
7659pub fn _mm_mask_srav_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7660    unsafe {
7661        let shf = _mm_srav_epi16(a, count).as_i16x8();
7662        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
7663    }
7664}
7665
7666/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7667///
7668/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi16&expand=5449)
7669#[inline]
7670#[target_feature(enable = "avx512bw,avx512vl")]
7671#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7672#[cfg_attr(test, assert_instr(vpsravw))]
7673pub fn _mm_maskz_srav_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7674    unsafe {
7675        let shf = _mm_srav_epi16(a, count).as_i16x8();
7676        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
7677    }
7678}
7679
/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi16&expand=4226)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
pub fn _mm512_permutex2var_epi16(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
    // Delegates to the LLVM vpermi2w intrinsic: each idx element selects a
    // word from the two-source table formed by `a` and `b`.
    unsafe { transmute(vpermi2w(a.as_i16x32(), idx.as_i16x32(), b.as_i16x32())) }
}
7690
7691/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
7692///
7693/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi16&expand=4223)
7694#[inline]
7695#[target_feature(enable = "avx512bw")]
7696#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7697#[cfg_attr(test, assert_instr(vpermt2w))]
7698pub fn _mm512_mask_permutex2var_epi16(
7699    a: __m512i,
7700    k: __mmask32,
7701    idx: __m512i,
7702    b: __m512i,
7703) -> __m512i {
7704    unsafe {
7705        let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32();
7706        transmute(simd_select_bitmask(k, permute, a.as_i16x32()))
7707    }
7708}
7709
7710/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7711///
7712/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi16&expand=4225)
7713#[inline]
7714#[target_feature(enable = "avx512bw")]
7715#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7716#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
7717pub fn _mm512_maskz_permutex2var_epi16(
7718    k: __mmask32,
7719    a: __m512i,
7720    idx: __m512i,
7721    b: __m512i,
7722) -> __m512i {
7723    unsafe {
7724        let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32();
7725        transmute(simd_select_bitmask(k, permute, i16x32::ZERO))
7726    }
7727}
7728
7729/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
7730///
7731/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi16&expand=4224)
7732#[inline]
7733#[target_feature(enable = "avx512bw")]
7734#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7735#[cfg_attr(test, assert_instr(vpermi2w))]
7736pub fn _mm512_mask2_permutex2var_epi16(
7737    a: __m512i,
7738    idx: __m512i,
7739    k: __mmask32,
7740    b: __m512i,
7741) -> __m512i {
7742    unsafe {
7743        let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32();
7744        transmute(simd_select_bitmask(k, permute, idx.as_i16x32()))
7745    }
7746}
7747
/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi16&expand=4222)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
pub fn _mm256_permutex2var_epi16(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
    // Delegates to the 256-bit LLVM vpermi2w intrinsic: each idx element
    // selects a word from the two-source table formed by `a` and `b`.
    unsafe { transmute(vpermi2w256(a.as_i16x16(), idx.as_i16x16(), b.as_i16x16())) }
}
7758
7759/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
7760///
7761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi16&expand=4219)
7762#[inline]
7763#[target_feature(enable = "avx512bw,avx512vl")]
7764#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7765#[cfg_attr(test, assert_instr(vpermt2w))]
7766pub fn _mm256_mask_permutex2var_epi16(
7767    a: __m256i,
7768    k: __mmask16,
7769    idx: __m256i,
7770    b: __m256i,
7771) -> __m256i {
7772    unsafe {
7773        let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16();
7774        transmute(simd_select_bitmask(k, permute, a.as_i16x16()))
7775    }
7776}
7777
7778/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7779///
7780/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi16&expand=4221)
7781#[inline]
7782#[target_feature(enable = "avx512bw,avx512vl")]
7783#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7784#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
7785pub fn _mm256_maskz_permutex2var_epi16(
7786    k: __mmask16,
7787    a: __m256i,
7788    idx: __m256i,
7789    b: __m256i,
7790) -> __m256i {
7791    unsafe {
7792        let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16();
7793        transmute(simd_select_bitmask(k, permute, i16x16::ZERO))
7794    }
7795}
7796
7797/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
7798///
7799/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi16&expand=4220)
7800#[inline]
7801#[target_feature(enable = "avx512bw,avx512vl")]
7802#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7803#[cfg_attr(test, assert_instr(vpermi2w))]
7804pub fn _mm256_mask2_permutex2var_epi16(
7805    a: __m256i,
7806    idx: __m256i,
7807    k: __mmask16,
7808    b: __m256i,
7809) -> __m256i {
7810    unsafe {
7811        let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16();
7812        transmute(simd_select_bitmask(k, permute, idx.as_i16x16()))
7813    }
7814}
7815
/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi16&expand=4218)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
pub fn _mm_permutex2var_epi16(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
    // Delegates to the 128-bit LLVM vpermi2w intrinsic: each idx element
    // selects a word from the two-source table formed by `a` and `b`.
    unsafe { transmute(vpermi2w128(a.as_i16x8(), idx.as_i16x8(), b.as_i16x8())) }
}
7826
7827/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
7828///
7829/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi16&expand=4215)
7830#[inline]
7831#[target_feature(enable = "avx512bw,avx512vl")]
7832#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7833#[cfg_attr(test, assert_instr(vpermt2w))]
7834pub fn _mm_mask_permutex2var_epi16(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i {
7835    unsafe {
7836        let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8();
7837        transmute(simd_select_bitmask(k, permute, a.as_i16x8()))
7838    }
7839}
7840
7841/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7842///
7843/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi16&expand=4217)
7844#[inline]
7845#[target_feature(enable = "avx512bw,avx512vl")]
7846#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7847#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
7848pub fn _mm_maskz_permutex2var_epi16(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
7849    unsafe {
7850        let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8();
7851        transmute(simd_select_bitmask(k, permute, i16x8::ZERO))
7852    }
7853}
7854
7855/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
7856///
7857/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi16&expand=4216)
7858#[inline]
7859#[target_feature(enable = "avx512bw,avx512vl")]
7860#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7861#[cfg_attr(test, assert_instr(vpermi2w))]
7862pub fn _mm_mask2_permutex2var_epi16(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i {
7863    unsafe {
7864        let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8();
7865        transmute(simd_select_bitmask(k, permute, idx.as_i16x8()))
7866    }
7867}
7868
/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi16&expand=4295)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm512_permutexvar_epi16(idx: __m512i, a: __m512i) -> __m512i {
    // Delegates to the LLVM vpermw intrinsic; note the argument order is
    // (data, indices), the reverse of the public intrinsic's (idx, a).
    unsafe { transmute(vpermw(a.as_i16x32(), idx.as_i16x32())) }
}
7879
7880/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7881///
7882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi16&expand=4293)
7883#[inline]
7884#[target_feature(enable = "avx512bw")]
7885#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7886#[cfg_attr(test, assert_instr(vpermw))]
7887pub fn _mm512_mask_permutexvar_epi16(
7888    src: __m512i,
7889    k: __mmask32,
7890    idx: __m512i,
7891    a: __m512i,
7892) -> __m512i {
7893    unsafe {
7894        let permute = _mm512_permutexvar_epi16(idx, a).as_i16x32();
7895        transmute(simd_select_bitmask(k, permute, src.as_i16x32()))
7896    }
7897}
7898
7899/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7900///
7901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi16&expand=4294)
7902#[inline]
7903#[target_feature(enable = "avx512bw")]
7904#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7905#[cfg_attr(test, assert_instr(vpermw))]
7906pub fn _mm512_maskz_permutexvar_epi16(k: __mmask32, idx: __m512i, a: __m512i) -> __m512i {
7907    unsafe {
7908        let permute = _mm512_permutexvar_epi16(idx, a).as_i16x32();
7909        transmute(simd_select_bitmask(k, permute, i16x32::ZERO))
7910    }
7911}
7912
/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi16&expand=4292)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm256_permutexvar_epi16(idx: __m256i, a: __m256i) -> __m256i {
    // Delegates to the 256-bit LLVM vpermw intrinsic; note the argument order
    // is (data, indices), the reverse of the public intrinsic's (idx, a).
    unsafe { transmute(vpermw256(a.as_i16x16(), idx.as_i16x16())) }
}
7923
7924/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7925///
7926/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi16&expand=4290)
7927#[inline]
7928#[target_feature(enable = "avx512bw,avx512vl")]
7929#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7930#[cfg_attr(test, assert_instr(vpermw))]
7931pub fn _mm256_mask_permutexvar_epi16(
7932    src: __m256i,
7933    k: __mmask16,
7934    idx: __m256i,
7935    a: __m256i,
7936) -> __m256i {
7937    unsafe {
7938        let permute = _mm256_permutexvar_epi16(idx, a).as_i16x16();
7939        transmute(simd_select_bitmask(k, permute, src.as_i16x16()))
7940    }
7941}
7942
7943/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7944///
7945/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi16&expand=4291)
7946#[inline]
7947#[target_feature(enable = "avx512bw,avx512vl")]
7948#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7949#[cfg_attr(test, assert_instr(vpermw))]
7950pub fn _mm256_maskz_permutexvar_epi16(k: __mmask16, idx: __m256i, a: __m256i) -> __m256i {
7951    unsafe {
7952        let permute = _mm256_permutexvar_epi16(idx, a).as_i16x16();
7953        transmute(simd_select_bitmask(k, permute, i16x16::ZERO))
7954    }
7955}
7956
/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutexvar_epi16&expand=4289)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm_permutexvar_epi16(idx: __m128i, a: __m128i) -> __m128i {
    // Delegates to the 128-bit LLVM vpermw intrinsic; note the argument order
    // is (data, indices), the reverse of the public intrinsic's (idx, a).
    unsafe { transmute(vpermw128(a.as_i16x8(), idx.as_i16x8())) }
}
7967
7968/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7969///
7970/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutexvar_epi16&expand=4287)
7971#[inline]
7972#[target_feature(enable = "avx512bw,avx512vl")]
7973#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7974#[cfg_attr(test, assert_instr(vpermw))]
7975pub fn _mm_mask_permutexvar_epi16(src: __m128i, k: __mmask8, idx: __m128i, a: __m128i) -> __m128i {
7976    unsafe {
7977        let permute = _mm_permutexvar_epi16(idx, a).as_i16x8();
7978        transmute(simd_select_bitmask(k, permute, src.as_i16x8()))
7979    }
7980}
7981
7982/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7983///
7984/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutexvar_epi16&expand=4288)
7985#[inline]
7986#[target_feature(enable = "avx512bw,avx512vl")]
7987#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7988#[cfg_attr(test, assert_instr(vpermw))]
7989pub fn _mm_maskz_permutexvar_epi16(k: __mmask8, idx: __m128i, a: __m128i) -> __m128i {
7990    unsafe {
7991        let permute = _mm_permutexvar_epi16(idx, a).as_i16x8();
7992        transmute(simd_select_bitmask(k, permute, i16x8::ZERO))
7993    }
7994}
7995
7996/// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst.
7997///
7998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi16&expand=430)
7999#[inline]
8000#[target_feature(enable = "avx512bw")]
8001#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8002#[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw
8003pub fn _mm512_mask_blend_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
8004    unsafe { transmute(simd_select_bitmask(k, b.as_i16x32(), a.as_i16x32())) }
8005}
8006
8007/// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst.
8008///
8009/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi16&expand=429)
8010#[inline]
8011#[target_feature(enable = "avx512bw,avx512vl")]
8012#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8013#[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw
8014pub fn _mm256_mask_blend_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
8015    unsafe { transmute(simd_select_bitmask(k, b.as_i16x16(), a.as_i16x16())) }
8016}
8017
8018/// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst.
8019///
8020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi16&expand=427)
8021#[inline]
8022#[target_feature(enable = "avx512bw,avx512vl")]
8023#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8024#[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw
8025pub fn _mm_mask_blend_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
8026    unsafe { transmute(simd_select_bitmask(k, b.as_i16x8(), a.as_i16x8())) }
8027}
8028
8029/// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst.
8030///
8031/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi8&expand=441)
8032#[inline]
8033#[target_feature(enable = "avx512bw")]
8034#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8035#[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb
8036pub fn _mm512_mask_blend_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
8037    unsafe { transmute(simd_select_bitmask(k, b.as_i8x64(), a.as_i8x64())) }
8038}
8039
8040/// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst.
8041///
8042/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi8&expand=440)
8043#[inline]
8044#[target_feature(enable = "avx512bw,avx512vl")]
8045#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8046#[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb
8047pub fn _mm256_mask_blend_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
8048    unsafe { transmute(simd_select_bitmask(k, b.as_i8x32(), a.as_i8x32())) }
8049}
8050
8051/// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst.
8052///
8053/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi8&expand=439)
8054#[inline]
8055#[target_feature(enable = "avx512bw,avx512vl")]
8056#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8057#[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb
8058pub fn _mm_mask_blend_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
8059    unsafe { transmute(simd_select_bitmask(k, b.as_i8x16(), a.as_i8x16())) }
8060}
8061
/// Broadcast the low packed 16-bit integer from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastw_epi16&expand=587)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm512_broadcastw_epi16(a: __m128i) -> __m512i {
    unsafe {
        // Widen `a` to 512 bits (upper lanes undefined but unread), then
        // shuffle with an all-zero index vector so every one of the 32 output
        // lanes is a copy of element 0.
        let a = _mm512_castsi128_si512(a).as_i16x32();
        let ret: i16x32 = simd_shuffle!(
            a,
            a,
            [
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0,
            ],
        );
        transmute(ret)
    }
}
8083
8084/// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8085///
8086/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastw_epi16&expand=588)
8087#[inline]
8088#[target_feature(enable = "avx512bw")]
8089#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8090#[cfg_attr(test, assert_instr(vpbroadcastw))]
8091pub fn _mm512_mask_broadcastw_epi16(src: __m512i, k: __mmask32, a: __m128i) -> __m512i {
8092    unsafe {
8093        let broadcast = _mm512_broadcastw_epi16(a).as_i16x32();
8094        transmute(simd_select_bitmask(k, broadcast, src.as_i16x32()))
8095    }
8096}
8097
8098/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8099///
8100/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastw_epi16&expand=589)
8101#[inline]
8102#[target_feature(enable = "avx512bw")]
8103#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8104#[cfg_attr(test, assert_instr(vpbroadcastw))]
8105pub fn _mm512_maskz_broadcastw_epi16(k: __mmask32, a: __m128i) -> __m512i {
8106    unsafe {
8107        let broadcast = _mm512_broadcastw_epi16(a).as_i16x32();
8108        transmute(simd_select_bitmask(k, broadcast, i16x32::ZERO))
8109    }
8110}
8111
8112/// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8113///
8114/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastw_epi16&expand=585)
8115#[inline]
8116#[target_feature(enable = "avx512bw,avx512vl")]
8117#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8118#[cfg_attr(test, assert_instr(vpbroadcastw))]
8119pub fn _mm256_mask_broadcastw_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
8120    unsafe {
8121        let broadcast = _mm256_broadcastw_epi16(a).as_i16x16();
8122        transmute(simd_select_bitmask(k, broadcast, src.as_i16x16()))
8123    }
8124}
8125
8126/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8127///
8128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastw_epi16&expand=586)
8129#[inline]
8130#[target_feature(enable = "avx512bw,avx512vl")]
8131#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8132#[cfg_attr(test, assert_instr(vpbroadcastw))]
8133pub fn _mm256_maskz_broadcastw_epi16(k: __mmask16, a: __m128i) -> __m256i {
8134    unsafe {
8135        let broadcast = _mm256_broadcastw_epi16(a).as_i16x16();
8136        transmute(simd_select_bitmask(k, broadcast, i16x16::ZERO))
8137    }
8138}
8139
8140/// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8141///
8142/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastw_epi16&expand=582)
8143#[inline]
8144#[target_feature(enable = "avx512bw,avx512vl")]
8145#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8146#[cfg_attr(test, assert_instr(vpbroadcastw))]
8147pub fn _mm_mask_broadcastw_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
8148    unsafe {
8149        let broadcast = _mm_broadcastw_epi16(a).as_i16x8();
8150        transmute(simd_select_bitmask(k, broadcast, src.as_i16x8()))
8151    }
8152}
8153
8154/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8155///
8156/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastw_epi16&expand=583)
8157#[inline]
8158#[target_feature(enable = "avx512bw,avx512vl")]
8159#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8160#[cfg_attr(test, assert_instr(vpbroadcastw))]
8161pub fn _mm_maskz_broadcastw_epi16(k: __mmask8, a: __m128i) -> __m128i {
8162    unsafe {
8163        let broadcast = _mm_broadcastw_epi16(a).as_i16x8();
8164        transmute(simd_select_bitmask(k, broadcast, i16x8::ZERO))
8165    }
8166}
8167
/// Broadcast the low packed 8-bit integer from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastb_epi8&expand=536)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub fn _mm512_broadcastb_epi8(a: __m128i) -> __m512i {
    unsafe {
        // Widen `a` to 512 bits (upper lanes undefined but unread), then
        // shuffle with an all-zero index vector so every one of the 64 output
        // lanes is a copy of element 0.
        let a = _mm512_castsi128_si512(a).as_i8x64();
        let ret: i8x64 = simd_shuffle!(
            a,
            a,
            [
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0,
            ],
        );
        transmute(ret)
    }
}
8190
8191/// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8192///
8193/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastb_epi8&expand=537)
8194#[inline]
8195#[target_feature(enable = "avx512bw")]
8196#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8197#[cfg_attr(test, assert_instr(vpbroadcastb))]
8198pub fn _mm512_mask_broadcastb_epi8(src: __m512i, k: __mmask64, a: __m128i) -> __m512i {
8199    unsafe {
8200        let broadcast = _mm512_broadcastb_epi8(a).as_i8x64();
8201        transmute(simd_select_bitmask(k, broadcast, src.as_i8x64()))
8202    }
8203}
8204
8205/// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8206///
8207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastb_epi8&expand=538)
8208#[inline]
8209#[target_feature(enable = "avx512bw")]
8210#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8211#[cfg_attr(test, assert_instr(vpbroadcastb))]
8212pub fn _mm512_maskz_broadcastb_epi8(k: __mmask64, a: __m128i) -> __m512i {
8213    unsafe {
8214        let broadcast = _mm512_broadcastb_epi8(a).as_i8x64();
8215        transmute(simd_select_bitmask(k, broadcast, i8x64::ZERO))
8216    }
8217}
8218
8219/// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8220///
8221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastb_epi8&expand=534)
8222#[inline]
8223#[target_feature(enable = "avx512bw,avx512vl")]
8224#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8225#[cfg_attr(test, assert_instr(vpbroadcastb))]
8226pub fn _mm256_mask_broadcastb_epi8(src: __m256i, k: __mmask32, a: __m128i) -> __m256i {
8227    unsafe {
8228        let broadcast = _mm256_broadcastb_epi8(a).as_i8x32();
8229        transmute(simd_select_bitmask(k, broadcast, src.as_i8x32()))
8230    }
8231}
8232
8233/// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8234///
8235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastb_epi8&expand=535)
8236#[inline]
8237#[target_feature(enable = "avx512bw,avx512vl")]
8238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8239#[cfg_attr(test, assert_instr(vpbroadcastb))]
8240pub fn _mm256_maskz_broadcastb_epi8(k: __mmask32, a: __m128i) -> __m256i {
8241    unsafe {
8242        let broadcast = _mm256_broadcastb_epi8(a).as_i8x32();
8243        transmute(simd_select_bitmask(k, broadcast, i8x32::ZERO))
8244    }
8245}
8246
8247/// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8248///
8249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastb_epi8&expand=531)
8250#[inline]
8251#[target_feature(enable = "avx512bw,avx512vl")]
8252#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8253#[cfg_attr(test, assert_instr(vpbroadcastb))]
8254pub fn _mm_mask_broadcastb_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
8255    unsafe {
8256        let broadcast = _mm_broadcastb_epi8(a).as_i8x16();
8257        transmute(simd_select_bitmask(k, broadcast, src.as_i8x16()))
8258    }
8259}
8260
8261/// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8262///
8263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastb_epi8&expand=532)
8264#[inline]
8265#[target_feature(enable = "avx512bw,avx512vl")]
8266#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8267#[cfg_attr(test, assert_instr(vpbroadcastb))]
8268pub fn _mm_maskz_broadcastb_epi8(k: __mmask16, a: __m128i) -> __m128i {
8269    unsafe {
8270        let broadcast = _mm_broadcastb_epi8(a).as_i8x16();
8271        transmute(simd_select_bitmask(k, broadcast, i8x16::ZERO))
8272    }
8273}
8274
/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi16&expand=6012)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
pub fn _mm512_unpackhi_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i16x32();
        let b = b.as_i16x32();
        // Each 128-bit lane holds 8 words; interleave the upper 4 words of `a`
        // with the upper 4 words of `b` within each lane. In the index table,
        // values < 32 select from `a` and `32 + n` selects element n of `b`.
        #[rustfmt::skip]
        let r: i16x32 = simd_shuffle!(
            a,
            b,
            [
                4, 32 + 4, 5, 32 + 5,
                6, 32 + 6, 7, 32 + 7,
                12, 32 + 12, 13, 32 + 13,
                14, 32 + 14, 15, 32 + 15,
                20, 32 + 20, 21, 32 + 21,
                22, 32 + 22, 23, 32 + 23,
                28, 32 + 28, 29, 32 + 29,
                30, 32 + 30, 31, 32 + 31,
            ],
        );
        transmute(r)
    }
}
8304
8305/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8306///
8307/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi16&expand=6010)
8308#[inline]
8309#[target_feature(enable = "avx512bw")]
8310#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8311#[cfg_attr(test, assert_instr(vpunpckhwd))]
8312pub fn _mm512_mask_unpackhi_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
8313    unsafe {
8314        let unpackhi = _mm512_unpackhi_epi16(a, b).as_i16x32();
8315        transmute(simd_select_bitmask(k, unpackhi, src.as_i16x32()))
8316    }
8317}
8318
8319/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8320///
8321/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi16&expand=6011)
8322#[inline]
8323#[target_feature(enable = "avx512bw")]
8324#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8325#[cfg_attr(test, assert_instr(vpunpckhwd))]
8326pub fn _mm512_maskz_unpackhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
8327    unsafe {
8328        let unpackhi = _mm512_unpackhi_epi16(a, b).as_i16x32();
8329        transmute(simd_select_bitmask(k, unpackhi, i16x32::ZERO))
8330    }
8331}
8332
8333/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8334///
8335/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi16&expand=6007)
8336#[inline]
8337#[target_feature(enable = "avx512bw,avx512vl")]
8338#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8339#[cfg_attr(test, assert_instr(vpunpckhwd))]
8340pub fn _mm256_mask_unpackhi_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
8341    unsafe {
8342        let unpackhi = _mm256_unpackhi_epi16(a, b).as_i16x16();
8343        transmute(simd_select_bitmask(k, unpackhi, src.as_i16x16()))
8344    }
8345}
8346
8347/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8348///
8349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi16&expand=6008)
8350#[inline]
8351#[target_feature(enable = "avx512bw,avx512vl")]
8352#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8353#[cfg_attr(test, assert_instr(vpunpckhwd))]
8354pub fn _mm256_maskz_unpackhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
8355    unsafe {
8356        let unpackhi = _mm256_unpackhi_epi16(a, b).as_i16x16();
8357        transmute(simd_select_bitmask(k, unpackhi, i16x16::ZERO))
8358    }
8359}
8360
8361/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8362///
8363/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi16&expand=6004)
8364#[inline]
8365#[target_feature(enable = "avx512bw,avx512vl")]
8366#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8367#[cfg_attr(test, assert_instr(vpunpckhwd))]
8368pub fn _mm_mask_unpackhi_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
8369    unsafe {
8370        let unpackhi = _mm_unpackhi_epi16(a, b).as_i16x8();
8371        transmute(simd_select_bitmask(k, unpackhi, src.as_i16x8()))
8372    }
8373}
8374
8375/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8376///
8377/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi16&expand=6005)
8378#[inline]
8379#[target_feature(enable = "avx512bw,avx512vl")]
8380#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8381#[cfg_attr(test, assert_instr(vpunpckhwd))]
8382pub fn _mm_maskz_unpackhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
8383    unsafe {
8384        let unpackhi = _mm_unpackhi_epi16(a, b).as_i16x8();
8385        transmute(simd_select_bitmask(k, unpackhi, i16x8::ZERO))
8386    }
8387}
8388
/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi8&expand=6039)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
pub fn _mm512_unpackhi_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i8x64();
        let b = b.as_i8x64();
        // Each 128-bit lane holds 16 bytes; interleave the upper 8 bytes of `a`
        // with the upper 8 bytes of `b` within each lane. In the index table,
        // values < 64 select from `a` and `64 + n` selects element n of `b`.
        #[rustfmt::skip]
        let r: i8x64 = simd_shuffle!(
            a,
            b,
            [
                8, 64 + 8, 9, 64 + 9,
                10, 64 + 10, 11, 64 + 11,
                12, 64 + 12, 13, 64 + 13,
                14, 64 + 14, 15, 64 + 15,
                24, 64 + 24, 25, 64 + 25,
                26, 64 + 26, 27, 64 + 27,
                28, 64 + 28, 29, 64 + 29,
                30, 64 + 30, 31, 64 + 31,
                40, 64 + 40, 41, 64 + 41,
                42, 64 + 42, 43, 64 + 43,
                44, 64 + 44, 45, 64 + 45,
                46, 64 + 46, 47, 64 + 47,
                56, 64 + 56, 57, 64 + 57,
                58, 64 + 58, 59, 64 + 59,
                60, 64 + 60, 61, 64 + 61,
                62, 64 + 62, 63, 64 + 63,
            ],
        );
        transmute(r)
    }
}
8426
8427/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8428///
8429/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi8&expand=6037)
8430#[inline]
8431#[target_feature(enable = "avx512bw")]
8432#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8433#[cfg_attr(test, assert_instr(vpunpckhbw))]
8434pub fn _mm512_mask_unpackhi_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
8435    unsafe {
8436        let unpackhi = _mm512_unpackhi_epi8(a, b).as_i8x64();
8437        transmute(simd_select_bitmask(k, unpackhi, src.as_i8x64()))
8438    }
8439}
8440
8441/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8442///
8443/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi8&expand=6038)
8444#[inline]
8445#[target_feature(enable = "avx512bw")]
8446#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8447#[cfg_attr(test, assert_instr(vpunpckhbw))]
8448pub fn _mm512_maskz_unpackhi_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
8449    unsafe {
8450        let unpackhi = _mm512_unpackhi_epi8(a, b).as_i8x64();
8451        transmute(simd_select_bitmask(k, unpackhi, i8x64::ZERO))
8452    }
8453}
8454
8455/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8456///
8457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi8&expand=6034)
8458#[inline]
8459#[target_feature(enable = "avx512bw,avx512vl")]
8460#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8461#[cfg_attr(test, assert_instr(vpunpckhbw))]
8462pub fn _mm256_mask_unpackhi_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
8463    unsafe {
8464        let unpackhi = _mm256_unpackhi_epi8(a, b).as_i8x32();
8465        transmute(simd_select_bitmask(k, unpackhi, src.as_i8x32()))
8466    }
8467}
8468
8469/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8470///
8471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi8&expand=6035)
8472#[inline]
8473#[target_feature(enable = "avx512bw,avx512vl")]
8474#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8475#[cfg_attr(test, assert_instr(vpunpckhbw))]
8476pub fn _mm256_maskz_unpackhi_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
8477    unsafe {
8478        let unpackhi = _mm256_unpackhi_epi8(a, b).as_i8x32();
8479        transmute(simd_select_bitmask(k, unpackhi, i8x32::ZERO))
8480    }
8481}
8482
8483/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8484///
8485/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi8&expand=6031)
8486#[inline]
8487#[target_feature(enable = "avx512bw,avx512vl")]
8488#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8489#[cfg_attr(test, assert_instr(vpunpckhbw))]
8490pub fn _mm_mask_unpackhi_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
8491    unsafe {
8492        let unpackhi = _mm_unpackhi_epi8(a, b).as_i8x16();
8493        transmute(simd_select_bitmask(k, unpackhi, src.as_i8x16()))
8494    }
8495}
8496
8497/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8498///
8499/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi8&expand=6032)
8500#[inline]
8501#[target_feature(enable = "avx512bw,avx512vl")]
8502#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8503#[cfg_attr(test, assert_instr(vpunpckhbw))]
8504pub fn _mm_maskz_unpackhi_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
8505    unsafe {
8506        let unpackhi = _mm_unpackhi_epi8(a, b).as_i8x16();
8507        transmute(simd_select_bitmask(k, unpackhi, i8x16::ZERO))
8508    }
8509}
8510
/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi16&expand=6069)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
pub fn _mm512_unpacklo_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i16x32();
        let b = b.as_i16x32();
        // Each 128-bit lane holds 8 words; interleave the lower 4 words of `a`
        // with the lower 4 words of `b` within each lane. In the index table,
        // values < 32 select from `a` and `32+n` selects element n of `b`.
        #[rustfmt::skip]
        let r: i16x32 = simd_shuffle!(
            a,
            b,
            [
               0,  32+0,   1, 32+1,
               2,  32+2,   3, 32+3,
               8,  32+8,   9, 32+9,
               10, 32+10, 11, 32+11,
               16, 32+16, 17, 32+17,
               18, 32+18, 19, 32+19,
               24, 32+24, 25, 32+25,
               26, 32+26, 27, 32+27
            ],
        );
        transmute(r)
    }
}
8540
8541/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8542///
8543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi16&expand=6067)
8544#[inline]
8545#[target_feature(enable = "avx512bw")]
8546#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8547#[cfg_attr(test, assert_instr(vpunpcklwd))]
8548pub fn _mm512_mask_unpacklo_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
8549    unsafe {
8550        let unpacklo = _mm512_unpacklo_epi16(a, b).as_i16x32();
8551        transmute(simd_select_bitmask(k, unpacklo, src.as_i16x32()))
8552    }
8553}
8554
8555/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8556///
8557/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi16&expand=6068)
8558#[inline]
8559#[target_feature(enable = "avx512bw")]
8560#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8561#[cfg_attr(test, assert_instr(vpunpcklwd))]
8562pub fn _mm512_maskz_unpacklo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
8563    unsafe {
8564        let unpacklo = _mm512_unpacklo_epi16(a, b).as_i16x32();
8565        transmute(simd_select_bitmask(k, unpacklo, i16x32::ZERO))
8566    }
8567}
8568
8569/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8570///
8571/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi16&expand=6064)
8572#[inline]
8573#[target_feature(enable = "avx512bw,avx512vl")]
8574#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8575#[cfg_attr(test, assert_instr(vpunpcklwd))]
8576pub fn _mm256_mask_unpacklo_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
8577    unsafe {
8578        let unpacklo = _mm256_unpacklo_epi16(a, b).as_i16x16();
8579        transmute(simd_select_bitmask(k, unpacklo, src.as_i16x16()))
8580    }
8581}
8582
8583/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8584///
8585/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi16&expand=6065)
8586#[inline]
8587#[target_feature(enable = "avx512bw,avx512vl")]
8588#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8589#[cfg_attr(test, assert_instr(vpunpcklwd))]
8590pub fn _mm256_maskz_unpacklo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
8591    unsafe {
8592        let unpacklo = _mm256_unpacklo_epi16(a, b).as_i16x16();
8593        transmute(simd_select_bitmask(k, unpacklo, i16x16::ZERO))
8594    }
8595}
8596
8597/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8598///
8599/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi16&expand=6061)
8600#[inline]
8601#[target_feature(enable = "avx512bw,avx512vl")]
8602#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8603#[cfg_attr(test, assert_instr(vpunpcklwd))]
8604pub fn _mm_mask_unpacklo_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
8605    unsafe {
8606        let unpacklo = _mm_unpacklo_epi16(a, b).as_i16x8();
8607        transmute(simd_select_bitmask(k, unpacklo, src.as_i16x8()))
8608    }
8609}
8610
8611/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8612///
8613/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi16&expand=6062)
8614#[inline]
8615#[target_feature(enable = "avx512bw,avx512vl")]
8616#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8617#[cfg_attr(test, assert_instr(vpunpcklwd))]
8618pub fn _mm_maskz_unpacklo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
8619    unsafe {
8620        let unpacklo = _mm_unpacklo_epi16(a, b).as_i16x8();
8621        transmute(simd_select_bitmask(k, unpacklo, i16x8::ZERO))
8622    }
8623}
8624
/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi8&expand=6096)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
pub fn _mm512_unpacklo_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i8x64();
        let b = b.as_i8x64();
        // Each 128-bit lane holds 16 bytes; interleave the lower 8 bytes of `a`
        // with the lower 8 bytes of `b` within each lane. In the index table,
        // values < 64 select from `a` and `64+n` selects element n of `b`.
        #[rustfmt::skip]
        let r: i8x64 = simd_shuffle!(
            a,
            b,
            [
                0,  64+0,   1, 64+1,
                2,  64+2,   3, 64+3,
                4,  64+4,   5, 64+5,
                6,  64+6,   7, 64+7,
                16, 64+16, 17, 64+17,
                18, 64+18, 19, 64+19,
                20, 64+20, 21, 64+21,
                22, 64+22, 23, 64+23,
                32, 64+32, 33, 64+33,
                34, 64+34, 35, 64+35,
                36, 64+36, 37, 64+37,
                38, 64+38, 39, 64+39,
                48, 64+48, 49, 64+49,
                50, 64+50, 51, 64+51,
                52, 64+52, 53, 64+53,
                54, 64+54, 55, 64+55,
            ],
        );
        transmute(r)
    }
}
8662
8663/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8664///
8665/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi8&expand=6094)
8666#[inline]
8667#[target_feature(enable = "avx512bw")]
8668#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8669#[cfg_attr(test, assert_instr(vpunpcklbw))]
8670pub fn _mm512_mask_unpacklo_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
8671    unsafe {
8672        let unpacklo = _mm512_unpacklo_epi8(a, b).as_i8x64();
8673        transmute(simd_select_bitmask(k, unpacklo, src.as_i8x64()))
8674    }
8675}
8676
8677/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8678///
8679/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi8&expand=6095)
8680#[inline]
8681#[target_feature(enable = "avx512bw")]
8682#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8683#[cfg_attr(test, assert_instr(vpunpcklbw))]
8684pub fn _mm512_maskz_unpacklo_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
8685    unsafe {
8686        let unpacklo = _mm512_unpacklo_epi8(a, b).as_i8x64();
8687        transmute(simd_select_bitmask(k, unpacklo, i8x64::ZERO))
8688    }
8689}
8690
8691/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8692///
8693/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi8&expand=6091)
8694#[inline]
8695#[target_feature(enable = "avx512bw,avx512vl")]
8696#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8697#[cfg_attr(test, assert_instr(vpunpcklbw))]
8698pub fn _mm256_mask_unpacklo_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
8699    unsafe {
8700        let unpacklo = _mm256_unpacklo_epi8(a, b).as_i8x32();
8701        transmute(simd_select_bitmask(k, unpacklo, src.as_i8x32()))
8702    }
8703}
8704
8705/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8706///
8707/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi8&expand=6092)
8708#[inline]
8709#[target_feature(enable = "avx512bw,avx512vl")]
8710#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8711#[cfg_attr(test, assert_instr(vpunpcklbw))]
8712pub fn _mm256_maskz_unpacklo_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
8713    unsafe {
8714        let unpacklo = _mm256_unpacklo_epi8(a, b).as_i8x32();
8715        transmute(simd_select_bitmask(k, unpacklo, i8x32::ZERO))
8716    }
8717}
8718
8719/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8720///
8721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi8&expand=6088)
8722#[inline]
8723#[target_feature(enable = "avx512bw,avx512vl")]
8724#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8725#[cfg_attr(test, assert_instr(vpunpcklbw))]
8726pub fn _mm_mask_unpacklo_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
8727    unsafe {
8728        let unpacklo = _mm_unpacklo_epi8(a, b).as_i8x16();
8729        transmute(simd_select_bitmask(k, unpacklo, src.as_i8x16()))
8730    }
8731}
8732
8733/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8734///
8735/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi8&expand=6089)
8736#[inline]
8737#[target_feature(enable = "avx512bw,avx512vl")]
8738#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8739#[cfg_attr(test, assert_instr(vpunpcklbw))]
8740pub fn _mm_maskz_unpacklo_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
8741    unsafe {
8742        let unpacklo = _mm_unpacklo_epi8(a, b).as_i8x16();
8743        transmute(simd_select_bitmask(k, unpacklo, i8x16::ZERO))
8744    }
8745}
8746
8747/// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8748///
8749/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi16&expand=3795)
8750#[inline]
8751#[target_feature(enable = "avx512bw")]
8752#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8753#[cfg_attr(test, assert_instr(vmovdqu16))]
8754pub fn _mm512_mask_mov_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
8755    unsafe {
8756        let mov = a.as_i16x32();
8757        transmute(simd_select_bitmask(k, mov, src.as_i16x32()))
8758    }
8759}
8760
8761/// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8762///
8763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi16&expand=3796)
8764#[inline]
8765#[target_feature(enable = "avx512bw")]
8766#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8767#[cfg_attr(test, assert_instr(vmovdqu16))]
8768pub fn _mm512_maskz_mov_epi16(k: __mmask32, a: __m512i) -> __m512i {
8769    unsafe {
8770        let mov = a.as_i16x32();
8771        transmute(simd_select_bitmask(k, mov, i16x32::ZERO))
8772    }
8773}
8774
8775/// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8776///
8777/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi16&expand=3793)
8778#[inline]
8779#[target_feature(enable = "avx512bw,avx512vl")]
8780#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8781#[cfg_attr(test, assert_instr(vmovdqu16))]
8782pub fn _mm256_mask_mov_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
8783    unsafe {
8784        let mov = a.as_i16x16();
8785        transmute(simd_select_bitmask(k, mov, src.as_i16x16()))
8786    }
8787}
8788
8789/// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8790///
8791/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi16&expand=3794)
8792#[inline]
8793#[target_feature(enable = "avx512bw,avx512vl")]
8794#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8795#[cfg_attr(test, assert_instr(vmovdqu16))]
8796pub fn _mm256_maskz_mov_epi16(k: __mmask16, a: __m256i) -> __m256i {
8797    unsafe {
8798        let mov = a.as_i16x16();
8799        transmute(simd_select_bitmask(k, mov, i16x16::ZERO))
8800    }
8801}
8802
8803/// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8804///
8805/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi16&expand=3791)
8806#[inline]
8807#[target_feature(enable = "avx512bw,avx512vl")]
8808#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8809#[cfg_attr(test, assert_instr(vmovdqu16))]
8810pub fn _mm_mask_mov_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
8811    unsafe {
8812        let mov = a.as_i16x8();
8813        transmute(simd_select_bitmask(k, mov, src.as_i16x8()))
8814    }
8815}
8816
8817/// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8818///
8819/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi16&expand=3792)
8820#[inline]
8821#[target_feature(enable = "avx512bw,avx512vl")]
8822#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8823#[cfg_attr(test, assert_instr(vmovdqu16))]
8824pub fn _mm_maskz_mov_epi16(k: __mmask8, a: __m128i) -> __m128i {
8825    unsafe {
8826        let mov = a.as_i16x8();
8827        transmute(simd_select_bitmask(k, mov, i16x8::ZERO))
8828    }
8829}
8830
8831/// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8832///
8833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi8&expand=3813)
8834#[inline]
8835#[target_feature(enable = "avx512bw")]
8836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8837#[cfg_attr(test, assert_instr(vmovdqu8))]
8838pub fn _mm512_mask_mov_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
8839    unsafe {
8840        let mov = a.as_i8x64();
8841        transmute(simd_select_bitmask(k, mov, src.as_i8x64()))
8842    }
8843}
8844
8845/// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8846///
8847/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi8&expand=3814)
8848#[inline]
8849#[target_feature(enable = "avx512bw")]
8850#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8851#[cfg_attr(test, assert_instr(vmovdqu8))]
8852pub fn _mm512_maskz_mov_epi8(k: __mmask64, a: __m512i) -> __m512i {
8853    unsafe {
8854        let mov = a.as_i8x64();
8855        transmute(simd_select_bitmask(k, mov, i8x64::ZERO))
8856    }
8857}
8858
8859/// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8860///
8861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi8&expand=3811)
8862#[inline]
8863#[target_feature(enable = "avx512bw,avx512vl")]
8864#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8865#[cfg_attr(test, assert_instr(vmovdqu8))]
8866pub fn _mm256_mask_mov_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
8867    unsafe {
8868        let mov = a.as_i8x32();
8869        transmute(simd_select_bitmask(k, mov, src.as_i8x32()))
8870    }
8871}
8872
8873/// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8874///
8875/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi8&expand=3812)
8876#[inline]
8877#[target_feature(enable = "avx512bw,avx512vl")]
8878#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8879#[cfg_attr(test, assert_instr(vmovdqu8))]
8880pub fn _mm256_maskz_mov_epi8(k: __mmask32, a: __m256i) -> __m256i {
8881    unsafe {
8882        let mov = a.as_i8x32();
8883        transmute(simd_select_bitmask(k, mov, i8x32::ZERO))
8884    }
8885}
8886
8887/// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8888///
8889/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi8&expand=3809)
8890#[inline]
8891#[target_feature(enable = "avx512bw,avx512vl")]
8892#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8893#[cfg_attr(test, assert_instr(vmovdqu8))]
8894pub fn _mm_mask_mov_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
8895    unsafe {
8896        let mov = a.as_i8x16();
8897        transmute(simd_select_bitmask(k, mov, src.as_i8x16()))
8898    }
8899}
8900
8901/// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8902///
8903/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi8&expand=3810)
8904#[inline]
8905#[target_feature(enable = "avx512bw,avx512vl")]
8906#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8907#[cfg_attr(test, assert_instr(vmovdqu8))]
8908pub fn _mm_maskz_mov_epi8(k: __mmask16, a: __m128i) -> __m128i {
8909    unsafe {
8910        let mov = a.as_i8x16();
8911        transmute(simd_select_bitmask(k, mov, i8x16::ZERO))
8912    }
8913}
8914
8915/// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8916///
8917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi16&expand=4942)
8918#[inline]
8919#[target_feature(enable = "avx512bw")]
8920#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8921#[cfg_attr(test, assert_instr(vpbroadcastw))]
8922pub fn _mm512_mask_set1_epi16(src: __m512i, k: __mmask32, a: i16) -> __m512i {
8923    unsafe {
8924        let r = _mm512_set1_epi16(a).as_i16x32();
8925        transmute(simd_select_bitmask(k, r, src.as_i16x32()))
8926    }
8927}
8928
8929/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8930///
8931/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi16&expand=4943)
8932#[inline]
8933#[target_feature(enable = "avx512bw")]
8934#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8935#[cfg_attr(test, assert_instr(vpbroadcastw))]
8936pub fn _mm512_maskz_set1_epi16(k: __mmask32, a: i16) -> __m512i {
8937    unsafe {
8938        let r = _mm512_set1_epi16(a).as_i16x32();
8939        transmute(simd_select_bitmask(k, r, i16x32::ZERO))
8940    }
8941}
8942
8943/// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8944///
8945/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi16&expand=4939)
8946#[inline]
8947#[target_feature(enable = "avx512bw,avx512vl")]
8948#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8949#[cfg_attr(test, assert_instr(vpbroadcastw))]
8950pub fn _mm256_mask_set1_epi16(src: __m256i, k: __mmask16, a: i16) -> __m256i {
8951    unsafe {
8952        let r = _mm256_set1_epi16(a).as_i16x16();
8953        transmute(simd_select_bitmask(k, r, src.as_i16x16()))
8954    }
8955}
8956
8957/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8958///
8959/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi16&expand=4940)
8960#[inline]
8961#[target_feature(enable = "avx512bw,avx512vl")]
8962#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8963#[cfg_attr(test, assert_instr(vpbroadcastw))]
8964pub fn _mm256_maskz_set1_epi16(k: __mmask16, a: i16) -> __m256i {
8965    unsafe {
8966        let r = _mm256_set1_epi16(a).as_i16x16();
8967        transmute(simd_select_bitmask(k, r, i16x16::ZERO))
8968    }
8969}
8970
8971/// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8972///
8973/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi16&expand=4936)
8974#[inline]
8975#[target_feature(enable = "avx512bw,avx512vl")]
8976#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8977#[cfg_attr(test, assert_instr(vpbroadcastw))]
8978pub fn _mm_mask_set1_epi16(src: __m128i, k: __mmask8, a: i16) -> __m128i {
8979    unsafe {
8980        let r = _mm_set1_epi16(a).as_i16x8();
8981        transmute(simd_select_bitmask(k, r, src.as_i16x8()))
8982    }
8983}
8984
8985/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8986///
8987/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi16&expand=4937)
8988#[inline]
8989#[target_feature(enable = "avx512bw,avx512vl")]
8990#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8991#[cfg_attr(test, assert_instr(vpbroadcastw))]
8992pub fn _mm_maskz_set1_epi16(k: __mmask8, a: i16) -> __m128i {
8993    unsafe {
8994        let r = _mm_set1_epi16(a).as_i16x8();
8995        transmute(simd_select_bitmask(k, r, i16x8::ZERO))
8996    }
8997}
8998
8999/// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9000///
9001/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi8&expand=4970)
9002#[inline]
9003#[target_feature(enable = "avx512bw")]
9004#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9005#[cfg_attr(test, assert_instr(vpbroadcast))]
9006pub fn _mm512_mask_set1_epi8(src: __m512i, k: __mmask64, a: i8) -> __m512i {
9007    unsafe {
9008        let r = _mm512_set1_epi8(a).as_i8x64();
9009        transmute(simd_select_bitmask(k, r, src.as_i8x64()))
9010    }
9011}
9012
9013/// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9014///
9015/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi8&expand=4971)
9016#[inline]
9017#[target_feature(enable = "avx512bw")]
9018#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9019#[cfg_attr(test, assert_instr(vpbroadcast))]
9020pub fn _mm512_maskz_set1_epi8(k: __mmask64, a: i8) -> __m512i {
9021    unsafe {
9022        let r = _mm512_set1_epi8(a).as_i8x64();
9023        transmute(simd_select_bitmask(k, r, i8x64::ZERO))
9024    }
9025}
9026
9027/// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9028///
9029/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi8&expand=4967)
9030#[inline]
9031#[target_feature(enable = "avx512bw,avx512vl")]
9032#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9033#[cfg_attr(test, assert_instr(vpbroadcast))]
9034pub fn _mm256_mask_set1_epi8(src: __m256i, k: __mmask32, a: i8) -> __m256i {
9035    unsafe {
9036        let r = _mm256_set1_epi8(a).as_i8x32();
9037        transmute(simd_select_bitmask(k, r, src.as_i8x32()))
9038    }
9039}
9040
9041/// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9042///
9043/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi8&expand=4968)
9044#[inline]
9045#[target_feature(enable = "avx512bw,avx512vl")]
9046#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9047#[cfg_attr(test, assert_instr(vpbroadcast))]
9048pub fn _mm256_maskz_set1_epi8(k: __mmask32, a: i8) -> __m256i {
9049    unsafe {
9050        let r = _mm256_set1_epi8(a).as_i8x32();
9051        transmute(simd_select_bitmask(k, r, i8x32::ZERO))
9052    }
9053}
9054
9055/// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9056///
9057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi8&expand=4964)
9058#[inline]
9059#[target_feature(enable = "avx512bw,avx512vl")]
9060#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9061#[cfg_attr(test, assert_instr(vpbroadcast))]
9062pub fn _mm_mask_set1_epi8(src: __m128i, k: __mmask16, a: i8) -> __m128i {
9063    unsafe {
9064        let r = _mm_set1_epi8(a).as_i8x16();
9065        transmute(simd_select_bitmask(k, r, src.as_i8x16()))
9066    }
9067}
9068
9069/// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9070///
9071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi8&expand=4965)
9072#[inline]
9073#[target_feature(enable = "avx512bw,avx512vl")]
9074#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9075#[cfg_attr(test, assert_instr(vpbroadcast))]
9076pub fn _mm_maskz_set1_epi8(k: __mmask16, a: i8) -> __m128i {
9077    unsafe {
9078        let r = _mm_set1_epi8(a).as_i8x16();
9079        transmute(simd_select_bitmask(k, r, i8x16::ZERO))
9080    }
9081}
9082
/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shufflelo_epi16&expand=5221)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_shufflelo_epi16<const IMM8: i32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i16x32();
        // Each 128-bit lane holds eight i16 elements. Within every lane, the
        // four 2-bit fields of IMM8 (bits 1:0, 3:2, 5:4, 7:6) pick which of
        // the lane's low four elements lands in result positions 0..=3, and
        // the high four elements (positions 4..=7) are passed through
        // unchanged. The same pattern is repeated at lane bases 0, 8, 16 and
        // 24 of the 32-element vector. Index expressions must stay inline so
        // they remain const-evaluable for `simd_shuffle!`.
        let r: i16x32 = simd_shuffle!(
            a,
            a,
            [
                IMM8 as u32 & 0b11,
                (IMM8 as u32 >> 2) & 0b11,
                (IMM8 as u32 >> 4) & 0b11,
                (IMM8 as u32 >> 6) & 0b11,
                4,
                5,
                6,
                7,
                (IMM8 as u32 & 0b11) + 8,
                ((IMM8 as u32 >> 2) & 0b11) + 8,
                ((IMM8 as u32 >> 4) & 0b11) + 8,
                ((IMM8 as u32 >> 6) & 0b11) + 8,
                12,
                13,
                14,
                15,
                (IMM8 as u32 & 0b11) + 16,
                ((IMM8 as u32 >> 2) & 0b11) + 16,
                ((IMM8 as u32 >> 4) & 0b11) + 16,
                ((IMM8 as u32 >> 6) & 0b11) + 16,
                20,
                21,
                22,
                23,
                (IMM8 as u32 & 0b11) + 24,
                ((IMM8 as u32 >> 2) & 0b11) + 24,
                ((IMM8 as u32 >> 4) & 0b11) + 24,
                ((IMM8 as u32 >> 6) & 0b11) + 24,
                28,
                29,
                30,
                31,
            ],
        );
        transmute(r)
    }
}
9136
9137/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
9138///
9139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shufflelo_epi16&expand=5219)
9140#[inline]
9141#[target_feature(enable = "avx512bw")]
9142#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9143#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))]
9144#[rustc_legacy_const_generics(3)]
9145pub fn _mm512_mask_shufflelo_epi16<const IMM8: i32>(
9146    src: __m512i,
9147    k: __mmask32,
9148    a: __m512i,
9149) -> __m512i {
9150    unsafe {
9151        static_assert_uimm_bits!(IMM8, 8);
9152        let r = _mm512_shufflelo_epi16::<IMM8>(a);
9153        transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32()))
9154    }
9155}
9156
9157/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9158///
9159/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shufflelo_epi16&expand=5220)
9160#[inline]
9161#[target_feature(enable = "avx512bw")]
9162#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9163#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))]
9164#[rustc_legacy_const_generics(2)]
9165pub fn _mm512_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
9166    unsafe {
9167        static_assert_uimm_bits!(IMM8, 8);
9168        let r = _mm512_shufflelo_epi16::<IMM8>(a);
9169        transmute(simd_select_bitmask(k, r.as_i16x32(), i16x32::ZERO))
9170    }
9171}
9172
9173/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
9174///
9175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shufflelo_epi16&expand=5216)
9176#[inline]
9177#[target_feature(enable = "avx512bw,avx512vl")]
9178#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9179#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
9180#[rustc_legacy_const_generics(3)]
9181pub fn _mm256_mask_shufflelo_epi16<const IMM8: i32>(
9182    src: __m256i,
9183    k: __mmask16,
9184    a: __m256i,
9185) -> __m256i {
9186    unsafe {
9187        static_assert_uimm_bits!(IMM8, 8);
9188        let shuffle = _mm256_shufflelo_epi16::<IMM8>(a);
9189        transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16()))
9190    }
9191}
9192
9193/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
9194///
9195/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shufflelo_epi16&expand=5217)
9196#[inline]
9197#[target_feature(enable = "avx512bw,avx512vl")]
9198#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9199#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
9200#[rustc_legacy_const_generics(2)]
9201pub fn _mm256_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
9202    unsafe {
9203        static_assert_uimm_bits!(IMM8, 8);
9204        let shuffle = _mm256_shufflelo_epi16::<IMM8>(a);
9205        transmute(simd_select_bitmask(k, shuffle.as_i16x16(), i16x16::ZERO))
9206    }
9207}
9208
9209/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
9210///
9211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shufflelo_epi16&expand=5213)
9212#[inline]
9213#[target_feature(enable = "avx512bw,avx512vl")]
9214#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9215#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
9216#[rustc_legacy_const_generics(3)]
9217pub fn _mm_mask_shufflelo_epi16<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
9218    unsafe {
9219        static_assert_uimm_bits!(IMM8, 8);
9220        let shuffle = _mm_shufflelo_epi16::<IMM8>(a);
9221        transmute(simd_select_bitmask(k, shuffle.as_i16x8(), src.as_i16x8()))
9222    }
9223}
9224
9225/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
9226///
9227/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shufflelo_epi16&expand=5214)
9228#[inline]
9229#[target_feature(enable = "avx512bw,avx512vl")]
9230#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9231#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
9232#[rustc_legacy_const_generics(2)]
9233pub fn _mm_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
9234    unsafe {
9235        static_assert_uimm_bits!(IMM8, 8);
9236        let shuffle = _mm_shufflelo_epi16::<IMM8>(a);
9237        transmute(simd_select_bitmask(k, shuffle.as_i16x8(), i16x8::ZERO))
9238    }
9239}
9240
/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shufflehi_epi16&expand=5212)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_shufflehi_epi16<const IMM8: i32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i16x32();
        // Mirror image of `_mm512_shufflelo_epi16`: within each 128-bit lane
        // of eight i16 elements, the low four (positions 0..=3) pass through
        // unchanged, and the four 2-bit fields of IMM8 pick which of the
        // lane's high four elements land in result positions 4..=7 (hence
        // the `+ 4`, `+ 12`, `+ 20`, `+ 28` lane-base offsets). Index
        // expressions must stay inline so they remain const-evaluable for
        // `simd_shuffle!`.
        let r: i16x32 = simd_shuffle!(
            a,
            a,
            [
                0,
                1,
                2,
                3,
                (IMM8 as u32 & 0b11) + 4,
                ((IMM8 as u32 >> 2) & 0b11) + 4,
                ((IMM8 as u32 >> 4) & 0b11) + 4,
                ((IMM8 as u32 >> 6) & 0b11) + 4,
                8,
                9,
                10,
                11,
                (IMM8 as u32 & 0b11) + 12,
                ((IMM8 as u32 >> 2) & 0b11) + 12,
                ((IMM8 as u32 >> 4) & 0b11) + 12,
                ((IMM8 as u32 >> 6) & 0b11) + 12,
                16,
                17,
                18,
                19,
                (IMM8 as u32 & 0b11) + 20,
                ((IMM8 as u32 >> 2) & 0b11) + 20,
                ((IMM8 as u32 >> 4) & 0b11) + 20,
                ((IMM8 as u32 >> 6) & 0b11) + 20,
                24,
                25,
                26,
                27,
                (IMM8 as u32 & 0b11) + 28,
                ((IMM8 as u32 >> 2) & 0b11) + 28,
                ((IMM8 as u32 >> 4) & 0b11) + 28,
                ((IMM8 as u32 >> 6) & 0b11) + 28,
            ],
        );
        transmute(r)
    }
}
9294
9295/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
9296///
9297/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shufflehi_epi16&expand=5210)
9298#[inline]
9299#[target_feature(enable = "avx512bw")]
9300#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9301#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))]
9302#[rustc_legacy_const_generics(3)]
9303pub fn _mm512_mask_shufflehi_epi16<const IMM8: i32>(
9304    src: __m512i,
9305    k: __mmask32,
9306    a: __m512i,
9307) -> __m512i {
9308    unsafe {
9309        static_assert_uimm_bits!(IMM8, 8);
9310        let r = _mm512_shufflehi_epi16::<IMM8>(a);
9311        transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32()))
9312    }
9313}
9314
9315/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9316///
9317/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shufflehi_epi16&expand=5211)
9318#[inline]
9319#[target_feature(enable = "avx512bw")]
9320#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9321#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))]
9322#[rustc_legacy_const_generics(2)]
9323pub fn _mm512_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
9324    unsafe {
9325        static_assert_uimm_bits!(IMM8, 8);
9326        let r = _mm512_shufflehi_epi16::<IMM8>(a);
9327        transmute(simd_select_bitmask(k, r.as_i16x32(), i16x32::ZERO))
9328    }
9329}
9330
9331/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
9332///
9333/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shufflehi_epi16&expand=5207)
9334#[inline]
9335#[target_feature(enable = "avx512bw,avx512vl")]
9336#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9337#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
9338#[rustc_legacy_const_generics(3)]
9339pub fn _mm256_mask_shufflehi_epi16<const IMM8: i32>(
9340    src: __m256i,
9341    k: __mmask16,
9342    a: __m256i,
9343) -> __m256i {
9344    unsafe {
9345        static_assert_uimm_bits!(IMM8, 8);
9346        let shuffle = _mm256_shufflehi_epi16::<IMM8>(a);
9347        transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16()))
9348    }
9349}
9350
9351/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9352///
9353/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shufflehi_epi16&expand=5208)
9354#[inline]
9355#[target_feature(enable = "avx512bw,avx512vl")]
9356#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9357#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
9358#[rustc_legacy_const_generics(2)]
9359pub fn _mm256_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
9360    unsafe {
9361        static_assert_uimm_bits!(IMM8, 8);
9362        let shuffle = _mm256_shufflehi_epi16::<IMM8>(a);
9363        transmute(simd_select_bitmask(k, shuffle.as_i16x16(), i16x16::ZERO))
9364    }
9365}
9366
9367/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
9368///
9369/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shufflehi_epi16&expand=5204)
9370#[inline]
9371#[target_feature(enable = "avx512bw,avx512vl")]
9372#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9373#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
9374#[rustc_legacy_const_generics(3)]
9375pub fn _mm_mask_shufflehi_epi16<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
9376    unsafe {
9377        static_assert_uimm_bits!(IMM8, 8);
9378        let shuffle = _mm_shufflehi_epi16::<IMM8>(a);
9379        transmute(simd_select_bitmask(k, shuffle.as_i16x8(), src.as_i16x8()))
9380    }
9381}
9382
9383/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9384///
9385/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shufflehi_epi16&expand=5205)
9386#[inline]
9387#[target_feature(enable = "avx512bw,avx512vl")]
9388#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9389#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
9390#[rustc_legacy_const_generics(2)]
9391pub fn _mm_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
9392    unsafe {
9393        static_assert_uimm_bits!(IMM8, 8);
9394        let shuffle = _mm_shufflehi_epi16::<IMM8>(a);
9395        transmute(simd_select_bitmask(k, shuffle.as_i16x8(), i16x8::ZERO))
9396    }
9397}
9398
9399/// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst.
9400///
9401/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_epi8&expand=5159)
9402#[inline]
9403#[target_feature(enable = "avx512bw")]
9404#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9405#[cfg_attr(test, assert_instr(vpshufb))]
9406pub fn _mm512_shuffle_epi8(a: __m512i, b: __m512i) -> __m512i {
9407    unsafe { transmute(vpshufb(a.as_i8x64(), b.as_i8x64())) }
9408}
9409
9410/// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9411///
9412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_epi8&expand=5157)
9413#[inline]
9414#[target_feature(enable = "avx512bw")]
9415#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9416#[cfg_attr(test, assert_instr(vpshufb))]
9417pub fn _mm512_mask_shuffle_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
9418    unsafe {
9419        let shuffle = _mm512_shuffle_epi8(a, b).as_i8x64();
9420        transmute(simd_select_bitmask(k, shuffle, src.as_i8x64()))
9421    }
9422}
9423
9424/// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9425///
9426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_epi8&expand=5158)
9427#[inline]
9428#[target_feature(enable = "avx512bw")]
9429#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9430#[cfg_attr(test, assert_instr(vpshufb))]
9431pub fn _mm512_maskz_shuffle_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
9432    unsafe {
9433        let shuffle = _mm512_shuffle_epi8(a, b).as_i8x64();
9434        transmute(simd_select_bitmask(k, shuffle, i8x64::ZERO))
9435    }
9436}
9437
9438/// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9439///
9440/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_epi8&expand=5154)
9441#[inline]
9442#[target_feature(enable = "avx512bw,avx512vl")]
9443#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9444#[cfg_attr(test, assert_instr(vpshufb))]
9445pub fn _mm256_mask_shuffle_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
9446    unsafe {
9447        let shuffle = _mm256_shuffle_epi8(a, b).as_i8x32();
9448        transmute(simd_select_bitmask(k, shuffle, src.as_i8x32()))
9449    }
9450}
9451
9452/// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9453///
9454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_epi8&expand=5155)
9455#[inline]
9456#[target_feature(enable = "avx512bw,avx512vl")]
9457#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9458#[cfg_attr(test, assert_instr(vpshufb))]
9459pub fn _mm256_maskz_shuffle_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
9460    unsafe {
9461        let shuffle = _mm256_shuffle_epi8(a, b).as_i8x32();
9462        transmute(simd_select_bitmask(k, shuffle, i8x32::ZERO))
9463    }
9464}
9465
9466/// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9467///
9468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_epi8&expand=5151)
9469#[inline]
9470#[target_feature(enable = "avx512bw,avx512vl")]
9471#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9472#[cfg_attr(test, assert_instr(vpshufb))]
9473pub fn _mm_mask_shuffle_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
9474    unsafe {
9475        let shuffle = _mm_shuffle_epi8(a, b).as_i8x16();
9476        transmute(simd_select_bitmask(k, shuffle, src.as_i8x16()))
9477    }
9478}
9479
9480/// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9481///
9482/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_epi8&expand=5152)
9483#[inline]
9484#[target_feature(enable = "avx512bw,avx512vl")]
9485#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9486#[cfg_attr(test, assert_instr(vpshufb))]
9487pub fn _mm_maskz_shuffle_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
9488    unsafe {
9489        let shuffle = _mm_shuffle_epi8(a, b).as_i8x16();
9490        transmute(simd_select_bitmask(k, shuffle, i8x16::ZERO))
9491    }
9492}
9493
9494/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
9495///
9496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi16_mask&expand=5884)
9497#[inline]
9498#[target_feature(enable = "avx512bw")]
9499#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9500#[cfg_attr(test, assert_instr(vptestmw))]
9501pub fn _mm512_test_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
9502    let and = _mm512_and_si512(a, b);
9503    let zero = _mm512_setzero_si512();
9504    _mm512_cmpneq_epi16_mask(and, zero)
9505}
9506
9507/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
9508///
9509/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi16_mask&expand=5883)
9510#[inline]
9511#[target_feature(enable = "avx512bw")]
9512#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9513#[cfg_attr(test, assert_instr(vptestmw))]
9514pub fn _mm512_mask_test_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
9515    let and = _mm512_and_si512(a, b);
9516    let zero = _mm512_setzero_si512();
9517    _mm512_mask_cmpneq_epi16_mask(k, and, zero)
9518}
9519
9520/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
9521///
9522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi16_mask&expand=5882)
9523#[inline]
9524#[target_feature(enable = "avx512bw,avx512vl")]
9525#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9526#[cfg_attr(test, assert_instr(vptestmw))]
9527pub fn _mm256_test_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
9528    let and = _mm256_and_si256(a, b);
9529    let zero = _mm256_setzero_si256();
9530    _mm256_cmpneq_epi16_mask(and, zero)
9531}
9532
9533/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
9534///
9535/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi16_mask&expand=5881)
9536#[inline]
9537#[target_feature(enable = "avx512bw,avx512vl")]
9538#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9539#[cfg_attr(test, assert_instr(vptestmw))]
9540pub fn _mm256_mask_test_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
9541    let and = _mm256_and_si256(a, b);
9542    let zero = _mm256_setzero_si256();
9543    _mm256_mask_cmpneq_epi16_mask(k, and, zero)
9544}
9545
9546/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
9547///
9548/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi16_mask&expand=5880)
9549#[inline]
9550#[target_feature(enable = "avx512bw,avx512vl")]
9551#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9552#[cfg_attr(test, assert_instr(vptestmw))]
9553pub fn _mm_test_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
9554    let and = _mm_and_si128(a, b);
9555    let zero = _mm_setzero_si128();
9556    _mm_cmpneq_epi16_mask(and, zero)
9557}
9558
9559/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
9560///
9561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi16_mask&expand=5879)
9562#[inline]
9563#[target_feature(enable = "avx512bw,avx512vl")]
9564#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9565#[cfg_attr(test, assert_instr(vptestmw))]
9566pub fn _mm_mask_test_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
9567    let and = _mm_and_si128(a, b);
9568    let zero = _mm_setzero_si128();
9569    _mm_mask_cmpneq_epi16_mask(k, and, zero)
9570}
9571
9572/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
9573///
9574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi8_mask&expand=5902)
9575#[inline]
9576#[target_feature(enable = "avx512bw")]
9577#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9578#[cfg_attr(test, assert_instr(vptestmb))]
9579pub fn _mm512_test_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
9580    let and = _mm512_and_si512(a, b);
9581    let zero = _mm512_setzero_si512();
9582    _mm512_cmpneq_epi8_mask(and, zero)
9583}
9584
9585/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
9586///
9587/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi8_mask&expand=5901)
9588#[inline]
9589#[target_feature(enable = "avx512bw")]
9590#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9591#[cfg_attr(test, assert_instr(vptestmb))]
9592pub fn _mm512_mask_test_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
9593    let and = _mm512_and_si512(a, b);
9594    let zero = _mm512_setzero_si512();
9595    _mm512_mask_cmpneq_epi8_mask(k, and, zero)
9596}
9597
9598/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
9599///
9600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi8_mask&expand=5900)
9601#[inline]
9602#[target_feature(enable = "avx512bw,avx512vl")]
9603#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9604#[cfg_attr(test, assert_instr(vptestmb))]
9605pub fn _mm256_test_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
9606    let and = _mm256_and_si256(a, b);
9607    let zero = _mm256_setzero_si256();
9608    _mm256_cmpneq_epi8_mask(and, zero)
9609}
9610
9611/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
9612///
9613/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi8_mask&expand=5899)
9614#[inline]
9615#[target_feature(enable = "avx512bw,avx512vl")]
9616#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9617#[cfg_attr(test, assert_instr(vptestmb))]
9618pub fn _mm256_mask_test_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
9619    let and = _mm256_and_si256(a, b);
9620    let zero = _mm256_setzero_si256();
9621    _mm256_mask_cmpneq_epi8_mask(k, and, zero)
9622}
9623
9624/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
9625///
9626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi8_mask&expand=5898)
9627#[inline]
9628#[target_feature(enable = "avx512bw,avx512vl")]
9629#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9630#[cfg_attr(test, assert_instr(vptestmb))]
9631pub fn _mm_test_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
9632    let and = _mm_and_si128(a, b);
9633    let zero = _mm_setzero_si128();
9634    _mm_cmpneq_epi8_mask(and, zero)
9635}
9636
9637/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
9638///
9639/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi8_mask&expand=5897)
9640#[inline]
9641#[target_feature(enable = "avx512bw,avx512vl")]
9642#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9643#[cfg_attr(test, assert_instr(vptestmb))]
9644pub fn _mm_mask_test_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
9645    let and = _mm_and_si128(a, b);
9646    let zero = _mm_setzero_si128();
9647    _mm_mask_cmpneq_epi8_mask(k, and, zero)
9648}
9649
9650/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
9651///
9652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi16_mask&expand=5915)
9653#[inline]
9654#[target_feature(enable = "avx512bw")]
9655#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9656#[cfg_attr(test, assert_instr(vptestnmw))]
9657pub fn _mm512_testn_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
9658    let and = _mm512_and_si512(a, b);
9659    let zero = _mm512_setzero_si512();
9660    _mm512_cmpeq_epi16_mask(and, zero)
9661}
9662
9663/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
9664///
9665/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi16_mask&expand=5914)
9666#[inline]
9667#[target_feature(enable = "avx512bw")]
9668#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9669#[cfg_attr(test, assert_instr(vptestnmw))]
9670pub fn _mm512_mask_testn_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
9671    let and = _mm512_and_si512(a, b);
9672    let zero = _mm512_setzero_si512();
9673    _mm512_mask_cmpeq_epi16_mask(k, and, zero)
9674}
9675
9676/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
9677///
9678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi16_mask&expand=5913)
9679#[inline]
9680#[target_feature(enable = "avx512bw,avx512vl")]
9681#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9682#[cfg_attr(test, assert_instr(vptestnmw))]
9683pub fn _mm256_testn_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
9684    let and = _mm256_and_si256(a, b);
9685    let zero = _mm256_setzero_si256();
9686    _mm256_cmpeq_epi16_mask(and, zero)
9687}
9688
9689/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
9690///
9691/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi16_mask&expand=5912)
9692#[inline]
9693#[target_feature(enable = "avx512bw,avx512vl")]
9694#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9695#[cfg_attr(test, assert_instr(vptestnmw))]
9696pub fn _mm256_mask_testn_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
9697    let and = _mm256_and_si256(a, b);
9698    let zero = _mm256_setzero_si256();
9699    _mm256_mask_cmpeq_epi16_mask(k, and, zero)
9700}
9701
9702/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
9703///
9704/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi16_mask&expand=5911)
9705#[inline]
9706#[target_feature(enable = "avx512bw,avx512vl")]
9707#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9708#[cfg_attr(test, assert_instr(vptestnmw))]
9709pub fn _mm_testn_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
9710    let and = _mm_and_si128(a, b);
9711    let zero = _mm_setzero_si128();
9712    _mm_cmpeq_epi16_mask(and, zero)
9713}
9714
9715/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
9716///
9717/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi16_mask&expand=5910)
9718#[inline]
9719#[target_feature(enable = "avx512bw,avx512vl")]
9720#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9721#[cfg_attr(test, assert_instr(vptestnmw))]
9722pub fn _mm_mask_testn_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
9723    let and = _mm_and_si128(a, b);
9724    let zero = _mm_setzero_si128();
9725    _mm_mask_cmpeq_epi16_mask(k, and, zero)
9726}
9727
9728/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
9729///
9730/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi8_mask&expand=5933)
9731#[inline]
9732#[target_feature(enable = "avx512bw")]
9733#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9734#[cfg_attr(test, assert_instr(vptestnmb))]
9735pub fn _mm512_testn_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
9736    let and = _mm512_and_si512(a, b);
9737    let zero = _mm512_setzero_si512();
9738    _mm512_cmpeq_epi8_mask(and, zero)
9739}
9740
9741/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
9742///
9743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi8_mask&expand=5932)
9744#[inline]
9745#[target_feature(enable = "avx512bw")]
9746#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9747#[cfg_attr(test, assert_instr(vptestnmb))]
9748pub fn _mm512_mask_testn_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
9749    let and = _mm512_and_si512(a, b);
9750    let zero = _mm512_setzero_si512();
9751    _mm512_mask_cmpeq_epi8_mask(k, and, zero)
9752}
9753
9754/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
9755///
9756/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi8_mask&expand=5931)
9757#[inline]
9758#[target_feature(enable = "avx512bw,avx512vl")]
9759#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9760#[cfg_attr(test, assert_instr(vptestnmb))]
9761pub fn _mm256_testn_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
9762    let and = _mm256_and_si256(a, b);
9763    let zero = _mm256_setzero_si256();
9764    _mm256_cmpeq_epi8_mask(and, zero)
9765}
9766
9767/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
9768///
9769/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi8_mask&expand=5930)
9770#[inline]
9771#[target_feature(enable = "avx512bw,avx512vl")]
9772#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9773#[cfg_attr(test, assert_instr(vptestnmb))]
9774pub fn _mm256_mask_testn_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
9775    let and = _mm256_and_si256(a, b);
9776    let zero = _mm256_setzero_si256();
9777    _mm256_mask_cmpeq_epi8_mask(k, and, zero)
9778}
9779
9780/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
9781///
9782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi8_mask&expand=5929)
9783#[inline]
9784#[target_feature(enable = "avx512bw,avx512vl")]
9785#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9786#[cfg_attr(test, assert_instr(vptestnmb))]
9787pub fn _mm_testn_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
9788    let and = _mm_and_si128(a, b);
9789    let zero = _mm_setzero_si128();
9790    _mm_cmpeq_epi8_mask(and, zero)
9791}
9792
9793/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
9794///
9795/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi8_mask&expand=5928)
9796#[inline]
9797#[target_feature(enable = "avx512bw,avx512vl")]
9798#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9799#[cfg_attr(test, assert_instr(vptestnmb))]
9800pub fn _mm_mask_testn_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
9801    let and = _mm_and_si128(a, b);
9802    let zero = _mm_setzero_si128();
9803    _mm_mask_cmpeq_epi8_mask(k, and, zero)
9804}
9805
9806/// Store 64-bit mask from a into memory.
9807///
9808/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask64&expand=5578)
9809#[inline]
9810#[target_feature(enable = "avx512bw")]
9811#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9812#[cfg_attr(test, assert_instr(mov))] //should be kmovq
9813pub unsafe fn _store_mask64(mem_addr: *mut __mmask64, a: __mmask64) {
9814    ptr::write(mem_addr as *mut __mmask64, a);
9815}
9816
9817/// Store 32-bit mask from a into memory.
9818///
9819/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask32&expand=5577)
9820#[inline]
9821#[target_feature(enable = "avx512bw")]
9822#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9823#[cfg_attr(test, assert_instr(mov))] //should be kmovd
9824pub unsafe fn _store_mask32(mem_addr: *mut __mmask32, a: __mmask32) {
9825    ptr::write(mem_addr as *mut __mmask32, a);
9826}
9827
9828/// Load 64-bit mask from memory into k.
9829///
9830/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask64&expand=3318)
9831#[inline]
9832#[target_feature(enable = "avx512bw")]
9833#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9834#[cfg_attr(test, assert_instr(mov))] //should be kmovq
9835pub unsafe fn _load_mask64(mem_addr: *const __mmask64) -> __mmask64 {
9836    ptr::read(mem_addr as *const __mmask64)
9837}
9838
9839/// Load 32-bit mask from memory into k.
9840///
9841/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask32&expand=3317)
9842#[inline]
9843#[target_feature(enable = "avx512bw")]
9844#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9845#[cfg_attr(test, assert_instr(mov))] //should be kmovd
9846pub unsafe fn _load_mask32(mem_addr: *const __mmask32) -> __mmask32 {
9847    ptr::read(mem_addr as *const __mmask32)
9848}
9849
9850/// Compute the absolute differences of packed unsigned 8-bit integers in a and b, then horizontally sum each consecutive 8 differences to produce eight unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low 16 bits of 64-bit elements in dst.
9851///
9852/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sad_epu8&expand=4855)
9853#[inline]
9854#[target_feature(enable = "avx512bw")]
9855#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9856#[cfg_attr(test, assert_instr(vpsadbw))]
9857pub fn _mm512_sad_epu8(a: __m512i, b: __m512i) -> __m512i {
9858    unsafe { transmute(vpsadbw(a.as_u8x64(), b.as_u8x64())) }
9859}
9860
9861/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
9862///
9863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dbsad_epu8&expand=2114)
9864#[inline]
9865#[target_feature(enable = "avx512bw")]
9866#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9867#[rustc_legacy_const_generics(2)]
9868#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
9869pub fn _mm512_dbsad_epu8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
9870    unsafe {
9871        static_assert_uimm_bits!(IMM8, 8);
9872        let a = a.as_u8x64();
9873        let b = b.as_u8x64();
9874        let r = vdbpsadbw(a, b, IMM8);
9875        transmute(r)
9876    }
9877}
9878
9879/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
9880///
9881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dbsad_epu8&expand=2115)
9882#[inline]
9883#[target_feature(enable = "avx512bw")]
9884#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9885#[rustc_legacy_const_generics(4)]
9886#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
9887pub fn _mm512_mask_dbsad_epu8<const IMM8: i32>(
9888    src: __m512i,
9889    k: __mmask32,
9890    a: __m512i,
9891    b: __m512i,
9892) -> __m512i {
9893    unsafe {
9894        static_assert_uimm_bits!(IMM8, 8);
9895        let a = a.as_u8x64();
9896        let b = b.as_u8x64();
9897        let r = vdbpsadbw(a, b, IMM8);
9898        transmute(simd_select_bitmask(k, r, src.as_u16x32()))
9899    }
9900}
9901
9902/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
9903///
9904/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dbsad_epu8&expand=2116)
9905#[inline]
9906#[target_feature(enable = "avx512bw")]
9907#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9908#[rustc_legacy_const_generics(3)]
9909#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
9910pub fn _mm512_maskz_dbsad_epu8<const IMM8: i32>(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
9911    unsafe {
9912        static_assert_uimm_bits!(IMM8, 8);
9913        let a = a.as_u8x64();
9914        let b = b.as_u8x64();
9915        let r = vdbpsadbw(a, b, IMM8);
9916        transmute(simd_select_bitmask(k, r, u16x32::ZERO))
9917    }
9918}
9919
9920/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
9921///
9922/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dbsad_epu8&expand=2111)
9923#[inline]
9924#[target_feature(enable = "avx512bw,avx512vl")]
9925#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9926#[rustc_legacy_const_generics(2)]
9927#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
9928pub fn _mm256_dbsad_epu8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
9929    unsafe {
9930        static_assert_uimm_bits!(IMM8, 8);
9931        let a = a.as_u8x32();
9932        let b = b.as_u8x32();
9933        let r = vdbpsadbw256(a, b, IMM8);
9934        transmute(r)
9935    }
9936}
9937
9938/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
9939///
9940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dbsad_epu8&expand=2112)
9941#[inline]
9942#[target_feature(enable = "avx512bw,avx512vl")]
9943#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9944#[rustc_legacy_const_generics(4)]
9945#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
9946pub fn _mm256_mask_dbsad_epu8<const IMM8: i32>(
9947    src: __m256i,
9948    k: __mmask16,
9949    a: __m256i,
9950    b: __m256i,
9951) -> __m256i {
9952    unsafe {
9953        static_assert_uimm_bits!(IMM8, 8);
9954        let a = a.as_u8x32();
9955        let b = b.as_u8x32();
9956        let r = vdbpsadbw256(a, b, IMM8);
9957        transmute(simd_select_bitmask(k, r, src.as_u16x16()))
9958    }
9959}
9960
9961/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
9962///
9963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dbsad_epu8&expand=2113)
9964#[inline]
9965#[target_feature(enable = "avx512bw,avx512vl")]
9966#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9967#[rustc_legacy_const_generics(3)]
9968#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
9969pub fn _mm256_maskz_dbsad_epu8<const IMM8: i32>(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
9970    unsafe {
9971        static_assert_uimm_bits!(IMM8, 8);
9972        let a = a.as_u8x32();
9973        let b = b.as_u8x32();
9974        let r = vdbpsadbw256(a, b, IMM8);
9975        transmute(simd_select_bitmask(k, r, u16x16::ZERO))
9976    }
9977}
9978
9979/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
9980///
9981/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dbsad_epu8&expand=2108)
9982#[inline]
9983#[target_feature(enable = "avx512bw,avx512vl")]
9984#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9985#[rustc_legacy_const_generics(2)]
9986#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
9987pub fn _mm_dbsad_epu8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
9988    unsafe {
9989        static_assert_uimm_bits!(IMM8, 8);
9990        let a = a.as_u8x16();
9991        let b = b.as_u8x16();
9992        let r = vdbpsadbw128(a, b, IMM8);
9993        transmute(r)
9994    }
9995}
9996
/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dbsad_epu8&expand=2109)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(4)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub fn _mm_mask_dbsad_epu8<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    unsafe {
        // The instruction only encodes an 8-bit immediate.
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_u8x16();
        let b = b.as_u8x16();
        let r = vdbpsadbw128(a, b, IMM8);
        // Keep the SAD result where the mask bit is set, else copy from `src`.
        transmute(simd_select_bitmask(k, r, src.as_u16x8()))
    }
}
10019
/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dbsad_epu8&expand=2110)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub fn _mm_maskz_dbsad_epu8<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // The instruction only encodes an 8-bit immediate.
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_u8x16();
        let b = b.as_u8x16();
        let r = vdbpsadbw128(a, b, IMM8);
        // Keep the SAD result where the mask bit is set, else zero the lane.
        transmute(simd_select_bitmask(k, r, u16x8::ZERO))
    }
}
10037
/// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi16_mask&expand=3873)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovw2m))]
pub fn _mm512_movepi16_mask(a: __m512i) -> __mmask32 {
    // Isolate the sign bit of each 16-bit lane; a masked lane equals `filter`
    // exactly when its MSB was set, so the compare produces the desired mask.
    // The compiler recognizes this pattern and emits a single `vpmovw2m`.
    let filter = _mm512_set1_epi16(1 << 15);
    let a = _mm512_and_si512(a, filter);
    _mm512_cmpeq_epi16_mask(a, filter)
}
10050
/// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi16_mask&expand=3872)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovw2m))]
pub fn _mm256_movepi16_mask(a: __m256i) -> __mmask16 {
    // Isolate each lane's sign bit, then compare for equality with the filter:
    // the pattern lowers to a single `vpmovw2m`.
    let filter = _mm256_set1_epi16(1 << 15);
    let a = _mm256_and_si256(a, filter);
    _mm256_cmpeq_epi16_mask(a, filter)
}
10063
/// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi16_mask&expand=3871)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovw2m))]
pub fn _mm_movepi16_mask(a: __m128i) -> __mmask8 {
    // Isolate each lane's sign bit, then compare for equality with the filter:
    // the pattern lowers to a single `vpmovw2m`.
    let filter = _mm_set1_epi16(1 << 15);
    let a = _mm_and_si128(a, filter);
    _mm_cmpeq_epi16_mask(a, filter)
}
10076
/// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi8_mask&expand=3883)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovb2m))]
pub fn _mm512_movepi8_mask(a: __m512i) -> __mmask64 {
    // Isolate each byte's sign bit, then compare for equality with the filter:
    // the pattern lowers to a single `vpmovb2m`.
    let filter = _mm512_set1_epi8(1 << 7);
    let a = _mm512_and_si512(a, filter);
    _mm512_cmpeq_epi8_mask(a, filter)
}
10089
/// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi8_mask&expand=3882)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovmskb))] // should be vpmovb2m but compiled to vpmovmskb in the test shim because that takes less cycles than
// using vpmovb2m plus converting the mask register to a standard register.
pub fn _mm256_movepi8_mask(a: __m256i) -> __mmask32 {
    // Isolate each byte's sign bit, then compare for equality with the filter;
    // see the `assert_instr` note above for why `vpmovmskb` is emitted here.
    let filter = _mm256_set1_epi8(1 << 7);
    let a = _mm256_and_si256(a, filter);
    _mm256_cmpeq_epi8_mask(a, filter)
}
10103
/// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi8_mask&expand=3881)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovmskb))] // should be vpmovb2m but compiled to vpmovmskb in the test shim because that takes less cycles than
// using vpmovb2m plus converting the mask register to a standard register.
pub fn _mm_movepi8_mask(a: __m128i) -> __mmask16 {
    // Isolate each byte's sign bit, then compare for equality with the filter;
    // see the `assert_instr` note above for why `vpmovmskb` is emitted here.
    let filter = _mm_set1_epi8(1 << 7);
    let a = _mm_and_si128(a, filter);
    _mm_cmpeq_epi8_mask(a, filter)
}
10117
10118/// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
10119///
10120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi16&expand=3886)
10121#[inline]
10122#[target_feature(enable = "avx512bw")]
10123#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10124#[cfg_attr(test, assert_instr(vpmovm2w))]
10125pub fn _mm512_movm_epi16(k: __mmask32) -> __m512i {
10126    unsafe {
10127        let one = _mm512_set1_epi16(
10128            1 << 15
10129                | 1 << 14
10130                | 1 << 13
10131                | 1 << 12
10132                | 1 << 11
10133                | 1 << 10
10134                | 1 << 9
10135                | 1 << 8
10136                | 1 << 7
10137                | 1 << 6
10138                | 1 << 5
10139                | 1 << 4
10140                | 1 << 3
10141                | 1 << 2
10142                | 1 << 1
10143                | 1 << 0,
10144        )
10145        .as_i16x32();
10146        transmute(simd_select_bitmask(k, one, i16x32::ZERO))
10147    }
10148}
10149
10150/// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
10151///
10152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi16&expand=3885)
10153#[inline]
10154#[target_feature(enable = "avx512bw,avx512vl")]
10155#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10156#[cfg_attr(test, assert_instr(vpmovm2w))]
10157pub fn _mm256_movm_epi16(k: __mmask16) -> __m256i {
10158    unsafe {
10159        let one = _mm256_set1_epi16(
10160            1 << 15
10161                | 1 << 14
10162                | 1 << 13
10163                | 1 << 12
10164                | 1 << 11
10165                | 1 << 10
10166                | 1 << 9
10167                | 1 << 8
10168                | 1 << 7
10169                | 1 << 6
10170                | 1 << 5
10171                | 1 << 4
10172                | 1 << 3
10173                | 1 << 2
10174                | 1 << 1
10175                | 1 << 0,
10176        )
10177        .as_i16x16();
10178        transmute(simd_select_bitmask(k, one, i16x16::ZERO))
10179    }
10180}
10181
10182/// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
10183///
10184/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi16&expand=3884)
10185#[inline]
10186#[target_feature(enable = "avx512bw,avx512vl")]
10187#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10188#[cfg_attr(test, assert_instr(vpmovm2w))]
10189pub fn _mm_movm_epi16(k: __mmask8) -> __m128i {
10190    unsafe {
10191        let one = _mm_set1_epi16(
10192            1 << 15
10193                | 1 << 14
10194                | 1 << 13
10195                | 1 << 12
10196                | 1 << 11
10197                | 1 << 10
10198                | 1 << 9
10199                | 1 << 8
10200                | 1 << 7
10201                | 1 << 6
10202                | 1 << 5
10203                | 1 << 4
10204                | 1 << 3
10205                | 1 << 2
10206                | 1 << 1
10207                | 1 << 0,
10208        )
10209        .as_i16x8();
10210        transmute(simd_select_bitmask(k, one, i16x8::ZERO))
10211    }
10212}
10213
10214/// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
10215///
10216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi8&expand=3895)
10217#[inline]
10218#[target_feature(enable = "avx512bw")]
10219#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10220#[cfg_attr(test, assert_instr(vpmovm2b))]
10221pub fn _mm512_movm_epi8(k: __mmask64) -> __m512i {
10222    unsafe {
10223        let one =
10224            _mm512_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0)
10225                .as_i8x64();
10226        transmute(simd_select_bitmask(k, one, i8x64::ZERO))
10227    }
10228}
10229
10230/// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
10231///
10232/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi8&expand=3894)
10233#[inline]
10234#[target_feature(enable = "avx512bw,avx512vl")]
10235#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10236#[cfg_attr(test, assert_instr(vpmovm2b))]
10237pub fn _mm256_movm_epi8(k: __mmask32) -> __m256i {
10238    unsafe {
10239        let one =
10240            _mm256_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0)
10241                .as_i8x32();
10242        transmute(simd_select_bitmask(k, one, i8x32::ZERO))
10243    }
10244}
10245
10246/// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
10247///
10248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi8&expand=3893)
10249#[inline]
10250#[target_feature(enable = "avx512bw,avx512vl")]
10251#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10252#[cfg_attr(test, assert_instr(vpmovm2b))]
10253pub fn _mm_movm_epi8(k: __mmask16) -> __m128i {
10254    unsafe {
10255        let one =
10256            _mm_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0)
10257                .as_i8x16();
10258        transmute(simd_select_bitmask(k, one, i8x16::ZERO))
10259    }
10260}
10261
/// Convert 32-bit mask a into an integer value, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#_cvtmask32_u32)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _cvtmask32_u32(a: __mmask32) -> u32 {
    // `__mmask32` and `u32` are the same type, so this is the identity.
    a
}
10271
/// Convert integer value a into an 32-bit mask, and store the result in k.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu32_mask32)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _cvtu32_mask32(a: u32) -> __mmask32 {
    // `__mmask32` and `u32` are the same type, so this is the identity.
    a
}
10281
10282/// Add 32-bit masks in a and b, and store the result in k.
10283///
10284/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask32&expand=3207)
10285#[inline]
10286#[target_feature(enable = "avx512bw")]
10287#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10288pub fn _kadd_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
10289    a + b
10290}
10291
10292/// Add 64-bit masks in a and b, and store the result in k.
10293///
10294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask64&expand=3208)
10295#[inline]
10296#[target_feature(enable = "avx512bw")]
10297#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10298pub fn _kadd_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
10299    a + b
10300}
10301
/// Compute the bitwise AND of 32-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kand_mask32&expand=3213)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kand_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    // Masks are plain integers, so KANDD is an ordinary bitwise AND.
    a & b
}
10311
/// Compute the bitwise AND of 64-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kand_mask64&expand=3214)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kand_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    // Masks are plain integers, so KANDQ is an ordinary bitwise AND.
    a & b
}
10321
/// Compute the bitwise NOT of 32-bit mask a, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_knot_mask32&expand=3234)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _knot_mask32(a: __mmask32) -> __mmask32 {
    // Masks are plain integers, so KNOTD is an ordinary bitwise complement.
    !a
}
10331
/// Compute the bitwise NOT of 64-bit mask a, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_knot_mask64&expand=3235)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _knot_mask64(a: __mmask64) -> __mmask64 {
    // Masks are plain integers, so KNOTQ is an ordinary bitwise complement.
    !a
}
10341
10342/// Compute the bitwise NOT of 32-bit masks a and then AND with b, and store the result in k.
10343///
10344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kandn_mask32&expand=3219)
10345#[inline]
10346#[target_feature(enable = "avx512bw")]
10347#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10348pub fn _kandn_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
10349    _knot_mask32(a) & b
10350}
10351
10352/// Compute the bitwise NOT of 64-bit masks a and then AND with b, and store the result in k.
10353///
10354/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kandn_mask64&expand=3220)
10355#[inline]
10356#[target_feature(enable = "avx512bw")]
10357#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10358pub fn _kandn_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
10359    _knot_mask64(a) & b
10360}
10361
/// Compute the bitwise OR of 32-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kor_mask32&expand=3240)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    // Masks are plain integers, so KORD is an ordinary bitwise OR.
    a | b
}
10371
/// Compute the bitwise OR of 64-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kor_mask64&expand=3241)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    // Masks are plain integers, so KORQ is an ordinary bitwise OR.
    a | b
}
10381
/// Compute the bitwise XOR of 32-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxor_mask32&expand=3292)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kxor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    // Masks are plain integers, so KXORD is an ordinary bitwise XOR.
    a ^ b
}
10391
/// Compute the bitwise XOR of 64-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxor_mask64&expand=3293)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kxor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    // Masks are plain integers, so KXORQ is an ordinary bitwise XOR.
    a ^ b
}
10401
10402/// Compute the bitwise XNOR of 32-bit masks a and b, and store the result in k.
10403///
10404/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxnor_mask32&expand=3286)
10405#[inline]
10406#[target_feature(enable = "avx512bw")]
10407#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10408pub fn _kxnor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
10409    _knot_mask32(a ^ b)
10410}
10411
10412/// Compute the bitwise XNOR of 64-bit masks a and b, and store the result in k.
10413///
10414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxnor_mask64&expand=3287)
10415#[inline]
10416#[target_feature(enable = "avx512bw")]
10417#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10418pub fn _kxnor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
10419    _knot_mask64(a ^ b)
10420}
10421
10422/// Compute the bitwise OR of 32-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
10423/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
10424///
10425/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask32_u8)
10426#[inline]
10427#[target_feature(enable = "avx512bw")]
10428#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10429pub unsafe fn _kortest_mask32_u8(a: __mmask32, b: __mmask32, all_ones: *mut u8) -> u8 {
10430    let tmp = _kor_mask32(a, b);
10431    *all_ones = (tmp == 0xffffffff) as u8;
10432    (tmp == 0) as u8
10433}
10434
10435/// Compute the bitwise OR of 64-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
10436/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
10437///
10438/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask64_u8)
10439#[inline]
10440#[target_feature(enable = "avx512bw")]
10441#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10442pub unsafe fn _kortest_mask64_u8(a: __mmask64, b: __mmask64, all_ones: *mut u8) -> u8 {
10443    let tmp = _kor_mask64(a, b);
10444    *all_ones = (tmp == 0xffffffff_ffffffff) as u8;
10445    (tmp == 0) as u8
10446}
10447
10448/// Compute the bitwise OR of 32-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
10449/// store 0 in dst.
10450///
10451/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask32_u8)
10452#[inline]
10453#[target_feature(enable = "avx512bw")]
10454#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10455pub fn _kortestc_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
10456    (_kor_mask32(a, b) == 0xffffffff) as u8
10457}
10458
10459/// Compute the bitwise OR of 64-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
10460/// store 0 in dst.
10461///
10462/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask64_u8)
10463#[inline]
10464#[target_feature(enable = "avx512bw")]
10465#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10466pub fn _kortestc_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
10467    (_kor_mask64(a, b) == 0xffffffff_ffffffff) as u8
10468}
10469
10470/// Compute the bitwise OR of 32-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
10471/// store 0 in dst.
10472///
10473/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask32_u8)
10474#[inline]
10475#[target_feature(enable = "avx512bw")]
10476#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10477pub fn _kortestz_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
10478    (_kor_mask32(a, b) == 0) as u8
10479}
10480
10481/// Compute the bitwise OR of 64-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
10482/// store 0 in dst.
10483///
10484/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask64_u8)
10485#[inline]
10486#[target_feature(enable = "avx512bw")]
10487#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10488pub fn _kortestz_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
10489    (_kor_mask64(a, b) == 0) as u8
10490}
10491
/// Shift the bits of 32-bit mask a left by count while shifting in zeros, and store the least significant 32 bits of the result in k.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask32)
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kshiftli_mask32<const COUNT: u32>(a: __mmask32) -> __mmask32 {
    // `unbounded_shl` yields 0 for COUNT >= 32, matching the hardware
    // behavior of KSHIFTLD with an out-of-range immediate.
    a.unbounded_shl(COUNT)
}
10502
/// Shift the bits of 64-bit mask a left by count while shifting in zeros, and store the least significant 64 bits of the result in k.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask64)
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kshiftli_mask64<const COUNT: u32>(a: __mmask64) -> __mmask64 {
    // `unbounded_shl` yields 0 for COUNT >= 64, matching the hardware
    // behavior of KSHIFTLQ with an out-of-range immediate.
    a.unbounded_shl(COUNT)
}
10513
/// Shift the bits of 32-bit mask a right by count while shifting in zeros, and store the least significant 32 bits of the result in k.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask32)
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kshiftri_mask32<const COUNT: u32>(a: __mmask32) -> __mmask32 {
    // `unbounded_shr` yields 0 for COUNT >= 32, matching the hardware
    // behavior of KSHIFTRD with an out-of-range immediate.
    a.unbounded_shr(COUNT)
}
10524
/// Shift the bits of 64-bit mask a right by count while shifting in zeros, and store the least significant 64 bits of the result in k.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask64)
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kshiftri_mask64<const COUNT: u32>(a: __mmask64) -> __mmask64 {
    // `unbounded_shr` yields 0 for COUNT >= 64, matching the hardware
    // behavior of KSHIFTRQ with an out-of-range immediate.
    a.unbounded_shr(COUNT)
}
10535
10536/// Compute the bitwise AND of 32-bit masks a and b, and if the result is all zeros, store 1 in dst,
10537/// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b, if the result is all
10538/// zeros, store 1 in and_not, otherwise store 0 in and_not.
10539///
10540/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask32_u8)
10541#[inline]
10542#[target_feature(enable = "avx512bw")]
10543#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10544pub unsafe fn _ktest_mask32_u8(a: __mmask32, b: __mmask32, and_not: *mut u8) -> u8 {
10545    *and_not = (_kandn_mask32(a, b) == 0) as u8;
10546    (_kand_mask32(a, b) == 0) as u8
10547}
10548
10549/// Compute the bitwise AND of 64-bit masks a and b, and if the result is all zeros, store 1 in dst,
10550/// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b, if the result is all
10551/// zeros, store 1 in and_not, otherwise store 0 in and_not.
10552///
10553/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask64_u8)
10554#[inline]
10555#[target_feature(enable = "avx512bw")]
10556#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10557pub unsafe fn _ktest_mask64_u8(a: __mmask64, b: __mmask64, and_not: *mut u8) -> u8 {
10558    *and_not = (_kandn_mask64(a, b) == 0) as u8;
10559    (_kand_mask64(a, b) == 0) as u8
10560}
10561
10562/// Compute the bitwise NOT of 32-bit mask a and then AND with 16-bit mask b, if the result is all
10563/// zeros, store 1 in dst, otherwise store 0 in dst.
10564///
10565/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask32_u8)
10566#[inline]
10567#[target_feature(enable = "avx512bw")]
10568#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10569pub fn _ktestc_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
10570    (_kandn_mask32(a, b) == 0) as u8
10571}
10572
10573/// Compute the bitwise NOT of 64-bit mask a and then AND with 8-bit mask b, if the result is all
10574/// zeros, store 1 in dst, otherwise store 0 in dst.
10575///
10576/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask64_u8)
10577#[inline]
10578#[target_feature(enable = "avx512bw")]
10579#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10580pub fn _ktestc_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
10581    (_kandn_mask64(a, b) == 0) as u8
10582}
10583
10584/// Compute the bitwise AND of 32-bit masks a and  b, if the result is all zeros, store 1 in dst, otherwise
10585/// store 0 in dst.
10586///
10587/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask32_u8)
10588#[inline]
10589#[target_feature(enable = "avx512bw")]
10590#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10591pub fn _ktestz_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
10592    (_kand_mask32(a, b) == 0) as u8
10593}
10594
10595/// Compute the bitwise AND of 64-bit masks a and  b, if the result is all zeros, store 1 in dst, otherwise
10596/// store 0 in dst.
10597///
10598/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask64_u8)
10599#[inline]
10600#[target_feature(enable = "avx512bw")]
10601#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10602pub fn _ktestz_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
10603    (_kand_mask64(a, b) == 0) as u8
10604}
10605
10606/// Unpack and interleave 16 bits from masks a and b, and store the 32-bit result in k.
10607///
10608/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kunpackw)
10609#[inline]
10610#[target_feature(enable = "avx512bw")]
10611#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10612#[cfg_attr(test, assert_instr(mov))] // generate normal and code instead of kunpckwd
10613pub fn _mm512_kunpackw(a: __mmask32, b: __mmask32) -> __mmask32 {
10614    ((a & 0xffff) << 16) | (b & 0xffff)
10615}
10616
10617/// Unpack and interleave 32 bits from masks a and b, and store the 64-bit result in k.
10618///
10619/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kunpackd)
10620#[inline]
10621#[target_feature(enable = "avx512bw")]
10622#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10623#[cfg_attr(test, assert_instr(mov))] // generate normal and code instead of kunpckdq
10624pub fn _mm512_kunpackd(a: __mmask64, b: __mmask64) -> __mmask64 {
10625    ((a & 0xffffffff) << 32) | (b & 0xffffffff)
10626}
10627
/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi8&expand=1407)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub fn _mm512_cvtepi16_epi8(a: __m512i) -> __m256i {
    unsafe {
        let a = a.as_i16x32();
        // Lane-wise integer cast i16 -> i8 truncates each element, which is
        // exactly the documented behavior of `vpmovwb`.
        transmute::<i8x32, _>(simd_cast(a))
    }
}
10641
/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi8&expand=1408)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub fn _mm512_mask_cvtepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
    unsafe {
        // Perform the unmasked conversion, then blend with `src` per mask bit.
        let convert = _mm512_cvtepi16_epi8(a).as_i8x32();
        transmute(simd_select_bitmask(k, convert, src.as_i8x32()))
    }
}
10655
/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi8&expand=1409)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub fn _mm512_maskz_cvtepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
    unsafe {
        // Perform the unmasked conversion, then zero lanes whose mask bit is clear.
        let convert = _mm512_cvtepi16_epi8(a).as_i8x32();
        transmute(simd_select_bitmask(k, convert, i8x32::ZERO))
    }
}
10669
10670/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
10671///
10672/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi16_epi8&expand=1404)
10673#[inline]
10674#[target_feature(enable = "avx512bw,avx512vl")]
10675#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10676#[cfg_attr(test, assert_instr(vpmovwb))]
10677pub fn _mm256_cvtepi16_epi8(a: __m256i) -> __m128i {
10678    unsafe {
10679        let a = a.as_i16x16();
10680        transmute::<i8x16, _>(simd_cast(a))
10681    }
10682}
10683
10684/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10685///
10686/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi8&expand=1405)
10687#[inline]
10688#[target_feature(enable = "avx512bw,avx512vl")]
10689#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10690#[cfg_attr(test, assert_instr(vpmovwb))]
10691pub fn _mm256_mask_cvtepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
10692    unsafe {
10693        let convert = _mm256_cvtepi16_epi8(a).as_i8x16();
10694        transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
10695    }
10696}
10697
10698/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10699///
10700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi8&expand=1406)
10701#[inline]
10702#[target_feature(enable = "avx512bw,avx512vl")]
10703#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10704#[cfg_attr(test, assert_instr(vpmovwb))]
10705pub fn _mm256_maskz_cvtepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
10706    unsafe {
10707        let convert = _mm256_cvtepi16_epi8(a).as_i8x16();
10708        transmute(simd_select_bitmask(k, convert, i8x16::ZERO))
10709    }
10710}
10711
/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi16_epi8&expand=1401)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub fn _mm_cvtepi16_epi8(a: __m128i) -> __m128i {
    unsafe {
        let a = a.as_i16x8();
        // Widen the 8 input words to a 16-element vector whose upper half is
        // zero (index 8 reads lane 0 of the i16x8::ZERO operand), so the
        // truncating cast below produces a full i8x16 with lanes 8..15 == 0 —
        // matching the intrinsic's dst[127:64] := 0 behavior. This exact
        // shuffle+cast shape is what lowers to a single `vpmovwb`.
        let v256: i16x16 = simd_shuffle!(
            a,
            i16x8::ZERO,
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8]
        );
        transmute::<i8x16, _>(simd_cast(v256))
    }
}
10730
10731/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10732///
10733/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi8&expand=1402)
10734#[inline]
10735#[target_feature(enable = "avx512bw,avx512vl")]
10736#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10737#[cfg_attr(test, assert_instr(vpmovwb))]
10738pub fn _mm_mask_cvtepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
10739    unsafe {
10740        let convert = _mm_cvtepi16_epi8(a).as_i8x16();
10741        let k: __mmask16 = 0b11111111_11111111 & k as __mmask16;
10742        transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
10743    }
10744}
10745
10746/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10747///
10748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi8&expand=1403)
10749#[inline]
10750#[target_feature(enable = "avx512bw,avx512vl")]
10751#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10752#[cfg_attr(test, assert_instr(vpmovwb))]
10753pub fn _mm_maskz_cvtepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
10754    unsafe {
10755        let convert = _mm_cvtepi16_epi8(a).as_i8x16();
10756        let k: __mmask16 = 0b11111111_11111111 & k as __mmask16;
10757        transmute(simd_select_bitmask(k, convert, i8x16::ZERO))
10758    }
10759}
10760
10761/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
10762///
10763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi16_epi8&expand=1807)
10764#[inline]
10765#[target_feature(enable = "avx512bw")]
10766#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10767#[cfg_attr(test, assert_instr(vpmovswb))]
10768pub fn _mm512_cvtsepi16_epi8(a: __m512i) -> __m256i {
10769    unsafe {
10770        transmute(vpmovswb(
10771            a.as_i16x32(),
10772            i8x32::ZERO,
10773            0b11111111_11111111_11111111_11111111,
10774        ))
10775    }
10776}
10777
10778/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10779///
10780/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi16_epi8&expand=1808)
10781#[inline]
10782#[target_feature(enable = "avx512bw")]
10783#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10784#[cfg_attr(test, assert_instr(vpmovswb))]
10785pub fn _mm512_mask_cvtsepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
10786    unsafe { transmute(vpmovswb(a.as_i16x32(), src.as_i8x32(), k)) }
10787}
10788
10789/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10790///
10791/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi16_epi8&expand=1809)
10792#[inline]
10793#[target_feature(enable = "avx512bw")]
10794#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10795#[cfg_attr(test, assert_instr(vpmovswb))]
10796pub fn _mm512_maskz_cvtsepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
10797    unsafe { transmute(vpmovswb(a.as_i16x32(), i8x32::ZERO, k)) }
10798}
10799
10800/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
10801///
10802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi16_epi8&expand=1804)
10803#[inline]
10804#[target_feature(enable = "avx512bw,avx512vl")]
10805#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10806#[cfg_attr(test, assert_instr(vpmovswb))]
10807pub fn _mm256_cvtsepi16_epi8(a: __m256i) -> __m128i {
10808    unsafe { transmute(vpmovswb256(a.as_i16x16(), i8x16::ZERO, 0b11111111_11111111)) }
10809}
10810
10811/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10812///
10813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi16_epi8&expand=1805)
10814#[inline]
10815#[target_feature(enable = "avx512bw,avx512vl")]
10816#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10817#[cfg_attr(test, assert_instr(vpmovswb))]
10818pub fn _mm256_mask_cvtsepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
10819    unsafe { transmute(vpmovswb256(a.as_i16x16(), src.as_i8x16(), k)) }
10820}
10821
10822/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10823///
10824/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi16_epi8&expand=1806)
10825#[inline]
10826#[target_feature(enable = "avx512bw,avx512vl")]
10827#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10828#[cfg_attr(test, assert_instr(vpmovswb))]
10829pub fn _mm256_maskz_cvtsepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
10830    unsafe { transmute(vpmovswb256(a.as_i16x16(), i8x16::ZERO, k)) }
10831}
10832
10833/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
10834///
10835/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi16_epi8&expand=1801)
10836#[inline]
10837#[target_feature(enable = "avx512bw,avx512vl")]
10838#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10839#[cfg_attr(test, assert_instr(vpmovswb))]
10840pub fn _mm_cvtsepi16_epi8(a: __m128i) -> __m128i {
10841    unsafe { transmute(vpmovswb128(a.as_i16x8(), i8x16::ZERO, 0b11111111)) }
10842}
10843
10844/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10845///
10846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi16_epi8&expand=1802)
10847#[inline]
10848#[target_feature(enable = "avx512bw,avx512vl")]
10849#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10850#[cfg_attr(test, assert_instr(vpmovswb))]
10851pub fn _mm_mask_cvtsepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
10852    unsafe { transmute(vpmovswb128(a.as_i16x8(), src.as_i8x16(), k)) }
10853}
10854
10855/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10856///
10857/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi16_epi8&expand=1803)
10858#[inline]
10859#[target_feature(enable = "avx512bw,avx512vl")]
10860#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10861#[cfg_attr(test, assert_instr(vpmovswb))]
10862pub fn _mm_maskz_cvtsepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
10863    unsafe { transmute(vpmovswb128(a.as_i16x8(), i8x16::ZERO, k)) }
10864}
10865
10866/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
10867///
10868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi16_epi8&expand=2042)
10869#[inline]
10870#[target_feature(enable = "avx512bw")]
10871#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10872#[cfg_attr(test, assert_instr(vpmovuswb))]
10873pub fn _mm512_cvtusepi16_epi8(a: __m512i) -> __m256i {
10874    unsafe {
10875        transmute(vpmovuswb(
10876            a.as_u16x32(),
10877            u8x32::ZERO,
10878            0b11111111_11111111_11111111_11111111,
10879        ))
10880    }
10881}
10882
10883/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10884///
10885/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi16_epi8&expand=2043)
10886#[inline]
10887#[target_feature(enable = "avx512bw")]
10888#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10889#[cfg_attr(test, assert_instr(vpmovuswb))]
10890pub fn _mm512_mask_cvtusepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
10891    unsafe { transmute(vpmovuswb(a.as_u16x32(), src.as_u8x32(), k)) }
10892}
10893
10894/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10895///
10896/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi16_epi8&expand=2044)
10897#[inline]
10898#[target_feature(enable = "avx512bw")]
10899#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10900#[cfg_attr(test, assert_instr(vpmovuswb))]
10901pub fn _mm512_maskz_cvtusepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
10902    unsafe { transmute(vpmovuswb(a.as_u16x32(), u8x32::ZERO, k)) }
10903}
10904
10905/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
10906///
10907/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi16_epi8&expand=2039)
10908#[inline]
10909#[target_feature(enable = "avx512bw,avx512vl")]
10910#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10911#[cfg_attr(test, assert_instr(vpmovuswb))]
10912pub fn _mm256_cvtusepi16_epi8(a: __m256i) -> __m128i {
10913    unsafe {
10914        transmute(vpmovuswb256(
10915            a.as_u16x16(),
10916            u8x16::ZERO,
10917            0b11111111_11111111,
10918        ))
10919    }
10920}
10921
10922/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10923///
10924/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi16_epi8&expand=2040)
10925#[inline]
10926#[target_feature(enable = "avx512bw,avx512vl")]
10927#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10928#[cfg_attr(test, assert_instr(vpmovuswb))]
10929pub fn _mm256_mask_cvtusepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
10930    unsafe { transmute(vpmovuswb256(a.as_u16x16(), src.as_u8x16(), k)) }
10931}
10932
10933/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10934///
10935/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi16_epi8&expand=2041)
10936#[inline]
10937#[target_feature(enable = "avx512bw,avx512vl")]
10938#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10939#[cfg_attr(test, assert_instr(vpmovuswb))]
10940pub fn _mm256_maskz_cvtusepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
10941    unsafe { transmute(vpmovuswb256(a.as_u16x16(), u8x16::ZERO, k)) }
10942}
10943
10944/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
10945///
10946/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi16_epi8&expand=2036)
10947#[inline]
10948#[target_feature(enable = "avx512bw,avx512vl")]
10949#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10950#[cfg_attr(test, assert_instr(vpmovuswb))]
10951pub fn _mm_cvtusepi16_epi8(a: __m128i) -> __m128i {
10952    unsafe { transmute(vpmovuswb128(a.as_u16x8(), u8x16::ZERO, 0b11111111)) }
10953}
10954
10955/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10956///
10957/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi16_epi8&expand=2037)
10958#[inline]
10959#[target_feature(enable = "avx512bw,avx512vl")]
10960#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10961#[cfg_attr(test, assert_instr(vpmovuswb))]
10962pub fn _mm_mask_cvtusepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
10963    unsafe { transmute(vpmovuswb128(a.as_u16x8(), src.as_u8x16(), k)) }
10964}
10965
10966/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10967///
10968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi16_epi8&expand=2038)
10969#[inline]
10970#[target_feature(enable = "avx512bw,avx512vl")]
10971#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10972#[cfg_attr(test, assert_instr(vpmovuswb))]
10973pub fn _mm_maskz_cvtusepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
10974    unsafe { transmute(vpmovuswb128(a.as_u16x8(), u8x16::ZERO, k)) }
10975}
10976
10977/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst.
10978///
10979/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi16&expand=1526)
10980#[inline]
10981#[target_feature(enable = "avx512bw")]
10982#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10983#[cfg_attr(test, assert_instr(vpmovsxbw))]
10984pub fn _mm512_cvtepi8_epi16(a: __m256i) -> __m512i {
10985    unsafe {
10986        let a = a.as_i8x32();
10987        transmute::<i16x32, _>(simd_cast(a))
10988    }
10989}
10990
10991/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10992///
10993/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi16&expand=1527)
10994#[inline]
10995#[target_feature(enable = "avx512bw")]
10996#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10997#[cfg_attr(test, assert_instr(vpmovsxbw))]
10998pub fn _mm512_mask_cvtepi8_epi16(src: __m512i, k: __mmask32, a: __m256i) -> __m512i {
10999    unsafe {
11000        let convert = _mm512_cvtepi8_epi16(a).as_i16x32();
11001        transmute(simd_select_bitmask(k, convert, src.as_i16x32()))
11002    }
11003}
11004
11005/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11006///
11007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi16&expand=1528)
11008#[inline]
11009#[target_feature(enable = "avx512bw")]
11010#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11011#[cfg_attr(test, assert_instr(vpmovsxbw))]
11012pub fn _mm512_maskz_cvtepi8_epi16(k: __mmask32, a: __m256i) -> __m512i {
11013    unsafe {
11014        let convert = _mm512_cvtepi8_epi16(a).as_i16x32();
11015        transmute(simd_select_bitmask(k, convert, i16x32::ZERO))
11016    }
11017}
11018
11019/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11020///
11021/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi16&expand=1524)
11022#[inline]
11023#[target_feature(enable = "avx512bw,avx512vl")]
11024#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11025#[cfg_attr(test, assert_instr(vpmovsxbw))]
11026pub fn _mm256_mask_cvtepi8_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
11027    unsafe {
11028        let convert = _mm256_cvtepi8_epi16(a).as_i16x16();
11029        transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
11030    }
11031}
11032
11033/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11034///
11035/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi16&expand=1525)
11036#[inline]
11037#[target_feature(enable = "avx512bw,avx512vl")]
11038#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11039#[cfg_attr(test, assert_instr(vpmovsxbw))]
11040pub fn _mm256_maskz_cvtepi8_epi16(k: __mmask16, a: __m128i) -> __m256i {
11041    unsafe {
11042        let convert = _mm256_cvtepi8_epi16(a).as_i16x16();
11043        transmute(simd_select_bitmask(k, convert, i16x16::ZERO))
11044    }
11045}
11046
11047/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11048///
11049/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi16&expand=1521)
11050#[inline]
11051#[target_feature(enable = "avx512bw,avx512vl")]
11052#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11053#[cfg_attr(test, assert_instr(vpmovsxbw))]
11054pub fn _mm_mask_cvtepi8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11055    unsafe {
11056        let convert = _mm_cvtepi8_epi16(a).as_i16x8();
11057        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
11058    }
11059}
11060
11061/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11062///
11063/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi16&expand=1522)
11064#[inline]
11065#[target_feature(enable = "avx512bw,avx512vl")]
11066#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11067#[cfg_attr(test, assert_instr(vpmovsxbw))]
11068pub fn _mm_maskz_cvtepi8_epi16(k: __mmask8, a: __m128i) -> __m128i {
11069    unsafe {
11070        let convert = _mm_cvtepi8_epi16(a).as_i16x8();
11071        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
11072    }
11073}
11074
11075/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst.
11076///
11077/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi16&expand=1612)
11078#[inline]
11079#[target_feature(enable = "avx512bw")]
11080#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11081#[cfg_attr(test, assert_instr(vpmovzxbw))]
11082pub fn _mm512_cvtepu8_epi16(a: __m256i) -> __m512i {
11083    unsafe {
11084        let a = a.as_u8x32();
11085        transmute::<i16x32, _>(simd_cast(a))
11086    }
11087}
11088
11089/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11090///
11091/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi16&expand=1613)
11092#[inline]
11093#[target_feature(enable = "avx512bw")]
11094#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11095#[cfg_attr(test, assert_instr(vpmovzxbw))]
11096pub fn _mm512_mask_cvtepu8_epi16(src: __m512i, k: __mmask32, a: __m256i) -> __m512i {
11097    unsafe {
11098        let convert = _mm512_cvtepu8_epi16(a).as_i16x32();
11099        transmute(simd_select_bitmask(k, convert, src.as_i16x32()))
11100    }
11101}
11102
11103/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11104///
11105/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi16&expand=1614)
11106#[inline]
11107#[target_feature(enable = "avx512bw")]
11108#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11109#[cfg_attr(test, assert_instr(vpmovzxbw))]
11110pub fn _mm512_maskz_cvtepu8_epi16(k: __mmask32, a: __m256i) -> __m512i {
11111    unsafe {
11112        let convert = _mm512_cvtepu8_epi16(a).as_i16x32();
11113        transmute(simd_select_bitmask(k, convert, i16x32::ZERO))
11114    }
11115}
11116
11117/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11118///
11119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi16&expand=1610)
11120#[inline]
11121#[target_feature(enable = "avx512bw,avx512vl")]
11122#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11123#[cfg_attr(test, assert_instr(vpmovzxbw))]
11124pub fn _mm256_mask_cvtepu8_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
11125    unsafe {
11126        let convert = _mm256_cvtepu8_epi16(a).as_i16x16();
11127        transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
11128    }
11129}
11130
11131/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11132///
11133/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu8_epi16&expand=1611)
11134#[inline]
11135#[target_feature(enable = "avx512bw,avx512vl")]
11136#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11137#[cfg_attr(test, assert_instr(vpmovzxbw))]
11138pub fn _mm256_maskz_cvtepu8_epi16(k: __mmask16, a: __m128i) -> __m256i {
11139    unsafe {
11140        let convert = _mm256_cvtepu8_epi16(a).as_i16x16();
11141        transmute(simd_select_bitmask(k, convert, i16x16::ZERO))
11142    }
11143}
11144
11145/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11146///
11147/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi16&expand=1607)
11148#[inline]
11149#[target_feature(enable = "avx512bw,avx512vl")]
11150#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11151#[cfg_attr(test, assert_instr(vpmovzxbw))]
11152pub fn _mm_mask_cvtepu8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11153    unsafe {
11154        let convert = _mm_cvtepu8_epi16(a).as_i16x8();
11155        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
11156    }
11157}
11158
11159/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11160///
11161/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu8_epi16&expand=1608)
11162#[inline]
11163#[target_feature(enable = "avx512bw,avx512vl")]
11164#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11165#[cfg_attr(test, assert_instr(vpmovzxbw))]
11166pub fn _mm_maskz_cvtepu8_epi16(k: __mmask8, a: __m128i) -> __m128i {
11167    unsafe {
11168        let convert = _mm_cvtepu8_epi16(a).as_i16x8();
11169        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
11170    }
11171}
11172
/// Shift 128-bit lanes in a left by imm8 bytes while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_bslli_epi128&expand=591)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_bslli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Shuffle-index generator for output byte `i` (0..=63). The shuffle's
        // first operand is the zero vector (indices 0..=63) and `a` is the
        // second (indices 64..=127). A byte shifted in from below selects
        // index 0 (a zero byte); otherwise byte `i - shift` of `a` is taken.
        // `i % 16` confines the shift to each 128-bit lane, and a shift of 16
        // or more clears the whole lane — matching VPSLLDQ semantics.
        const fn mask(shift: i32, i: u32) -> u32 {
            let shift = shift as u32 & 0xff;
            if shift > 15 || i % 16 < shift {
                0
            } else {
                64 + (i - shift)
            }
        }
        let a = a.as_i8x64();
        let zero = i8x64::ZERO;
        // Fully unrolled 64-entry index table: simd_shuffle! requires a
        // compile-time constant array, so each index is spelled out.
        let r: i8x64 = simd_shuffle!(
            zero,
            a,
            [
                mask(IMM8, 0),
                mask(IMM8, 1),
                mask(IMM8, 2),
                mask(IMM8, 3),
                mask(IMM8, 4),
                mask(IMM8, 5),
                mask(IMM8, 6),
                mask(IMM8, 7),
                mask(IMM8, 8),
                mask(IMM8, 9),
                mask(IMM8, 10),
                mask(IMM8, 11),
                mask(IMM8, 12),
                mask(IMM8, 13),
                mask(IMM8, 14),
                mask(IMM8, 15),
                mask(IMM8, 16),
                mask(IMM8, 17),
                mask(IMM8, 18),
                mask(IMM8, 19),
                mask(IMM8, 20),
                mask(IMM8, 21),
                mask(IMM8, 22),
                mask(IMM8, 23),
                mask(IMM8, 24),
                mask(IMM8, 25),
                mask(IMM8, 26),
                mask(IMM8, 27),
                mask(IMM8, 28),
                mask(IMM8, 29),
                mask(IMM8, 30),
                mask(IMM8, 31),
                mask(IMM8, 32),
                mask(IMM8, 33),
                mask(IMM8, 34),
                mask(IMM8, 35),
                mask(IMM8, 36),
                mask(IMM8, 37),
                mask(IMM8, 38),
                mask(IMM8, 39),
                mask(IMM8, 40),
                mask(IMM8, 41),
                mask(IMM8, 42),
                mask(IMM8, 43),
                mask(IMM8, 44),
                mask(IMM8, 45),
                mask(IMM8, 46),
                mask(IMM8, 47),
                mask(IMM8, 48),
                mask(IMM8, 49),
                mask(IMM8, 50),
                mask(IMM8, 51),
                mask(IMM8, 52),
                mask(IMM8, 53),
                mask(IMM8, 54),
                mask(IMM8, 55),
                mask(IMM8, 56),
                mask(IMM8, 57),
                mask(IMM8, 58),
                mask(IMM8, 59),
                mask(IMM8, 60),
                mask(IMM8, 61),
                mask(IMM8, 62),
                mask(IMM8, 63),
            ],
        );
        transmute(r)
    }
}
11267
/// Shift 128-bit lanes in a right by imm8 bytes while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_bsrli_epi128&expand=594)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_bsrli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Shuffle index for output byte `i`. The shuffle's first operand is the
        // zero vector (indices 0..=63) and its second is `a` (indices 64..=127),
        // so returning 0 selects a zero byte and `64 + n` selects byte `n` of `a`.
        const fn mask(shift: i32, i: u32) -> u32 {
            let shift = shift as u32 & 0xff;
            // A shift of 16 or more clears the whole 16-byte lane; a byte whose
            // source would cross out of its own lane is also zeroed.
            if shift > 15 || (15 - (i % 16)) < shift {
                0
            } else {
                // In-lane byte: take byte `i + shift` of `a` (the +64 addresses
                // the second shuffle operand).
                64 + (i + shift)
            }
        }
        let a = a.as_i8x64();
        let zero = i8x64::ZERO;
        let r: i8x64 = simd_shuffle!(
            zero,
            a,
            [
                mask(IMM8, 0),
                mask(IMM8, 1),
                mask(IMM8, 2),
                mask(IMM8, 3),
                mask(IMM8, 4),
                mask(IMM8, 5),
                mask(IMM8, 6),
                mask(IMM8, 7),
                mask(IMM8, 8),
                mask(IMM8, 9),
                mask(IMM8, 10),
                mask(IMM8, 11),
                mask(IMM8, 12),
                mask(IMM8, 13),
                mask(IMM8, 14),
                mask(IMM8, 15),
                mask(IMM8, 16),
                mask(IMM8, 17),
                mask(IMM8, 18),
                mask(IMM8, 19),
                mask(IMM8, 20),
                mask(IMM8, 21),
                mask(IMM8, 22),
                mask(IMM8, 23),
                mask(IMM8, 24),
                mask(IMM8, 25),
                mask(IMM8, 26),
                mask(IMM8, 27),
                mask(IMM8, 28),
                mask(IMM8, 29),
                mask(IMM8, 30),
                mask(IMM8, 31),
                mask(IMM8, 32),
                mask(IMM8, 33),
                mask(IMM8, 34),
                mask(IMM8, 35),
                mask(IMM8, 36),
                mask(IMM8, 37),
                mask(IMM8, 38),
                mask(IMM8, 39),
                mask(IMM8, 40),
                mask(IMM8, 41),
                mask(IMM8, 42),
                mask(IMM8, 43),
                mask(IMM8, 44),
                mask(IMM8, 45),
                mask(IMM8, 46),
                mask(IMM8, 47),
                mask(IMM8, 48),
                mask(IMM8, 49),
                mask(IMM8, 50),
                mask(IMM8, 51),
                mask(IMM8, 52),
                mask(IMM8, 53),
                mask(IMM8, 54),
                mask(IMM8, 55),
                mask(IMM8, 56),
                mask(IMM8, 57),
                mask(IMM8, 58),
                mask(IMM8, 59),
                mask(IMM8, 60),
                mask(IMM8, 61),
                mask(IMM8, 62),
                mask(IMM8, 63),
            ],
        );
        transmute(r)
    }
}
11362
11363/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst.
11364/// Unlike [`_mm_alignr_epi8`], [`_mm256_alignr_epi8`] functions, where the entire input vectors are concatenated to the temporary result,
11365/// this concatenation happens in 4 steps, where each step builds 32-byte temporary result.
11366///
11367/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi8&expand=263)
11368#[inline]
11369#[target_feature(enable = "avx512bw")]
11370#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11371#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))]
11372#[rustc_legacy_const_generics(2)]
11373pub fn _mm512_alignr_epi8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
11374    const fn mask(shift: u32, i: u32) -> u32 {
11375        let shift = shift % 16;
11376        let mod_i = i % 16;
11377        if mod_i < (16 - shift) {
11378            i + shift
11379        } else {
11380            i + 48 + shift
11381        }
11382    }
11383
11384    // If palignr is shifting the pair of vectors more than the size of two
11385    // lanes, emit zero.
11386    if IMM8 >= 32 {
11387        return _mm512_setzero_si512();
11388    }
11389    // If palignr is shifting the pair of input vectors more than one lane,
11390    // but less than two lanes, convert to shifting in zeroes.
11391    let (a, b) = if IMM8 > 16 {
11392        (_mm512_setzero_si512(), a)
11393    } else {
11394        (a, b)
11395    };
11396    unsafe {
11397        if IMM8 == 16 {
11398            return transmute(a);
11399        }
11400
11401        let r: i8x64 = simd_shuffle!(
11402            b.as_i8x64(),
11403            a.as_i8x64(),
11404            [
11405                mask(IMM8 as u32, 0),
11406                mask(IMM8 as u32, 1),
11407                mask(IMM8 as u32, 2),
11408                mask(IMM8 as u32, 3),
11409                mask(IMM8 as u32, 4),
11410                mask(IMM8 as u32, 5),
11411                mask(IMM8 as u32, 6),
11412                mask(IMM8 as u32, 7),
11413                mask(IMM8 as u32, 8),
11414                mask(IMM8 as u32, 9),
11415                mask(IMM8 as u32, 10),
11416                mask(IMM8 as u32, 11),
11417                mask(IMM8 as u32, 12),
11418                mask(IMM8 as u32, 13),
11419                mask(IMM8 as u32, 14),
11420                mask(IMM8 as u32, 15),
11421                mask(IMM8 as u32, 16),
11422                mask(IMM8 as u32, 17),
11423                mask(IMM8 as u32, 18),
11424                mask(IMM8 as u32, 19),
11425                mask(IMM8 as u32, 20),
11426                mask(IMM8 as u32, 21),
11427                mask(IMM8 as u32, 22),
11428                mask(IMM8 as u32, 23),
11429                mask(IMM8 as u32, 24),
11430                mask(IMM8 as u32, 25),
11431                mask(IMM8 as u32, 26),
11432                mask(IMM8 as u32, 27),
11433                mask(IMM8 as u32, 28),
11434                mask(IMM8 as u32, 29),
11435                mask(IMM8 as u32, 30),
11436                mask(IMM8 as u32, 31),
11437                mask(IMM8 as u32, 32),
11438                mask(IMM8 as u32, 33),
11439                mask(IMM8 as u32, 34),
11440                mask(IMM8 as u32, 35),
11441                mask(IMM8 as u32, 36),
11442                mask(IMM8 as u32, 37),
11443                mask(IMM8 as u32, 38),
11444                mask(IMM8 as u32, 39),
11445                mask(IMM8 as u32, 40),
11446                mask(IMM8 as u32, 41),
11447                mask(IMM8 as u32, 42),
11448                mask(IMM8 as u32, 43),
11449                mask(IMM8 as u32, 44),
11450                mask(IMM8 as u32, 45),
11451                mask(IMM8 as u32, 46),
11452                mask(IMM8 as u32, 47),
11453                mask(IMM8 as u32, 48),
11454                mask(IMM8 as u32, 49),
11455                mask(IMM8 as u32, 50),
11456                mask(IMM8 as u32, 51),
11457                mask(IMM8 as u32, 52),
11458                mask(IMM8 as u32, 53),
11459                mask(IMM8 as u32, 54),
11460                mask(IMM8 as u32, 55),
11461                mask(IMM8 as u32, 56),
11462                mask(IMM8 as u32, 57),
11463                mask(IMM8 as u32, 58),
11464                mask(IMM8 as u32, 59),
11465                mask(IMM8 as u32, 60),
11466                mask(IMM8 as u32, 61),
11467                mask(IMM8 as u32, 62),
11468                mask(IMM8 as u32, 63),
11469            ],
11470        );
11471        transmute(r)
11472    }
11473}
11474
11475/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11476///
11477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi8&expand=264)
11478#[inline]
11479#[target_feature(enable = "avx512bw")]
11480#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11481#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))]
11482#[rustc_legacy_const_generics(4)]
11483pub fn _mm512_mask_alignr_epi8<const IMM8: i32>(
11484    src: __m512i,
11485    k: __mmask64,
11486    a: __m512i,
11487    b: __m512i,
11488) -> __m512i {
11489    unsafe {
11490        static_assert_uimm_bits!(IMM8, 8);
11491        let r = _mm512_alignr_epi8::<IMM8>(a, b);
11492        transmute(simd_select_bitmask(k, r.as_i8x64(), src.as_i8x64()))
11493    }
11494}
11495
11496/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11497///
11498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi8&expand=265)
11499#[inline]
11500#[target_feature(enable = "avx512bw")]
11501#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11502#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))]
11503#[rustc_legacy_const_generics(3)]
11504pub fn _mm512_maskz_alignr_epi8<const IMM8: i32>(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
11505    unsafe {
11506        static_assert_uimm_bits!(IMM8, 8);
11507        let r = _mm512_alignr_epi8::<IMM8>(a, b);
11508        transmute(simd_select_bitmask(k, r.as_i8x64(), i8x64::ZERO))
11509    }
11510}
11511
11512/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11513///
11514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi8&expand=261)
11515#[inline]
11516#[target_feature(enable = "avx512bw,avx512vl")]
11517#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11518#[rustc_legacy_const_generics(4)]
11519#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
11520pub fn _mm256_mask_alignr_epi8<const IMM8: i32>(
11521    src: __m256i,
11522    k: __mmask32,
11523    a: __m256i,
11524    b: __m256i,
11525) -> __m256i {
11526    unsafe {
11527        static_assert_uimm_bits!(IMM8, 8);
11528        let r = _mm256_alignr_epi8::<IMM8>(a, b);
11529        transmute(simd_select_bitmask(k, r.as_i8x32(), src.as_i8x32()))
11530    }
11531}
11532
11533/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11534///
11535/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi8&expand=262)
11536#[inline]
11537#[target_feature(enable = "avx512bw,avx512vl")]
11538#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11539#[rustc_legacy_const_generics(3)]
11540#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
11541pub fn _mm256_maskz_alignr_epi8<const IMM8: i32>(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
11542    unsafe {
11543        static_assert_uimm_bits!(IMM8, 8);
11544        let r = _mm256_alignr_epi8::<IMM8>(a, b);
11545        transmute(simd_select_bitmask(k, r.as_i8x32(), i8x32::ZERO))
11546    }
11547}
11548
11549/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11550///
11551/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi8&expand=258)
11552#[inline]
11553#[target_feature(enable = "avx512bw,avx512vl")]
11554#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11555#[rustc_legacy_const_generics(4)]
11556#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
11557pub fn _mm_mask_alignr_epi8<const IMM8: i32>(
11558    src: __m128i,
11559    k: __mmask16,
11560    a: __m128i,
11561    b: __m128i,
11562) -> __m128i {
11563    unsafe {
11564        static_assert_uimm_bits!(IMM8, 8);
11565        let r = _mm_alignr_epi8::<IMM8>(a, b);
11566        transmute(simd_select_bitmask(k, r.as_i8x16(), src.as_i8x16()))
11567    }
11568}
11569
11570/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11571///
11572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi8&expand=259)
11573#[inline]
11574#[target_feature(enable = "avx512bw,avx512vl")]
11575#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11576#[rustc_legacy_const_generics(3)]
11577#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
11578pub fn _mm_maskz_alignr_epi8<const IMM8: i32>(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
11579    unsafe {
11580        static_assert_uimm_bits!(IMM8, 8);
11581        let r = _mm_alignr_epi8::<IMM8>(a, b);
11582        transmute(simd_select_bitmask(k, r.as_i8x16(), i8x16::ZERO))
11583    }
11584}
11585
/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi16_storeu_epi8&expand=1812)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of up to 32 bytes; no alignment is required.
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub unsafe fn _mm512_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
    vpmovswbmem(mem_addr, a.as_i16x32(), k);
}
11596
/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi16_storeu_epi8&expand=1811)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of up to 16 bytes; no alignment is required.
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub unsafe fn _mm256_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
    vpmovswbmem256(mem_addr, a.as_i16x16(), k);
}
11607
/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi16_storeu_epi8&expand=1810)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of up to 8 bytes; no alignment is required.
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub unsafe fn _mm_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovswbmem128(mem_addr, a.as_i16x8(), k);
}
11618
/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_storeu_epi8&expand=1412)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of up to 32 bytes; no alignment is required.
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub unsafe fn _mm512_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
    vpmovwbmem(mem_addr, a.as_i16x32(), k);
}
11629
/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_storeu_epi8&expand=1411)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of up to 16 bytes; no alignment is required.
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub unsafe fn _mm256_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
    vpmovwbmem256(mem_addr, a.as_i16x16(), k);
}
11640
/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_storeu_epi8&expand=1410)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of up to 8 bytes; no alignment is required.
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub unsafe fn _mm_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovwbmem128(mem_addr, a.as_i16x8(), k);
}
11651
/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi16_storeu_epi8&expand=2047)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of up to 32 bytes; no alignment is required.
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm512_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
    vpmovuswbmem(mem_addr, a.as_i16x32(), k);
}
11662
/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi16_storeu_epi8&expand=2046)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of up to 16 bytes; no alignment is required.
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm256_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
    vpmovuswbmem256(mem_addr, a.as_i16x16(), k);
}
11673
/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi16_storeu_epi8&expand=2045)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of up to 8 bytes; no alignment is required.
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovuswbmem128(mem_addr, a.as_i16x8(), k);
}
11684
// Bindings to the LLVM intrinsics that back the AVX-512BW operations above.
// The `link_name` strings must match LLVM's exported intrinsic names exactly.
#[allow(improper_ctypes)]
unsafe extern "C" {
    #[link_name = "llvm.x86.avx512.pmul.hr.sw.512"]
    fn vpmulhrsw(a: i16x32, b: i16x32) -> i16x32;

    #[link_name = "llvm.x86.avx512.pmaddubs.w.512"]
    fn vpmaddubsw(a: u8x64, b: i8x64) -> i16x32;

    // Saturating pack (narrowing) operations.
    #[link_name = "llvm.x86.avx512.packssdw.512"]
    fn vpackssdw(a: i32x16, b: i32x16) -> i16x32;
    #[link_name = "llvm.x86.avx512.packsswb.512"]
    fn vpacksswb(a: i16x32, b: i16x32) -> i8x64;
    #[link_name = "llvm.x86.avx512.packusdw.512"]
    fn vpackusdw(a: i32x16, b: i32x16) -> u16x32;
    #[link_name = "llvm.x86.avx512.packuswb.512"]
    fn vpackuswb(a: i16x32, b: i16x32) -> u8x64;

    // 16-bit shifts by a vector-supplied count.
    #[link_name = "llvm.x86.avx512.psll.w.512"]
    fn vpsllw(a: i16x32, count: i16x8) -> i16x32;

    #[link_name = "llvm.x86.avx512.psrl.w.512"]
    fn vpsrlw(a: i16x32, count: i16x8) -> i16x32;

    #[link_name = "llvm.x86.avx512.psra.w.512"]
    fn vpsraw(a: i16x32, count: i16x8) -> i16x32;

    // Variable word permutes (two-source and single-source).
    #[link_name = "llvm.x86.avx512.vpermi2var.hi.512"]
    fn vpermi2w(a: i16x32, idx: i16x32, b: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.vpermi2var.hi.256"]
    fn vpermi2w256(a: i16x16, idx: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx512.vpermi2var.hi.128"]
    fn vpermi2w128(a: i16x8, idx: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.avx512.permvar.hi.512"]
    fn vpermw(a: i16x32, idx: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.permvar.hi.256"]
    fn vpermw256(a: i16x16, idx: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx512.permvar.hi.128"]
    fn vpermw128(a: i16x8, idx: i16x8) -> i16x8;

    #[link_name = "llvm.x86.avx512.pshuf.b.512"]
    fn vpshufb(a: i8x64, b: i8x64) -> i8x64;

    #[link_name = "llvm.x86.avx512.psad.bw.512"]
    fn vpsadbw(a: u8x64, b: u8x64) -> u64x8;

    #[link_name = "llvm.x86.avx512.dbpsadbw.512"]
    fn vdbpsadbw(a: u8x64, b: u8x64, imm8: i32) -> u16x32;
    #[link_name = "llvm.x86.avx512.dbpsadbw.256"]
    fn vdbpsadbw256(a: u8x32, b: u8x32, imm8: i32) -> u16x16;
    #[link_name = "llvm.x86.avx512.dbpsadbw.128"]
    fn vdbpsadbw128(a: u8x16, b: u8x16, imm8: i32) -> u16x8;

    // Masked saturating word->byte narrowing (register destination).
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.512"]
    fn vpmovswb(a: i16x32, src: i8x32, mask: u32) -> i8x32;
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.256"]
    fn vpmovswb256(a: i16x16, src: i8x16, mask: u16) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.128"]
    fn vpmovswb128(a: i16x8, src: i8x16, mask: u8) -> i8x16;

    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.512"]
    fn vpmovuswb(a: u16x32, src: u8x32, mask: u32) -> u8x32;
    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.256"]
    fn vpmovuswb256(a: u16x16, src: u8x16, mask: u16) -> u8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.128"]
    fn vpmovuswb128(a: u16x8, src: u8x16, mask: u8) -> u8x16;

    // Masked word->byte narrowing stores (memory destination), used by the
    // `*_storeu_epi8` intrinsics above.
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.512"]
    fn vpmovswbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.256"]
    fn vpmovswbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.128"]
    fn vpmovswbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.512"]
    fn vpmovwbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
    #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.256"]
    fn vpmovwbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.128"]
    fn vpmovwbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.512"]
    fn vpmovuswbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.256"]
    fn vpmovuswbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.128"]
    fn vpmovuswbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);
}
11773
11774#[cfg(test)]
11775mod tests {
11776
11777    use stdarch_test::simd_test;
11778
11779    use crate::core_arch::x86::*;
11780    use crate::hint::black_box;
11781    use crate::mem::{self};
11782
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_abs_epi16() {
        // |-1| == 1 in every 16-bit lane.
        let a = _mm512_set1_epi16(-1);
        let r = _mm512_abs_epi16(a);
        let e = _mm512_set1_epi16(1);
        assert_eq_m512i(r, e);
    }
11790
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_abs_epi16() {
        let a = _mm512_set1_epi16(-1);
        // All-zero mask: every lane is copied from src (here `a`).
        let r = _mm512_mask_abs_epi16(a, 0, a);
        assert_eq_m512i(r, a);
        // Alternating mask: set bits take abs(-1)=1, clear bits keep src's -1.
        let r = _mm512_mask_abs_epi16(a, 0b00000000_11111111_00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
11802
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_abs_epi16() {
        let a = _mm512_set1_epi16(-1);
        // All-zero mask: every lane is zeroed.
        let r = _mm512_maskz_abs_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Alternating mask: set bits take abs(-1)=1, clear bits are zeroed.
        let r = _mm512_maskz_abs_epi16(0b00000000_11111111_00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                  0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
11814
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_abs_epi16() {
        let a = _mm256_set1_epi16(-1);
        // Zero mask copies src; low-half mask applies abs to the low 8 lanes.
        let r = _mm256_mask_abs_epi16(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_abs_epi16(a, 0b00000000_11111111, a);
        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }
11824
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_abs_epi16() {
        let a = _mm256_set1_epi16(-1);
        // Zero mask zeroes everything; low-half mask keeps abs in low 8 lanes.
        let r = _mm256_maskz_abs_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_abs_epi16(0b00000000_11111111, a);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }
11834
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_abs_epi16() {
        let a = _mm_set1_epi16(-1);
        // Zero mask copies src; low-half mask applies abs to the low 4 lanes.
        let r = _mm_mask_abs_epi16(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_abs_epi16(a, 0b00001111, a);
        let e = _mm_set_epi16(-1, -1, -1, -1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }
11844
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_abs_epi16() {
        let a = _mm_set1_epi16(-1);
        // Zero mask zeroes everything; low-half mask keeps abs in low 4 lanes.
        let r = _mm_maskz_abs_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_abs_epi16(0b00001111, a);
        let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }
11854
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_abs_epi8() {
        // |-1| == 1 in every 8-bit lane.
        let a = _mm512_set1_epi8(-1);
        let r = _mm512_abs_epi8(a);
        let e = _mm512_set1_epi8(1);
        assert_eq_m512i(r, e);
    }
11862
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_abs_epi8() {
        let a = _mm512_set1_epi8(-1);
        // All-zero mask: every lane is copied from src (here `a`).
        let r = _mm512_mask_abs_epi8(a, 0, a);
        assert_eq_m512i(r, a);
        // Alternating byte mask: set bits take abs(-1)=1, clear bits keep -1.
        let r = _mm512_mask_abs_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
11880
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_abs_epi8() {
        let a = _mm512_set1_epi8(-1);
        // All-zero mask: every lane is zeroed.
        let r = _mm512_maskz_abs_epi8(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Alternating byte mask: set bits take abs(-1)=1, clear bits are zeroed.
        let r = _mm512_maskz_abs_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
11897
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_abs_epi8() {
        let a = _mm256_set1_epi8(-1);
        // Zero mask copies src; alternating mask applies abs to selected bytes.
        let r = _mm256_mask_abs_epi8(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_abs_epi8(a, 0b00000000_11111111_00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }
11909
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_abs_epi8() {
        let a = _mm256_set1_epi8(-1);
        // Zero mask zeroes everything; alternating mask keeps abs in set lanes.
        let r = _mm256_maskz_abs_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_abs_epi8(0b00000000_11111111_00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }
11921
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_abs_epi8() {
        let a = _mm_set1_epi8(-1);
        // Zero mask copies src; low-half mask applies abs to the low 8 bytes.
        let r = _mm_mask_abs_epi8(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_abs_epi8(a, 0b00000000_11111111, a);
        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }
11931
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_abs_epi8() {
        let a = _mm_set1_epi8(-1);
        // Zero mask zeroes everything; low-half mask keeps abs in low 8 bytes.
        let r = _mm_maskz_abs_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_abs_epi8(0b00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }
11942
    // Plain (unmasked) wrapping 16-bit lane-wise add: 1 + 2 == 3 in all lanes.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_add_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_add_epi16(a, b);
        let e = _mm512_set1_epi16(3);
        assert_eq_m512i(r, e);
    }

    // Writemask add: zero mask passes `src` (a) through; set bits pick up
    // the sum. Low mask bits map to the rightmost `set_epi16` lanes.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_add_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_mask_add_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_add_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m512i(r, e);
    }

    // Zeromask add: masked-off lanes are zeroed rather than taken from src.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_add_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_maskz_add_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_add_epi16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m512i(r, e);
    }

    // 256-bit writemask add (AVX512VL form of the 512-bit test above).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_add_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_mask_add_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_add_epi16(a, 0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m256i(r, e);
    }

    // 256-bit zeromask add.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_add_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_maskz_add_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_add_epi16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m256i(r, e);
    }

    // 128-bit writemask add: mask covers 8 lanes, low nibble set.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_add_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        let r = _mm_mask_add_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_add_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 3, 3, 3, 3);
        assert_eq_m128i(r, e);
    }

    // 128-bit zeromask add.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_add_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        let r = _mm_maskz_add_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_add_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 3, 3, 3, 3);
        assert_eq_m128i(r, e);
    }
12021
    // Plain (unmasked) wrapping 8-bit lane-wise add: 1 + 2 == 3 in all 64 lanes.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_add_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_add_epi8(a, b);
        let e = _mm512_set1_epi8(3);
        assert_eq_m512i(r, e);
    }

    // Writemask 8-bit add; the 64-bit mask alternates bytes of 0s/1s, so
    // each `set_epi8` row alternates 8× src (1) then 8× sum (3).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_add_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_mask_add_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_add_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m512i(r, e);
    }

    // Zeromask 8-bit add: masked-off lanes become 0.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_add_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_maskz_add_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_add_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m512i(r, e);
    }

    // 256-bit writemask 8-bit add (AVX512VL).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_add_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_mask_add_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_add_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m256i(r, e);
    }

    // 256-bit zeromask 8-bit add.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_add_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_maskz_add_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_add_epi8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m256i(r, e);
    }

    // 128-bit writemask 8-bit add.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_add_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        let r = _mm_mask_add_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_add_epi8(a, 0b00000000_11111111, a, b);
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m128i(r, e);
    }

    // 128-bit zeromask 8-bit add.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_add_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        let r = _mm_maskz_add_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_add_epi8(0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m128i(r, e);
    }
12117
    // Unsigned saturating 16-bit add: 1 + 0xFFFF clamps at u16::MAX
    // (stored via `as i16` since the vector element type is signed).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_adds_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        let r = _mm512_adds_epu16(a, b);
        let e = _mm512_set1_epi16(u16::MAX as i16);
        assert_eq_m512i(r, e);
    }

    // Writemask saturating add: only the low 4 mask bits are set, i.e. the
    // 4 rightmost `set_epi16` lanes saturate; the rest keep src (1).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_adds_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        let r = _mm512_mask_adds_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_adds_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m512i(r, e);
    }

    // Zeromask saturating add: masked-off lanes are zeroed.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_adds_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        let r = _mm512_maskz_adds_epu16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_adds_epu16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m512i(r, e);
    }

    // 256-bit writemask unsigned saturating add (AVX512VL).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_adds_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(u16::MAX as i16);
        let r = _mm256_mask_adds_epu16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_adds_epu16(a, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m256i(r, e);
    }

    // 256-bit zeromask unsigned saturating add.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_adds_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(u16::MAX as i16);
        let r = _mm256_maskz_adds_epu16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_adds_epu16(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m256i(r, e);
    }

    // 128-bit writemask unsigned saturating add.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_adds_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(u16::MAX as i16);
        let r = _mm_mask_adds_epu16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_adds_epu16(a, 0b00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi16(1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m128i(r, e);
    }

    // 128-bit zeromask unsigned saturating add.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_adds_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(u16::MAX as i16);
        let r = _mm_maskz_adds_epu16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_adds_epu16(0b00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi16(0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m128i(r, e);
    }
12200
    // Unsigned saturating 8-bit add: 1 + 0xFF clamps at u8::MAX in all lanes.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_adds_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(u8::MAX as i8);
        let r = _mm512_adds_epu8(a, b);
        let e = _mm512_set1_epi8(u8::MAX as i8);
        assert_eq_m512i(r, e);
    }

    // Writemask saturating 8-bit add: only the low 4 of the 64 mask bits
    // are set, so only the 4 rightmost lanes saturate; the rest keep src.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_adds_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(u8::MAX as i8);
        let r = _mm512_mask_adds_epu8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_adds_epu8(
            a,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m512i(r, e);
    }

    // Zeromask saturating 8-bit add: masked-off lanes are zeroed.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_adds_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(u8::MAX as i8);
        let r = _mm512_maskz_adds_epu8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_adds_epu8(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m512i(r, e);
    }

    // 256-bit writemask unsigned saturating 8-bit add (AVX512VL).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_adds_epu8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(u8::MAX as i8);
        let r = _mm256_mask_adds_epu8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_adds_epu8(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m256i(r, e);
    }

    // 256-bit zeromask unsigned saturating 8-bit add.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_adds_epu8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(u8::MAX as i8);
        let r = _mm256_maskz_adds_epu8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_adds_epu8(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m256i(r, e);
    }

    // 128-bit writemask unsigned saturating 8-bit add.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_adds_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(u8::MAX as i8);
        let r = _mm_mask_adds_epu8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_adds_epu8(a, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m128i(r, e);
    }

    // 128-bit zeromask unsigned saturating 8-bit add.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_adds_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(u8::MAX as i8);
        let r = _mm_maskz_adds_epu8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_adds_epu8(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m128i(r, e);
    }
12298
    // Signed saturating 16-bit add: 1 + i16::MAX clamps at i16::MAX.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_adds_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_adds_epi16(a, b);
        let e = _mm512_set1_epi16(i16::MAX);
        assert_eq_m512i(r, e);
    }

    // Writemask signed saturating add: only the 4 rightmost lanes (low
    // mask bits) saturate; the rest keep src (1).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_adds_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_mask_adds_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_adds_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m512i(r, e);
    }

    // Zeromask signed saturating add: masked-off lanes are zeroed.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_adds_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_maskz_adds_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_adds_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m512i(r, e);
    }

    // 256-bit writemask signed saturating add (AVX512VL).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_adds_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_mask_adds_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_adds_epi16(a, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m256i(r, e);
    }

    // 256-bit zeromask signed saturating add.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_adds_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_maskz_adds_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_adds_epi16(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m256i(r, e);
    }

    // 128-bit writemask signed saturating add.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_adds_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(i16::MAX);
        let r = _mm_mask_adds_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_adds_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m128i(r, e);
    }

    // 128-bit zeromask signed saturating add.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_adds_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(i16::MAX);
        let r = _mm_maskz_adds_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_adds_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m128i(r, e);
    }
12379
    // Signed saturating 8-bit add: 1 + i8::MAX clamps at i8::MAX.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_adds_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(i8::MAX);
        let r = _mm512_adds_epi8(a, b);
        let e = _mm512_set1_epi8(i8::MAX);
        assert_eq_m512i(r, e);
    }

    // Writemask signed saturating 8-bit add: low 4 of the 64 mask bits set,
    // so only the 4 rightmost lanes saturate; the rest keep src (1).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_adds_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(i8::MAX);
        let r = _mm512_mask_adds_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_adds_epi8(
            a,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m512i(r, e);
    }

    // Zeromask signed saturating 8-bit add: masked-off lanes are zeroed.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_adds_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(i8::MAX);
        let r = _mm512_maskz_adds_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_adds_epi8(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m512i(r, e);
    }

    // 256-bit writemask signed saturating 8-bit add (AVX512VL).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_adds_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(i8::MAX);
        let r = _mm256_mask_adds_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_adds_epi8(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m256i(r, e);
    }

    // 256-bit zeromask signed saturating 8-bit add.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_adds_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(i8::MAX);
        let r = _mm256_maskz_adds_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_adds_epi8(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m256i(r, e);
    }

    // 128-bit writemask signed saturating 8-bit add.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_adds_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(i8::MAX);
        let r = _mm_mask_adds_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_adds_epi8(a, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

    // 128-bit zeromask signed saturating 8-bit add.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_adds_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(i8::MAX);
        let r = _mm_maskz_adds_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_adds_epi8(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }
12477
    // Plain (unmasked) wrapping 16-bit subtraction: 1 - 2 == -1 in all lanes.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_sub_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_sub_epi16(a, b);
        let e = _mm512_set1_epi16(-1);
        assert_eq_m512i(r, e);
    }

    // Writemask subtraction: masked-off lanes keep src (1), masked-in lanes
    // hold the difference (-1). Low mask bits map to the rightmost lanes.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_sub_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_mask_sub_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sub_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m512i(r, e);
    }

    // Zeromask subtraction: masked-off lanes are zeroed.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_sub_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_maskz_sub_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sub_epi16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m512i(r, e);
    }

    // 256-bit writemask subtraction (AVX512VL).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_sub_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_mask_sub_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sub_epi16(a, 0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m256i(r, e);
    }

    // 256-bit zeromask subtraction.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_sub_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_maskz_sub_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sub_epi16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m256i(r, e);
    }

    // 128-bit writemask subtraction.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_sub_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        let r = _mm_mask_sub_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sub_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    // 128-bit zeromask subtraction.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_sub_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        let r = _mm_maskz_sub_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sub_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }
12556
    // Plain (unmasked) wrapping 8-bit subtraction: 1 - 2 == -1 in all 64 lanes.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_sub_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_sub_epi8(a, b);
        let e = _mm512_set1_epi8(-1);
        assert_eq_m512i(r, e);
    }

    // Writemask 8-bit subtraction; the 64-bit mask alternates bytes of
    // 0s/1s, so each `set_epi8` row alternates 8× src (1) then 8× diff (-1).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_sub_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_mask_sub_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sub_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m512i(r, e);
    }

    // Zeromask 8-bit subtraction: masked-off lanes are zeroed.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_sub_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_maskz_sub_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sub_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m512i(r, e);
    }

    // 256-bit writemask 8-bit subtraction (AVX512VL).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_sub_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_mask_sub_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sub_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m256i(r, e);
    }

    // 256-bit zeromask 8-bit subtraction.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_sub_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_maskz_sub_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sub_epi8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m256i(r, e);
    }

    // 128-bit writemask 8-bit subtraction.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_sub_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        let r = _mm_mask_sub_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sub_epi8(a, 0b00000000_11111111, a, b);
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    // 128-bit zeromask 8-bit subtraction.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_sub_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        let r = _mm_maskz_sub_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sub_epi8(0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }
12652
12653    #[simd_test(enable = "avx512bw")]
12654    unsafe fn test_mm512_subs_epu16() {
12655        let a = _mm512_set1_epi16(1);
12656        let b = _mm512_set1_epi16(u16::MAX as i16);
12657        let r = _mm512_subs_epu16(a, b);
12658        let e = _mm512_set1_epi16(0);
12659        assert_eq_m512i(r, e);
12660    }
12661
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_subs_epu16() {
        // Unsigned saturating subtract: 1 - 0xFFFF clamps to 0.
        // Lanes with a clear mask bit keep src (`a`).
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        let r = _mm512_mask_subs_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Low 4 mask bits set: low 4 lanes saturate to 0, rest keep src's 1.
        let r = _mm512_mask_subs_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
12674
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_subs_epu16() {
        // Unsigned saturating subtract: 1 - 0xFFFF clamps to 0,
        // so masked and zeroed lanes are indistinguishable here (all zero).
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        let r = _mm512_maskz_subs_epu16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_subs_epu16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
12687
12688    #[simd_test(enable = "avx512bw,avx512vl")]
12689    unsafe fn test_mm256_mask_subs_epu16() {
12690        let a = _mm256_set1_epi16(1);
12691        let b = _mm256_set1_epi16(u16::MAX as i16);
12692        let r = _mm256_mask_subs_epu16(a, 0, a, b);
12693        assert_eq_m256i(r, a);
12694        let r = _mm256_mask_subs_epu16(a, 0b00000000_00001111, a, b);
12695        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
12696        assert_eq_m256i(r, e);
12697    }
12698
12699    #[simd_test(enable = "avx512bw,avx512vl")]
12700    unsafe fn test_mm256_maskz_subs_epu16() {
12701        let a = _mm256_set1_epi16(1);
12702        let b = _mm256_set1_epi16(u16::MAX as i16);
12703        let r = _mm256_maskz_subs_epu16(0, a, b);
12704        assert_eq_m256i(r, _mm256_setzero_si256());
12705        let r = _mm256_maskz_subs_epu16(0b00000000_00001111, a, b);
12706        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
12707        assert_eq_m256i(r, e);
12708    }
12709
12710    #[simd_test(enable = "avx512bw,avx512vl")]
12711    unsafe fn test_mm_mask_subs_epu16() {
12712        let a = _mm_set1_epi16(1);
12713        let b = _mm_set1_epi16(u16::MAX as i16);
12714        let r = _mm_mask_subs_epu16(a, 0, a, b);
12715        assert_eq_m128i(r, a);
12716        let r = _mm_mask_subs_epu16(a, 0b00001111, a, b);
12717        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
12718        assert_eq_m128i(r, e);
12719    }
12720
12721    #[simd_test(enable = "avx512bw,avx512vl")]
12722    unsafe fn test_mm_maskz_subs_epu16() {
12723        let a = _mm_set1_epi16(1);
12724        let b = _mm_set1_epi16(u16::MAX as i16);
12725        let r = _mm_maskz_subs_epu16(0, a, b);
12726        assert_eq_m128i(r, _mm_setzero_si128());
12727        let r = _mm_maskz_subs_epu16(0b00001111, a, b);
12728        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
12729        assert_eq_m128i(r, e);
12730    }
12731
12732    #[simd_test(enable = "avx512bw")]
12733    unsafe fn test_mm512_subs_epu8() {
12734        let a = _mm512_set1_epi8(1);
12735        let b = _mm512_set1_epi8(u8::MAX as i8);
12736        let r = _mm512_subs_epu8(a, b);
12737        let e = _mm512_set1_epi8(0);
12738        assert_eq_m512i(r, e);
12739    }
12740
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_subs_epu8() {
        // Unsigned saturating subtract: 1 - 0xFF clamps to 0.
        // Lanes with a clear mask bit keep src (`a`).
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(u8::MAX as i8);
        let r = _mm512_mask_subs_epu8(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Low 4 mask bits set: low 4 lanes saturate to 0, rest keep src's 1.
        let r = _mm512_mask_subs_epu8(
            a,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
12760
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_subs_epu8() {
        // Unsigned saturating subtract: 1 - 0xFF clamps to 0,
        // so masked and zeroed lanes are indistinguishable here (all zero).
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(u8::MAX as i8);
        let r = _mm512_maskz_subs_epu8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_subs_epu8(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
12779
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_subs_epu8() {
        // Unsigned saturating subtract: 1 - 0xFF clamps to 0.
        // Lanes with a clear mask bit keep src (`a`).
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(u8::MAX as i8);
        let r = _mm256_mask_subs_epu8(a, 0, a, b);
        assert_eq_m256i(r, a);
        // Low 4 mask bits set: low 4 lanes saturate to 0, rest keep src's 1.
        let r = _mm256_mask_subs_epu8(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
12792
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_subs_epu8() {
        // Unsigned saturating subtract: 1 - 0xFF clamps to 0,
        // so masked and zeroed lanes are indistinguishable here (all zero).
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(u8::MAX as i8);
        let r = _mm256_maskz_subs_epu8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_subs_epu8(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
12805
12806    #[simd_test(enable = "avx512bw,avx512vl")]
12807    unsafe fn test_mm_mask_subs_epu8() {
12808        let a = _mm_set1_epi8(1);
12809        let b = _mm_set1_epi8(u8::MAX as i8);
12810        let r = _mm_mask_subs_epu8(a, 0, a, b);
12811        assert_eq_m128i(r, a);
12812        let r = _mm_mask_subs_epu8(a, 0b00000000_00001111, a, b);
12813        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
12814        assert_eq_m128i(r, e);
12815    }
12816
12817    #[simd_test(enable = "avx512bw,avx512vl")]
12818    unsafe fn test_mm_maskz_subs_epu8() {
12819        let a = _mm_set1_epi8(1);
12820        let b = _mm_set1_epi8(u8::MAX as i8);
12821        let r = _mm_maskz_subs_epu8(0, a, b);
12822        assert_eq_m128i(r, _mm_setzero_si128());
12823        let r = _mm_maskz_subs_epu8(0b00000000_00001111, a, b);
12824        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
12825        assert_eq_m128i(r, e);
12826    }
12827
12828    #[simd_test(enable = "avx512bw")]
12829    unsafe fn test_mm512_subs_epi16() {
12830        let a = _mm512_set1_epi16(-1);
12831        let b = _mm512_set1_epi16(i16::MAX);
12832        let r = _mm512_subs_epi16(a, b);
12833        let e = _mm512_set1_epi16(i16::MIN);
12834        assert_eq_m512i(r, e);
12835    }
12836
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_subs_epi16() {
        // Signed saturating subtract: -1 - 32767 clamps to i16::MIN.
        // Lanes with a clear mask bit keep src (`a`).
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_mask_subs_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Low 4 mask bits set: low 4 lanes saturate, rest keep src's -1.
        let r = _mm512_mask_subs_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                                 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
        assert_eq_m512i(r, e);
    }
12849
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_subs_epi16() {
        // Signed saturating subtract: -1 - 32767 clamps to i16::MIN;
        // zero-masked lanes are cleared to 0.
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_maskz_subs_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_subs_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
        assert_eq_m512i(r, e);
    }
12862
12863    #[simd_test(enable = "avx512bw,avx512vl")]
12864    unsafe fn test_mm256_mask_subs_epi16() {
12865        let a = _mm256_set1_epi16(-1);
12866        let b = _mm256_set1_epi16(i16::MAX);
12867        let r = _mm256_mask_subs_epi16(a, 0, a, b);
12868        assert_eq_m256i(r, a);
12869        let r = _mm256_mask_subs_epi16(a, 0b00000000_00001111, a, b);
12870        #[rustfmt::skip]
12871        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
12872        assert_eq_m256i(r, e);
12873    }
12874
12875    #[simd_test(enable = "avx512bw,avx512vl")]
12876    unsafe fn test_mm256_maskz_subs_epi16() {
12877        let a = _mm256_set1_epi16(-1);
12878        let b = _mm256_set1_epi16(i16::MAX);
12879        let r = _mm256_maskz_subs_epi16(0, a, b);
12880        assert_eq_m256i(r, _mm256_setzero_si256());
12881        let r = _mm256_maskz_subs_epi16(0b00000000_00001111, a, b);
12882        #[rustfmt::skip]
12883        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
12884        assert_eq_m256i(r, e);
12885    }
12886
12887    #[simd_test(enable = "avx512bw,avx512vl")]
12888    unsafe fn test_mm_mask_subs_epi16() {
12889        let a = _mm_set1_epi16(-1);
12890        let b = _mm_set1_epi16(i16::MAX);
12891        let r = _mm_mask_subs_epi16(a, 0, a, b);
12892        assert_eq_m128i(r, a);
12893        let r = _mm_mask_subs_epi16(a, 0b00001111, a, b);
12894        let e = _mm_set_epi16(-1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
12895        assert_eq_m128i(r, e);
12896    }
12897
12898    #[simd_test(enable = "avx512bw,avx512vl")]
12899    unsafe fn test_mm_maskz_subs_epi16() {
12900        let a = _mm_set1_epi16(-1);
12901        let b = _mm_set1_epi16(i16::MAX);
12902        let r = _mm_maskz_subs_epi16(0, a, b);
12903        assert_eq_m128i(r, _mm_setzero_si128());
12904        let r = _mm_maskz_subs_epi16(0b00001111, a, b);
12905        let e = _mm_set_epi16(0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
12906        assert_eq_m128i(r, e);
12907    }
12908
12909    #[simd_test(enable = "avx512bw")]
12910    unsafe fn test_mm512_subs_epi8() {
12911        let a = _mm512_set1_epi8(-1);
12912        let b = _mm512_set1_epi8(i8::MAX);
12913        let r = _mm512_subs_epi8(a, b);
12914        let e = _mm512_set1_epi8(i8::MIN);
12915        assert_eq_m512i(r, e);
12916    }
12917
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_subs_epi8() {
        // Signed saturating subtract: -1 - 127 clamps to i8::MIN.
        // Lanes with a clear mask bit keep src (`a`).
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(i8::MAX);
        let r = _mm512_mask_subs_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Low 4 mask bits set: low 4 lanes saturate, rest keep src's -1.
        let r = _mm512_mask_subs_epi8(
            a,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
        assert_eq_m512i(r, e);
    }
12937
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_subs_epi8() {
        // Signed saturating subtract: -1 - 127 clamps to i8::MIN;
        // zero-masked lanes are cleared to 0.
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(i8::MAX);
        let r = _mm512_maskz_subs_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_subs_epi8(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
        assert_eq_m512i(r, e);
    }
12956
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_subs_epi8() {
        // Signed saturating subtract: -1 - 127 clamps to i8::MIN.
        // Lanes with a clear mask bit keep src (`a`).
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(i8::MAX);
        let r = _mm256_mask_subs_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        // Low 4 mask bits set: low 4 lanes saturate, rest keep src's -1.
        let r = _mm256_mask_subs_epi8(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
        assert_eq_m256i(r, e);
    }
12969
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_subs_epi8() {
        // Signed saturating subtract: -1 - 127 clamps to i8::MIN;
        // zero-masked lanes are cleared to 0.
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(i8::MAX);
        let r = _mm256_maskz_subs_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_subs_epi8(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
        assert_eq_m256i(r, e);
    }
12982
12983    #[simd_test(enable = "avx512bw,avx512vl")]
12984    unsafe fn test_mm_mask_subs_epi8() {
12985        let a = _mm_set1_epi8(-1);
12986        let b = _mm_set1_epi8(i8::MAX);
12987        let r = _mm_mask_subs_epi8(a, 0, a, b);
12988        assert_eq_m128i(r, a);
12989        let r = _mm_mask_subs_epi8(a, 0b00000000_00001111, a, b);
12990        #[rustfmt::skip]
12991        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
12992        assert_eq_m128i(r, e);
12993    }
12994
12995    #[simd_test(enable = "avx512bw,avx512vl")]
12996    unsafe fn test_mm_maskz_subs_epi8() {
12997        let a = _mm_set1_epi8(-1);
12998        let b = _mm_set1_epi8(i8::MAX);
12999        let r = _mm_maskz_subs_epi8(0, a, b);
13000        assert_eq_m128i(r, _mm_setzero_si128());
13001        let r = _mm_maskz_subs_epi8(0b00000000_00001111, a, b);
13002        #[rustfmt::skip]
13003        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
13004        assert_eq_m128i(r, e);
13005    }
13006
13007    #[simd_test(enable = "avx512bw")]
13008    unsafe fn test_mm512_mulhi_epu16() {
13009        let a = _mm512_set1_epi16(1);
13010        let b = _mm512_set1_epi16(1);
13011        let r = _mm512_mulhi_epu16(a, b);
13012        let e = _mm512_set1_epi16(0);
13013        assert_eq_m512i(r, e);
13014    }
13015
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_mulhi_epu16() {
        // High half of 1 * 1 is 0; lanes with a clear mask bit keep src (`a`).
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mask_mulhi_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Low 4 mask bits set: low 4 lanes become 0, rest keep src's 1.
        let r = _mm512_mask_mulhi_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
13028
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_mulhi_epu16() {
        // High half of 1 * 1 is 0, so masked and zeroed lanes both end up 0.
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_mulhi_epu16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_mulhi_epu16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
13041
13042    #[simd_test(enable = "avx512bw,avx512vl")]
13043    unsafe fn test_mm256_mask_mulhi_epu16() {
13044        let a = _mm256_set1_epi16(1);
13045        let b = _mm256_set1_epi16(1);
13046        let r = _mm256_mask_mulhi_epu16(a, 0, a, b);
13047        assert_eq_m256i(r, a);
13048        let r = _mm256_mask_mulhi_epu16(a, 0b00000000_00001111, a, b);
13049        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
13050        assert_eq_m256i(r, e);
13051    }
13052
13053    #[simd_test(enable = "avx512bw,avx512vl")]
13054    unsafe fn test_mm256_maskz_mulhi_epu16() {
13055        let a = _mm256_set1_epi16(1);
13056        let b = _mm256_set1_epi16(1);
13057        let r = _mm256_maskz_mulhi_epu16(0, a, b);
13058        assert_eq_m256i(r, _mm256_setzero_si256());
13059        let r = _mm256_maskz_mulhi_epu16(0b00000000_00001111, a, b);
13060        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
13061        assert_eq_m256i(r, e);
13062    }
13063
13064    #[simd_test(enable = "avx512bw,avx512vl")]
13065    unsafe fn test_mm_mask_mulhi_epu16() {
13066        let a = _mm_set1_epi16(1);
13067        let b = _mm_set1_epi16(1);
13068        let r = _mm_mask_mulhi_epu16(a, 0, a, b);
13069        assert_eq_m128i(r, a);
13070        let r = _mm_mask_mulhi_epu16(a, 0b00001111, a, b);
13071        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
13072        assert_eq_m128i(r, e);
13073    }
13074
13075    #[simd_test(enable = "avx512bw,avx512vl")]
13076    unsafe fn test_mm_maskz_mulhi_epu16() {
13077        let a = _mm_set1_epi16(1);
13078        let b = _mm_set1_epi16(1);
13079        let r = _mm_maskz_mulhi_epu16(0, a, b);
13080        assert_eq_m128i(r, _mm_setzero_si128());
13081        let r = _mm_maskz_mulhi_epu16(0b00001111, a, b);
13082        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
13083        assert_eq_m128i(r, e);
13084    }
13085
13086    #[simd_test(enable = "avx512bw")]
13087    unsafe fn test_mm512_mulhi_epi16() {
13088        let a = _mm512_set1_epi16(1);
13089        let b = _mm512_set1_epi16(1);
13090        let r = _mm512_mulhi_epi16(a, b);
13091        let e = _mm512_set1_epi16(0);
13092        assert_eq_m512i(r, e);
13093    }
13094
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_mulhi_epi16() {
        // High half of 1 * 1 is 0; lanes with a clear mask bit keep src (`a`).
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mask_mulhi_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Low 4 mask bits set: low 4 lanes become 0, rest keep src's 1.
        let r = _mm512_mask_mulhi_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
13107
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_mulhi_epi16() {
        // High half of 1 * 1 is 0, so masked and zeroed lanes both end up 0.
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_mulhi_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_mulhi_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
13120
13121    #[simd_test(enable = "avx512bw,avx512vl")]
13122    unsafe fn test_mm256_mask_mulhi_epi16() {
13123        let a = _mm256_set1_epi16(1);
13124        let b = _mm256_set1_epi16(1);
13125        let r = _mm256_mask_mulhi_epi16(a, 0, a, b);
13126        assert_eq_m256i(r, a);
13127        let r = _mm256_mask_mulhi_epi16(a, 0b00000000_00001111, a, b);
13128        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
13129        assert_eq_m256i(r, e);
13130    }
13131
13132    #[simd_test(enable = "avx512bw,avx512vl")]
13133    unsafe fn test_mm256_maskz_mulhi_epi16() {
13134        let a = _mm256_set1_epi16(1);
13135        let b = _mm256_set1_epi16(1);
13136        let r = _mm256_maskz_mulhi_epi16(0, a, b);
13137        assert_eq_m256i(r, _mm256_setzero_si256());
13138        let r = _mm256_maskz_mulhi_epi16(0b00000000_00001111, a, b);
13139        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
13140        assert_eq_m256i(r, e);
13141    }
13142
13143    #[simd_test(enable = "avx512bw,avx512vl")]
13144    unsafe fn test_mm_mask_mulhi_epi16() {
13145        let a = _mm_set1_epi16(1);
13146        let b = _mm_set1_epi16(1);
13147        let r = _mm_mask_mulhi_epi16(a, 0, a, b);
13148        assert_eq_m128i(r, a);
13149        let r = _mm_mask_mulhi_epi16(a, 0b00001111, a, b);
13150        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
13151        assert_eq_m128i(r, e);
13152    }
13153
13154    #[simd_test(enable = "avx512bw,avx512vl")]
13155    unsafe fn test_mm_maskz_mulhi_epi16() {
13156        let a = _mm_set1_epi16(1);
13157        let b = _mm_set1_epi16(1);
13158        let r = _mm_maskz_mulhi_epi16(0, a, b);
13159        assert_eq_m128i(r, _mm_setzero_si128());
13160        let r = _mm_maskz_mulhi_epi16(0b00001111, a, b);
13161        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
13162        assert_eq_m128i(r, e);
13163    }
13164
13165    #[simd_test(enable = "avx512bw")]
13166    unsafe fn test_mm512_mulhrs_epi16() {
13167        let a = _mm512_set1_epi16(1);
13168        let b = _mm512_set1_epi16(1);
13169        let r = _mm512_mulhrs_epi16(a, b);
13170        let e = _mm512_set1_epi16(0);
13171        assert_eq_m512i(r, e);
13172    }
13173
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_mulhrs_epi16() {
        // Rounded high-half multiply of 1 * 1 is 0;
        // lanes with a clear mask bit keep src (`a`).
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mask_mulhrs_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Low 4 mask bits set: low 4 lanes become 0, rest keep src's 1.
        let r = _mm512_mask_mulhrs_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
13186
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_mulhrs_epi16() {
        // Rounded high-half multiply of 1 * 1 is 0, so all lanes end up 0.
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_mulhrs_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_mulhrs_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
13199
13200    #[simd_test(enable = "avx512bw,avx512vl")]
13201    unsafe fn test_mm256_mask_mulhrs_epi16() {
13202        let a = _mm256_set1_epi16(1);
13203        let b = _mm256_set1_epi16(1);
13204        let r = _mm256_mask_mulhrs_epi16(a, 0, a, b);
13205        assert_eq_m256i(r, a);
13206        let r = _mm256_mask_mulhrs_epi16(a, 0b00000000_00001111, a, b);
13207        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
13208        assert_eq_m256i(r, e);
13209    }
13210
13211    #[simd_test(enable = "avx512bw,avx512vl")]
13212    unsafe fn test_mm256_maskz_mulhrs_epi16() {
13213        let a = _mm256_set1_epi16(1);
13214        let b = _mm256_set1_epi16(1);
13215        let r = _mm256_maskz_mulhrs_epi16(0, a, b);
13216        assert_eq_m256i(r, _mm256_setzero_si256());
13217        let r = _mm256_maskz_mulhrs_epi16(0b00000000_00001111, a, b);
13218        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
13219        assert_eq_m256i(r, e);
13220    }
13221
13222    #[simd_test(enable = "avx512bw,avx512vl")]
13223    unsafe fn test_mm_mask_mulhrs_epi16() {
13224        let a = _mm_set1_epi16(1);
13225        let b = _mm_set1_epi16(1);
13226        let r = _mm_mask_mulhrs_epi16(a, 0, a, b);
13227        assert_eq_m128i(r, a);
13228        let r = _mm_mask_mulhrs_epi16(a, 0b00001111, a, b);
13229        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
13230        assert_eq_m128i(r, e);
13231    }
13232
13233    #[simd_test(enable = "avx512bw,avx512vl")]
13234    unsafe fn test_mm_maskz_mulhrs_epi16() {
13235        let a = _mm_set1_epi16(1);
13236        let b = _mm_set1_epi16(1);
13237        let r = _mm_maskz_mulhrs_epi16(0, a, b);
13238        assert_eq_m128i(r, _mm_setzero_si128());
13239        let r = _mm_maskz_mulhrs_epi16(0b00001111, a, b);
13240        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
13241        assert_eq_m128i(r, e);
13242    }
13243
13244    #[simd_test(enable = "avx512bw")]
13245    unsafe fn test_mm512_mullo_epi16() {
13246        let a = _mm512_set1_epi16(1);
13247        let b = _mm512_set1_epi16(1);
13248        let r = _mm512_mullo_epi16(a, b);
13249        let e = _mm512_set1_epi16(1);
13250        assert_eq_m512i(r, e);
13251    }
13252
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_mullo_epi16() {
        // 1 * 1 = 1 and src is also all-ones, so every lane is 1 either way.
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mask_mullo_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_mullo_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
13265
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_mullo_epi16() {
        // 1 * 1 = 1 in masked lanes; zero-masked lanes are cleared to 0.
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_mullo_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Low 4 mask bits set: low 4 lanes hold the product 1, rest are 0.
        let r = _mm512_maskz_mullo_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
13278
13279    #[simd_test(enable = "avx512bw,avx512vl")]
13280    unsafe fn test_mm256_mask_mullo_epi16() {
13281        let a = _mm256_set1_epi16(1);
13282        let b = _mm256_set1_epi16(1);
13283        let r = _mm256_mask_mullo_epi16(a, 0, a, b);
13284        assert_eq_m256i(r, a);
13285        let r = _mm256_mask_mullo_epi16(a, 0b00000000_00001111, a, b);
13286        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
13287        assert_eq_m256i(r, e);
13288    }
13289
13290    #[simd_test(enable = "avx512bw,avx512vl")]
13291    unsafe fn test_mm256_maskz_mullo_epi16() {
13292        let a = _mm256_set1_epi16(1);
13293        let b = _mm256_set1_epi16(1);
13294        let r = _mm256_maskz_mullo_epi16(0, a, b);
13295        assert_eq_m256i(r, _mm256_setzero_si256());
13296        let r = _mm256_maskz_mullo_epi16(0b00000000_00001111, a, b);
13297        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
13298        assert_eq_m256i(r, e);
13299    }
13300
13301    #[simd_test(enable = "avx512bw,avx512vl")]
13302    unsafe fn test_mm_mask_mullo_epi16() {
13303        let a = _mm_set1_epi16(1);
13304        let b = _mm_set1_epi16(1);
13305        let r = _mm_mask_mullo_epi16(a, 0, a, b);
13306        assert_eq_m128i(r, a);
13307        let r = _mm_mask_mullo_epi16(a, 0b00001111, a, b);
13308        let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1);
13309        assert_eq_m128i(r, e);
13310    }
13311
13312    #[simd_test(enable = "avx512bw,avx512vl")]
13313    unsafe fn test_mm_maskz_mullo_epi16() {
13314        let a = _mm_set1_epi16(1);
13315        let b = _mm_set1_epi16(1);
13316        let r = _mm_maskz_mullo_epi16(0, a, b);
13317        assert_eq_m128i(r, _mm_setzero_si128());
13318        let r = _mm_maskz_mullo_epi16(0b00001111, a, b);
13319        let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
13320        assert_eq_m128i(r, e);
13321    }
13322
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_max_epu16() {
        // Unsigned lane-wise max of an ascending (0..=15) and a descending
        // (15..=0) ramp: each lane holds max(i, 15 - i).
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_max_epu16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
13337
    // Write-masked unsigned 16-bit max: a zero mask must copy src verbatim;
    // with the low 8 bits of each 16-bit mask half set, those lanes take
    // max(a, b) — which happens to equal the corresponding `a` values here.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_max_epu16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_max_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_max_epu16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
13354
    // Zero-masked unsigned 16-bit max: masked-off lanes are zeroed, the
    // selected low-half lanes hold max(a, b).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_max_epu16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_max_epu16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_max_epu16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
13371
13372    #[simd_test(enable = "avx512bw,avx512vl")]
13373    unsafe fn test_mm256_mask_max_epu16() {
13374        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13375        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13376        let r = _mm256_mask_max_epu16(a, 0, a, b);
13377        assert_eq_m256i(r, a);
13378        let r = _mm256_mask_max_epu16(a, 0b00000000_11111111, a, b);
13379        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13380        assert_eq_m256i(r, e);
13381    }
13382
13383    #[simd_test(enable = "avx512bw,avx512vl")]
13384    unsafe fn test_mm256_maskz_max_epu16() {
13385        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13386        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13387        let r = _mm256_maskz_max_epu16(0, a, b);
13388        assert_eq_m256i(r, _mm256_setzero_si256());
13389        let r = _mm256_maskz_max_epu16(0b00000000_11111111, a, b);
13390        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
13391        assert_eq_m256i(r, e);
13392    }
13393
13394    #[simd_test(enable = "avx512bw,avx512vl")]
13395    unsafe fn test_mm_mask_max_epu16() {
13396        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
13397        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
13398        let r = _mm_mask_max_epu16(a, 0, a, b);
13399        assert_eq_m128i(r, a);
13400        let r = _mm_mask_max_epu16(a, 0b00001111, a, b);
13401        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
13402        assert_eq_m128i(r, e);
13403    }
13404
13405    #[simd_test(enable = "avx512bw,avx512vl")]
13406    unsafe fn test_mm_maskz_max_epu16() {
13407        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
13408        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
13409        let r = _mm_maskz_max_epu16(0, a, b);
13410        assert_eq_m128i(r, _mm_setzero_si128());
13411        let r = _mm_maskz_max_epu16(0b00001111, a, b);
13412        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
13413        assert_eq_m128i(r, e);
13414    }
13415
    // Unsigned 8-bit lane-wise max across all 64 byte lanes; the same
    // ascending/descending 16-byte pattern is repeated in every 128-bit lane.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_max_epu8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_max_epu8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
13436
    // Write-masked unsigned 8-bit max: a zero mask copies src; the alternating
    // 0x00FF byte-mask selects max(a, b) in the low 8 bytes of each 128-bit
    // lane, which equals the corresponding `a` values for this input pair.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_max_epu8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_max_epu8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_max_epu8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
13464
    // Zero-masked unsigned 8-bit max: masked-off bytes become zero, selected
    // bytes hold max(a, b).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_max_epu8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_max_epu8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_max_epu8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
13491
13492    #[simd_test(enable = "avx512bw,avx512vl")]
13493    unsafe fn test_mm256_mask_max_epu8() {
13494        #[rustfmt::skip]
13495        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13496                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13497        #[rustfmt::skip]
13498        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13499                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13500        let r = _mm256_mask_max_epu8(a, 0, a, b);
13501        assert_eq_m256i(r, a);
13502        let r = _mm256_mask_max_epu8(a, 0b00000000_11111111_00000000_11111111, a, b);
13503        #[rustfmt::skip]
13504        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13505                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13506        assert_eq_m256i(r, e);
13507    }
13508
13509    #[simd_test(enable = "avx512bw,avx512vl")]
13510    unsafe fn test_mm256_maskz_max_epu8() {
13511        #[rustfmt::skip]
13512        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13513                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13514        #[rustfmt::skip]
13515        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13516                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13517        let r = _mm256_maskz_max_epu8(0, a, b);
13518        assert_eq_m256i(r, _mm256_setzero_si256());
13519        let r = _mm256_maskz_max_epu8(0b00000000_11111111_00000000_11111111, a, b);
13520        #[rustfmt::skip]
13521        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
13522                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
13523        assert_eq_m256i(r, e);
13524    }
13525
13526    #[simd_test(enable = "avx512bw,avx512vl")]
13527    unsafe fn test_mm_mask_max_epu8() {
13528        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13529        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13530        let r = _mm_mask_max_epu8(a, 0, a, b);
13531        assert_eq_m128i(r, a);
13532        let r = _mm_mask_max_epu8(a, 0b00000000_11111111, a, b);
13533        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13534        assert_eq_m128i(r, e);
13535    }
13536
13537    #[simd_test(enable = "avx512bw,avx512vl")]
13538    unsafe fn test_mm_maskz_max_epu8() {
13539        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13540        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13541        let r = _mm_maskz_max_epu8(0, a, b);
13542        assert_eq_m128i(r, _mm_setzero_si128());
13543        let r = _mm_maskz_max_epu8(0b00000000_11111111, a, b);
13544        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
13545        assert_eq_m128i(r, e);
13546    }
13547
    // Signed 16-bit lane-wise max; identical input pattern to the epu16 test
    // (all values are non-negative, so signed and unsigned agree here).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_max_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_max_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
13562
    // Write-masked signed 16-bit max: zero mask copies src; selected lanes
    // take max(a, b), which equals the corresponding `a` values here.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_max_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_max_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_max_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
13579
    // Zero-masked signed 16-bit max: masked-off lanes are zeroed, selected
    // lanes hold max(a, b).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_max_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_max_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_max_epi16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
13596
13597    #[simd_test(enable = "avx512bw,avx512vl")]
13598    unsafe fn test_mm256_mask_max_epi16() {
13599        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13600        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13601        let r = _mm256_mask_max_epi16(a, 0, a, b);
13602        assert_eq_m256i(r, a);
13603        let r = _mm256_mask_max_epi16(a, 0b00000000_11111111, a, b);
13604        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13605        assert_eq_m256i(r, e);
13606    }
13607
13608    #[simd_test(enable = "avx512bw,avx512vl")]
13609    unsafe fn test_mm256_maskz_max_epi16() {
13610        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13611        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13612        let r = _mm256_maskz_max_epi16(0, a, b);
13613        assert_eq_m256i(r, _mm256_setzero_si256());
13614        let r = _mm256_maskz_max_epi16(0b00000000_11111111, a, b);
13615        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
13616        assert_eq_m256i(r, e);
13617    }
13618
13619    #[simd_test(enable = "avx512bw,avx512vl")]
13620    unsafe fn test_mm_mask_max_epi16() {
13621        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
13622        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
13623        let r = _mm_mask_max_epi16(a, 0, a, b);
13624        assert_eq_m128i(r, a);
13625        let r = _mm_mask_max_epi16(a, 0b00001111, a, b);
13626        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
13627        assert_eq_m128i(r, e);
13628    }
13629
13630    #[simd_test(enable = "avx512bw,avx512vl")]
13631    unsafe fn test_mm_maskz_max_epi16() {
13632        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
13633        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
13634        let r = _mm_maskz_max_epi16(0, a, b);
13635        assert_eq_m128i(r, _mm_setzero_si128());
13636        let r = _mm_maskz_max_epi16(0b00001111, a, b);
13637        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
13638        assert_eq_m128i(r, e);
13639    }
13640
    // Signed 8-bit lane-wise max; all values non-negative so the result
    // matches the unsigned epu8 test.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_max_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_max_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
13661
    // Write-masked signed 8-bit max: zero mask copies src; the alternating
    // byte-mask selects max(a, b) in the low 8 bytes of each 128-bit lane,
    // which equals the corresponding `a` values here.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_max_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_max_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_max_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
13689
    // Zero-masked signed 8-bit max: masked-off bytes become zero, selected
    // bytes hold max(a, b).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_max_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_max_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_max_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
13716
13717    #[simd_test(enable = "avx512bw,avx512vl")]
13718    unsafe fn test_mm256_mask_max_epi8() {
13719        #[rustfmt::skip]
13720        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13721                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13722        #[rustfmt::skip]
13723        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13724                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13725        let r = _mm256_mask_max_epi8(a, 0, a, b);
13726        assert_eq_m256i(r, a);
13727        let r = _mm256_mask_max_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
13728        #[rustfmt::skip]
13729        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13730                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13731        assert_eq_m256i(r, e);
13732    }
13733
13734    #[simd_test(enable = "avx512bw,avx512vl")]
13735    unsafe fn test_mm256_maskz_max_epi8() {
13736        #[rustfmt::skip]
13737        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13738                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13739        #[rustfmt::skip]
13740        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13741                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13742        let r = _mm256_maskz_max_epi8(0, a, b);
13743        assert_eq_m256i(r, _mm256_setzero_si256());
13744        let r = _mm256_maskz_max_epi8(0b00000000_11111111_00000000_11111111, a, b);
13745        #[rustfmt::skip]
13746        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
13747                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
13748        assert_eq_m256i(r, e);
13749    }
13750
13751    #[simd_test(enable = "avx512bw,avx512vl")]
13752    unsafe fn test_mm_mask_max_epi8() {
13753        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13754        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13755        let r = _mm_mask_max_epi8(a, 0, a, b);
13756        assert_eq_m128i(r, a);
13757        let r = _mm_mask_max_epi8(a, 0b00000000_11111111, a, b);
13758        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13759        assert_eq_m128i(r, e);
13760    }
13761
13762    #[simd_test(enable = "avx512bw,avx512vl")]
13763    unsafe fn test_mm_maskz_max_epi8() {
13764        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13765        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13766        let r = _mm_maskz_max_epi8(0, a, b);
13767        assert_eq_m128i(r, _mm_setzero_si128());
13768        let r = _mm_maskz_max_epi8(0b00000000_11111111, a, b);
13769        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
13770        assert_eq_m128i(r, e);
13771    }
13772
    // Unsigned 16-bit lane-wise min: `a` counts up, `b` counts down, so each
    // result lane is the smaller counter.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_min_epu16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_min_epu16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }
13787
    // Write-masked unsigned 16-bit min: zero mask copies src; selected lanes
    // take min(a, b) — equal to `b`'s low-half values for this input pair.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_min_epu16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_min_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_min_epu16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }
13804
    // Zero-masked unsigned 16-bit min: masked-off lanes are zeroed, selected
    // lanes hold min(a, b).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_min_epu16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_min_epu16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_min_epu16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }
13821
13822    #[simd_test(enable = "avx512bw,avx512vl")]
13823    unsafe fn test_mm256_mask_min_epu16() {
13824        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13825        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13826        let r = _mm256_mask_min_epu16(a, 0, a, b);
13827        assert_eq_m256i(r, a);
13828        let r = _mm256_mask_min_epu16(a, 0b00000000_11111111, a, b);
13829        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
13830        assert_eq_m256i(r, e);
13831    }
13832
13833    #[simd_test(enable = "avx512bw,avx512vl")]
13834    unsafe fn test_mm256_maskz_min_epu16() {
13835        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13836        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13837        let r = _mm256_maskz_min_epu16(0, a, b);
13838        assert_eq_m256i(r, _mm256_setzero_si256());
13839        let r = _mm256_maskz_min_epu16(0b00000000_11111111, a, b);
13840        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
13841        assert_eq_m256i(r, e);
13842    }
13843
13844    #[simd_test(enable = "avx512bw,avx512vl")]
13845    unsafe fn test_mm_mask_min_epu16() {
13846        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
13847        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
13848        let r = _mm_mask_min_epu16(a, 0, a, b);
13849        assert_eq_m128i(r, a);
13850        let r = _mm_mask_min_epu16(a, 0b00001111, a, b);
13851        let e = _mm_set_epi16(0, 1, 2, 3, 3, 2, 1, 0);
13852        assert_eq_m128i(r, e);
13853    }
13854
13855    #[simd_test(enable = "avx512bw,avx512vl")]
13856    unsafe fn test_mm_maskz_min_epu16() {
13857        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
13858        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
13859        let r = _mm_maskz_min_epu16(0, a, b);
13860        assert_eq_m128i(r, _mm_setzero_si128());
13861        let r = _mm_maskz_min_epu16(0b00001111, a, b);
13862        let e = _mm_set_epi16(0, 0, 0, 0, 3, 2, 1, 0);
13863        assert_eq_m128i(r, e);
13864    }
13865
    // Unsigned 8-bit lane-wise min across all 64 byte lanes; the same
    // ascending/descending 16-byte pattern is repeated in every 128-bit lane.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_min_epu8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_min_epu8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }
13886
    // Write-masked unsigned 8-bit min: zero mask copies src; the alternating
    // byte-mask selects min(a, b) in the low 8 bytes of each 128-bit lane
    // (the descending values from `b`), leaving the high bytes from src.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_min_epu8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_min_epu8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_min_epu8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }
13914
13915    #[simd_test(enable = "avx512bw")]
13916    unsafe fn test_mm512_maskz_min_epu8() {
13917        #[rustfmt::skip]
13918        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13919                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13920                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13921                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13922        #[rustfmt::skip]
13923        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13924                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13925                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13926                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13927        let r = _mm512_maskz_min_epu8(0, a, b);
13928        assert_eq_m512i(r, _mm512_setzero_si512());
13929        let r = _mm512_maskz_min_epu8(
13930            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
13931            a,
13932            b,
13933        );
13934        #[rustfmt::skip]
13935        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
13936                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
13937                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
13938                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
13939        assert_eq_m512i(r, e);
13940    }
13941
13942    #[simd_test(enable = "avx512bw,avx512vl")]
13943    unsafe fn test_mm256_mask_min_epu8() {
13944        #[rustfmt::skip]
13945        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13946                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13947        #[rustfmt::skip]
13948        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13949                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13950        let r = _mm256_mask_min_epu8(a, 0, a, b);
13951        assert_eq_m256i(r, a);
13952        let r = _mm256_mask_min_epu8(a, 0b00000000_11111111_00000000_11111111, a, b);
13953        #[rustfmt::skip]
13954        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
13955                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
13956        assert_eq_m256i(r, e);
13957    }
13958
13959    #[simd_test(enable = "avx512bw,avx512vl")]
13960    unsafe fn test_mm256_maskz_min_epu8() {
13961        #[rustfmt::skip]
13962        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13963                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13964        #[rustfmt::skip]
13965        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13966                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13967        let r = _mm256_maskz_min_epu8(0, a, b);
13968        assert_eq_m256i(r, _mm256_setzero_si256());
13969        let r = _mm256_maskz_min_epu8(0b00000000_11111111_00000000_11111111, a, b);
13970        #[rustfmt::skip]
13971        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
13972                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
13973        assert_eq_m256i(r, e);
13974    }
13975
13976    #[simd_test(enable = "avx512bw,avx512vl")]
13977    unsafe fn test_mm_mask_min_epu8() {
13978        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13979        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13980        let r = _mm_mask_min_epu8(a, 0, a, b);
13981        assert_eq_m128i(r, a);
13982        let r = _mm_mask_min_epu8(a, 0b00000000_11111111, a, b);
13983        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
13984        assert_eq_m128i(r, e);
13985    }
13986
13987    #[simd_test(enable = "avx512bw,avx512vl")]
13988    unsafe fn test_mm_maskz_min_epu8() {
13989        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13990        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13991        let r = _mm_maskz_min_epu8(0, a, b);
13992        assert_eq_m128i(r, _mm_setzero_si128());
13993        let r = _mm_maskz_min_epu8(0b00000000_11111111, a, b);
13994        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
13995        assert_eq_m128i(r, e);
13996    }
13997
13998    #[simd_test(enable = "avx512bw")]
13999    unsafe fn test_mm512_min_epi16() {
14000        #[rustfmt::skip]
14001        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14002                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14003        #[rustfmt::skip]
14004        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14005                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14006        let r = _mm512_min_epi16(a, b);
14007        #[rustfmt::skip]
14008        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14009                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14010        assert_eq_m512i(r, e);
14011    }
14012
14013    #[simd_test(enable = "avx512bw")]
14014    unsafe fn test_mm512_mask_min_epi16() {
14015        #[rustfmt::skip]
14016        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14017                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14018        #[rustfmt::skip]
14019        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14020                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14021        let r = _mm512_mask_min_epi16(a, 0, a, b);
14022        assert_eq_m512i(r, a);
14023        let r = _mm512_mask_min_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
14024        #[rustfmt::skip]
14025        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14026                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14027        assert_eq_m512i(r, e);
14028    }
14029
14030    #[simd_test(enable = "avx512bw")]
14031    unsafe fn test_mm512_maskz_min_epi16() {
14032        #[rustfmt::skip]
14033        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14034                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14035        #[rustfmt::skip]
14036        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14037                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14038        let r = _mm512_maskz_min_epi16(0, a, b);
14039        assert_eq_m512i(r, _mm512_setzero_si512());
14040        let r = _mm512_maskz_min_epi16(0b00000000_11111111_00000000_11111111, a, b);
14041        #[rustfmt::skip]
14042        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
14043                                 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
14044        assert_eq_m512i(r, e);
14045    }
14046
14047    #[simd_test(enable = "avx512bw,avx512vl")]
14048    unsafe fn test_mm256_mask_min_epi16() {
14049        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14050        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14051        let r = _mm256_mask_min_epi16(a, 0, a, b);
14052        assert_eq_m256i(r, a);
14053        let r = _mm256_mask_min_epi16(a, 0b00000000_11111111, a, b);
14054        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14055        assert_eq_m256i(r, e);
14056    }
14057
14058    #[simd_test(enable = "avx512bw,avx512vl")]
14059    unsafe fn test_mm256_maskz_min_epi16() {
14060        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14061        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14062        let r = _mm256_maskz_min_epi16(0, a, b);
14063        assert_eq_m256i(r, _mm256_setzero_si256());
14064        let r = _mm256_maskz_min_epi16(0b00000000_11111111, a, b);
14065        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
14066        assert_eq_m256i(r, e);
14067    }
14068
14069    #[simd_test(enable = "avx512bw,avx512vl")]
14070    unsafe fn test_mm_mask_min_epi16() {
14071        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14072        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
14073        let r = _mm_mask_min_epi16(a, 0, a, b);
14074        assert_eq_m128i(r, a);
14075        let r = _mm_mask_min_epi16(a, 0b00001111, a, b);
14076        let e = _mm_set_epi16(0, 1, 2, 3, 3, 2, 1, 0);
14077        assert_eq_m128i(r, e);
14078    }
14079
14080    #[simd_test(enable = "avx512bw,avx512vl")]
14081    unsafe fn test_mm_maskz_min_epi16() {
14082        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14083        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
14084        let r = _mm_maskz_min_epi16(0, a, b);
14085        assert_eq_m128i(r, _mm_setzero_si128());
14086        let r = _mm_maskz_min_epi16(0b00001111, a, b);
14087        let e = _mm_set_epi16(0, 0, 0, 0, 3, 2, 1, 0);
14088        assert_eq_m128i(r, e);
14089    }
14090
14091    #[simd_test(enable = "avx512bw")]
14092    unsafe fn test_mm512_min_epi8() {
14093        #[rustfmt::skip]
14094        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14095                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14096                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14097                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14098        #[rustfmt::skip]
14099        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14100                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14101                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14102                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14103        let r = _mm512_min_epi8(a, b);
14104        #[rustfmt::skip]
14105        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14106                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14107                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14108                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14109        assert_eq_m512i(r, e);
14110    }
14111
14112    #[simd_test(enable = "avx512bw")]
14113    unsafe fn test_mm512_mask_min_epi8() {
14114        #[rustfmt::skip]
14115        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14116                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14117                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14118                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14119        #[rustfmt::skip]
14120        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14121                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14122                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14123                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14124        let r = _mm512_mask_min_epi8(a, 0, a, b);
14125        assert_eq_m512i(r, a);
14126        let r = _mm512_mask_min_epi8(
14127            a,
14128            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
14129            a,
14130            b,
14131        );
14132        #[rustfmt::skip]
14133        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14134                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14135                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14136                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14137        assert_eq_m512i(r, e);
14138    }
14139
14140    #[simd_test(enable = "avx512bw")]
14141    unsafe fn test_mm512_maskz_min_epi8() {
14142        #[rustfmt::skip]
14143        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14144                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14145                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14146                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14147        #[rustfmt::skip]
14148        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14149                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14150                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14151                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14152        let r = _mm512_maskz_min_epi8(0, a, b);
14153        assert_eq_m512i(r, _mm512_setzero_si512());
14154        let r = _mm512_maskz_min_epi8(
14155            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
14156            a,
14157            b,
14158        );
14159        #[rustfmt::skip]
14160        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
14161                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
14162                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
14163                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
14164        assert_eq_m512i(r, e);
14165    }
14166
14167    #[simd_test(enable = "avx512bw,avx512vl")]
14168    unsafe fn test_mm256_mask_min_epi8() {
14169        #[rustfmt::skip]
14170        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14171                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14172        #[rustfmt::skip]
14173        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14174                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14175        let r = _mm256_mask_min_epi8(a, 0, a, b);
14176        assert_eq_m256i(r, a);
14177        let r = _mm256_mask_min_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
14178        #[rustfmt::skip]
14179        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14180                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14181        assert_eq_m256i(r, e);
14182    }
14183
14184    #[simd_test(enable = "avx512bw,avx512vl")]
14185    unsafe fn test_mm256_maskz_min_epi8() {
14186        #[rustfmt::skip]
14187        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14188                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14189        #[rustfmt::skip]
14190        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14191                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14192        let r = _mm256_maskz_min_epi8(0, a, b);
14193        assert_eq_m256i(r, _mm256_setzero_si256());
14194        let r = _mm256_maskz_min_epi8(0b00000000_11111111_00000000_11111111, a, b);
14195        #[rustfmt::skip]
14196        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
14197                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
14198        assert_eq_m256i(r, e);
14199    }
14200
14201    #[simd_test(enable = "avx512bw,avx512vl")]
14202    unsafe fn test_mm_mask_min_epi8() {
14203        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14204        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14205        let r = _mm_mask_min_epi8(a, 0, a, b);
14206        assert_eq_m128i(r, a);
14207        let r = _mm_mask_min_epi8(a, 0b00000000_11111111, a, b);
14208        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14209        assert_eq_m128i(r, e);
14210    }
14211
14212    #[simd_test(enable = "avx512bw,avx512vl")]
14213    unsafe fn test_mm_maskz_min_epi8() {
14214        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14215        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14216        let r = _mm_maskz_min_epi8(0, a, b);
14217        assert_eq_m128i(r, _mm_setzero_si128());
14218        let r = _mm_maskz_min_epi8(0b00000000_11111111, a, b);
14219        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
14220        assert_eq_m128i(r, e);
14221    }
14222
14223    #[simd_test(enable = "avx512bw")]
14224    unsafe fn test_mm512_cmplt_epu16_mask() {
14225        let a = _mm512_set1_epi16(-2);
14226        let b = _mm512_set1_epi16(-1);
14227        let m = _mm512_cmplt_epu16_mask(a, b);
14228        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
14229    }
14230
14231    #[simd_test(enable = "avx512bw")]
14232    unsafe fn test_mm512_mask_cmplt_epu16_mask() {
14233        let a = _mm512_set1_epi16(-2);
14234        let b = _mm512_set1_epi16(-1);
14235        let mask = 0b01010101_01010101_01010101_01010101;
14236        let r = _mm512_mask_cmplt_epu16_mask(mask, a, b);
14237        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
14238    }
14239
14240    #[simd_test(enable = "avx512bw,avx512vl")]
14241    unsafe fn test_mm256_cmplt_epu16_mask() {
14242        let a = _mm256_set1_epi16(-2);
14243        let b = _mm256_set1_epi16(-1);
14244        let m = _mm256_cmplt_epu16_mask(a, b);
14245        assert_eq!(m, 0b11111111_11111111);
14246    }
14247
14248    #[simd_test(enable = "avx512bw,avx512vl")]
14249    unsafe fn test_mm256_mask_cmplt_epu16_mask() {
14250        let a = _mm256_set1_epi16(-2);
14251        let b = _mm256_set1_epi16(-1);
14252        let mask = 0b01010101_01010101;
14253        let r = _mm256_mask_cmplt_epu16_mask(mask, a, b);
14254        assert_eq!(r, 0b01010101_01010101);
14255    }
14256
14257    #[simd_test(enable = "avx512bw,avx512vl")]
14258    unsafe fn test_mm_cmplt_epu16_mask() {
14259        let a = _mm_set1_epi16(-2);
14260        let b = _mm_set1_epi16(-1);
14261        let m = _mm_cmplt_epu16_mask(a, b);
14262        assert_eq!(m, 0b11111111);
14263    }
14264
14265    #[simd_test(enable = "avx512bw,avx512vl")]
14266    unsafe fn test_mm_mask_cmplt_epu16_mask() {
14267        let a = _mm_set1_epi16(-2);
14268        let b = _mm_set1_epi16(-1);
14269        let mask = 0b01010101;
14270        let r = _mm_mask_cmplt_epu16_mask(mask, a, b);
14271        assert_eq!(r, 0b01010101);
14272    }
14273
14274    #[simd_test(enable = "avx512bw")]
14275    unsafe fn test_mm512_cmplt_epu8_mask() {
14276        let a = _mm512_set1_epi8(-2);
14277        let b = _mm512_set1_epi8(-1);
14278        let m = _mm512_cmplt_epu8_mask(a, b);
14279        assert_eq!(
14280            m,
14281            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
14282        );
14283    }
14284
14285    #[simd_test(enable = "avx512bw")]
14286    unsafe fn test_mm512_mask_cmplt_epu8_mask() {
14287        let a = _mm512_set1_epi8(-2);
14288        let b = _mm512_set1_epi8(-1);
14289        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
14290        let r = _mm512_mask_cmplt_epu8_mask(mask, a, b);
14291        assert_eq!(
14292            r,
14293            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
14294        );
14295    }
14296
14297    #[simd_test(enable = "avx512bw,avx512vl")]
14298    unsafe fn test_mm256_cmplt_epu8_mask() {
14299        let a = _mm256_set1_epi8(-2);
14300        let b = _mm256_set1_epi8(-1);
14301        let m = _mm256_cmplt_epu8_mask(a, b);
14302        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
14303    }
14304
14305    #[simd_test(enable = "avx512bw,avx512vl")]
14306    unsafe fn test_mm256_mask_cmplt_epu8_mask() {
14307        let a = _mm256_set1_epi8(-2);
14308        let b = _mm256_set1_epi8(-1);
14309        let mask = 0b01010101_01010101_01010101_01010101;
14310        let r = _mm256_mask_cmplt_epu8_mask(mask, a, b);
14311        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
14312    }
14313
14314    #[simd_test(enable = "avx512bw,avx512vl")]
14315    unsafe fn test_mm_cmplt_epu8_mask() {
14316        let a = _mm_set1_epi8(-2);
14317        let b = _mm_set1_epi8(-1);
14318        let m = _mm_cmplt_epu8_mask(a, b);
14319        assert_eq!(m, 0b11111111_11111111);
14320    }
14321
14322    #[simd_test(enable = "avx512bw,avx512vl")]
14323    unsafe fn test_mm_mask_cmplt_epu8_mask() {
14324        let a = _mm_set1_epi8(-2);
14325        let b = _mm_set1_epi8(-1);
14326        let mask = 0b01010101_01010101;
14327        let r = _mm_mask_cmplt_epu8_mask(mask, a, b);
14328        assert_eq!(r, 0b01010101_01010101);
14329    }
14330
14331    #[simd_test(enable = "avx512bw")]
14332    unsafe fn test_mm512_cmplt_epi16_mask() {
14333        let a = _mm512_set1_epi16(-2);
14334        let b = _mm512_set1_epi16(-1);
14335        let m = _mm512_cmplt_epi16_mask(a, b);
14336        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
14337    }
14338
14339    #[simd_test(enable = "avx512bw")]
14340    unsafe fn test_mm512_mask_cmplt_epi16_mask() {
14341        let a = _mm512_set1_epi16(-2);
14342        let b = _mm512_set1_epi16(-1);
14343        let mask = 0b01010101_01010101_01010101_01010101;
14344        let r = _mm512_mask_cmplt_epi16_mask(mask, a, b);
14345        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
14346    }
14347
14348    #[simd_test(enable = "avx512bw,avx512vl")]
14349    unsafe fn test_mm256_cmplt_epi16_mask() {
14350        let a = _mm256_set1_epi16(-2);
14351        let b = _mm256_set1_epi16(-1);
14352        let m = _mm256_cmplt_epi16_mask(a, b);
14353        assert_eq!(m, 0b11111111_11111111);
14354    }
14355
14356    #[simd_test(enable = "avx512bw,avx512vl")]
14357    unsafe fn test_mm256_mask_cmplt_epi16_mask() {
14358        let a = _mm256_set1_epi16(-2);
14359        let b = _mm256_set1_epi16(-1);
14360        let mask = 0b01010101_01010101;
14361        let r = _mm256_mask_cmplt_epi16_mask(mask, a, b);
14362        assert_eq!(r, 0b01010101_01010101);
14363    }
14364
14365    #[simd_test(enable = "avx512bw,avx512vl")]
14366    unsafe fn test_mm_cmplt_epi16_mask() {
14367        let a = _mm_set1_epi16(-2);
14368        let b = _mm_set1_epi16(-1);
14369        let m = _mm_cmplt_epi16_mask(a, b);
14370        assert_eq!(m, 0b11111111);
14371    }
14372
14373    #[simd_test(enable = "avx512bw,avx512vl")]
14374    unsafe fn test_mm_mask_cmplt_epi16_mask() {
14375        let a = _mm_set1_epi16(-2);
14376        let b = _mm_set1_epi16(-1);
14377        let mask = 0b01010101;
14378        let r = _mm_mask_cmplt_epi16_mask(mask, a, b);
14379        assert_eq!(r, 0b01010101);
14380    }
14381
14382    #[simd_test(enable = "avx512bw")]
14383    unsafe fn test_mm512_cmplt_epi8_mask() {
14384        let a = _mm512_set1_epi8(-2);
14385        let b = _mm512_set1_epi8(-1);
14386        let m = _mm512_cmplt_epi8_mask(a, b);
14387        assert_eq!(
14388            m,
14389            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
14390        );
14391    }
14392
14393    #[simd_test(enable = "avx512bw")]
14394    unsafe fn test_mm512_mask_cmplt_epi8_mask() {
14395        let a = _mm512_set1_epi8(-2);
14396        let b = _mm512_set1_epi8(-1);
14397        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
14398        let r = _mm512_mask_cmplt_epi8_mask(mask, a, b);
14399        assert_eq!(
14400            r,
14401            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
14402        );
14403    }
14404
14405    #[simd_test(enable = "avx512bw,avx512vl")]
14406    unsafe fn test_mm256_cmplt_epi8_mask() {
14407        let a = _mm256_set1_epi8(-2);
14408        let b = _mm256_set1_epi8(-1);
14409        let m = _mm256_cmplt_epi8_mask(a, b);
14410        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
14411    }
14412
14413    #[simd_test(enable = "avx512bw,avx512vl")]
14414    unsafe fn test_mm256_mask_cmplt_epi8_mask() {
14415        let a = _mm256_set1_epi8(-2);
14416        let b = _mm256_set1_epi8(-1);
14417        let mask = 0b01010101_01010101_01010101_01010101;
14418        let r = _mm256_mask_cmplt_epi8_mask(mask, a, b);
14419        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
14420    }
14421
14422    #[simd_test(enable = "avx512bw,avx512vl")]
14423    unsafe fn test_mm_cmplt_epi8_mask() {
14424        let a = _mm_set1_epi8(-2);
14425        let b = _mm_set1_epi8(-1);
14426        let m = _mm_cmplt_epi8_mask(a, b);
14427        assert_eq!(m, 0b11111111_11111111);
14428    }
14429
14430    #[simd_test(enable = "avx512bw,avx512vl")]
14431    unsafe fn test_mm_mask_cmplt_epi8_mask() {
14432        let a = _mm_set1_epi8(-2);
14433        let b = _mm_set1_epi8(-1);
14434        let mask = 0b01010101_01010101;
14435        let r = _mm_mask_cmplt_epi8_mask(mask, a, b);
14436        assert_eq!(r, 0b01010101_01010101);
14437    }
14438
14439    #[simd_test(enable = "avx512bw")]
14440    unsafe fn test_mm512_cmpgt_epu16_mask() {
14441        let a = _mm512_set1_epi16(2);
14442        let b = _mm512_set1_epi16(1);
14443        let m = _mm512_cmpgt_epu16_mask(a, b);
14444        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
14445    }
14446
14447    #[simd_test(enable = "avx512bw")]
14448    unsafe fn test_mm512_mask_cmpgt_epu16_mask() {
14449        let a = _mm512_set1_epi16(2);
14450        let b = _mm512_set1_epi16(1);
14451        let mask = 0b01010101_01010101_01010101_01010101;
14452        let r = _mm512_mask_cmpgt_epu16_mask(mask, a, b);
14453        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
14454    }
14455
14456    #[simd_test(enable = "avx512bw,avx512vl")]
14457    unsafe fn test_mm256_cmpgt_epu16_mask() {
14458        let a = _mm256_set1_epi16(2);
14459        let b = _mm256_set1_epi16(1);
14460        let m = _mm256_cmpgt_epu16_mask(a, b);
14461        assert_eq!(m, 0b11111111_11111111);
14462    }
14463
14464    #[simd_test(enable = "avx512bw,avx512vl")]
14465    unsafe fn test_mm256_mask_cmpgt_epu16_mask() {
14466        let a = _mm256_set1_epi16(2);
14467        let b = _mm256_set1_epi16(1);
14468        let mask = 0b01010101_01010101;
14469        let r = _mm256_mask_cmpgt_epu16_mask(mask, a, b);
14470        assert_eq!(r, 0b01010101_01010101);
14471    }
14472
14473    #[simd_test(enable = "avx512bw,avx512vl")]
14474    unsafe fn test_mm_cmpgt_epu16_mask() {
14475        let a = _mm_set1_epi16(2);
14476        let b = _mm_set1_epi16(1);
14477        let m = _mm_cmpgt_epu16_mask(a, b);
14478        assert_eq!(m, 0b11111111);
14479    }
14480
14481    #[simd_test(enable = "avx512bw,avx512vl")]
14482    unsafe fn test_mm_mask_cmpgt_epu16_mask() {
14483        let a = _mm_set1_epi16(2);
14484        let b = _mm_set1_epi16(1);
14485        let mask = 0b01010101;
14486        let r = _mm_mask_cmpgt_epu16_mask(mask, a, b);
14487        assert_eq!(r, 0b01010101);
14488    }
14489
14490    #[simd_test(enable = "avx512bw")]
14491    unsafe fn test_mm512_cmpgt_epu8_mask() {
14492        let a = _mm512_set1_epi8(2);
14493        let b = _mm512_set1_epi8(1);
14494        let m = _mm512_cmpgt_epu8_mask(a, b);
14495        assert_eq!(
14496            m,
14497            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
14498        );
14499    }
14500
14501    #[simd_test(enable = "avx512bw")]
14502    unsafe fn test_mm512_mask_cmpgt_epu8_mask() {
14503        let a = _mm512_set1_epi8(2);
14504        let b = _mm512_set1_epi8(1);
14505        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
14506        let r = _mm512_mask_cmpgt_epu8_mask(mask, a, b);
14507        assert_eq!(
14508            r,
14509            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
14510        );
14511    }
14512
14513    #[simd_test(enable = "avx512bw,avx512vl")]
14514    unsafe fn test_mm256_cmpgt_epu8_mask() {
14515        let a = _mm256_set1_epi8(2);
14516        let b = _mm256_set1_epi8(1);
14517        let m = _mm256_cmpgt_epu8_mask(a, b);
14518        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
14519    }
14520
14521    #[simd_test(enable = "avx512bw,avx512vl")]
14522    unsafe fn test_mm256_mask_cmpgt_epu8_mask() {
14523        let a = _mm256_set1_epi8(2);
14524        let b = _mm256_set1_epi8(1);
14525        let mask = 0b01010101_01010101_01010101_01010101;
14526        let r = _mm256_mask_cmpgt_epu8_mask(mask, a, b);
14527        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
14528    }
14529
14530    #[simd_test(enable = "avx512bw,avx512vl")]
14531    unsafe fn test_mm_cmpgt_epu8_mask() {
14532        let a = _mm_set1_epi8(2);
14533        let b = _mm_set1_epi8(1);
14534        let m = _mm_cmpgt_epu8_mask(a, b);
14535        assert_eq!(m, 0b11111111_11111111);
14536    }
14537
14538    #[simd_test(enable = "avx512bw,avx512vl")]
14539    unsafe fn test_mm_mask_cmpgt_epu8_mask() {
14540        let a = _mm_set1_epi8(2);
14541        let b = _mm_set1_epi8(1);
14542        let mask = 0b01010101_01010101;
14543        let r = _mm_mask_cmpgt_epu8_mask(mask, a, b);
14544        assert_eq!(r, 0b01010101_01010101);
14545    }
14546
14547    #[simd_test(enable = "avx512bw")]
14548    unsafe fn test_mm512_cmpgt_epi16_mask() {
14549        let a = _mm512_set1_epi16(2);
14550        let b = _mm512_set1_epi16(-1);
14551        let m = _mm512_cmpgt_epi16_mask(a, b);
14552        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
14553    }
14554
14555    #[simd_test(enable = "avx512bw")]
14556    unsafe fn test_mm512_mask_cmpgt_epi16_mask() {
14557        let a = _mm512_set1_epi16(2);
14558        let b = _mm512_set1_epi16(-1);
14559        let mask = 0b01010101_01010101_01010101_01010101;
14560        let r = _mm512_mask_cmpgt_epi16_mask(mask, a, b);
14561        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
14562    }
14563
14564    #[simd_test(enable = "avx512bw,avx512vl")]
14565    unsafe fn test_mm256_cmpgt_epi16_mask() {
14566        let a = _mm256_set1_epi16(2);
14567        let b = _mm256_set1_epi16(-1);
14568        let m = _mm256_cmpgt_epi16_mask(a, b);
14569        assert_eq!(m, 0b11111111_11111111);
14570    }
14571
14572    #[simd_test(enable = "avx512bw,avx512vl")]
14573    unsafe fn test_mm256_mask_cmpgt_epi16_mask() {
14574        let a = _mm256_set1_epi16(2);
14575        let b = _mm256_set1_epi16(-1);
14576        let mask = 0b001010101_01010101;
14577        let r = _mm256_mask_cmpgt_epi16_mask(mask, a, b);
14578        assert_eq!(r, 0b01010101_01010101);
14579    }
14580
14581    #[simd_test(enable = "avx512bw,avx512vl")]
14582    unsafe fn test_mm_cmpgt_epi16_mask() {
14583        let a = _mm_set1_epi16(2);
14584        let b = _mm_set1_epi16(-1);
14585        let m = _mm_cmpgt_epi16_mask(a, b);
14586        assert_eq!(m, 0b11111111);
14587    }
14588
14589    #[simd_test(enable = "avx512bw,avx512vl")]
14590    unsafe fn test_mm_mask_cmpgt_epi16_mask() {
14591        let a = _mm_set1_epi16(2);
14592        let b = _mm_set1_epi16(-1);
14593        let mask = 0b01010101;
14594        let r = _mm_mask_cmpgt_epi16_mask(mask, a, b);
14595        assert_eq!(r, 0b01010101);
14596    }
14597
14598    #[simd_test(enable = "avx512bw")]
14599    unsafe fn test_mm512_cmpgt_epi8_mask() {
14600        let a = _mm512_set1_epi8(2);
14601        let b = _mm512_set1_epi8(-1);
14602        let m = _mm512_cmpgt_epi8_mask(a, b);
14603        assert_eq!(
14604            m,
14605            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
14606        );
14607    }
14608
14609    #[simd_test(enable = "avx512bw")]
14610    unsafe fn test_mm512_mask_cmpgt_epi8_mask() {
14611        let a = _mm512_set1_epi8(2);
14612        let b = _mm512_set1_epi8(-1);
14613        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
14614        let r = _mm512_mask_cmpgt_epi8_mask(mask, a, b);
14615        assert_eq!(
14616            r,
14617            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
14618        );
14619    }
14620
14621    #[simd_test(enable = "avx512bw,avx512vl")]
14622    unsafe fn test_mm256_cmpgt_epi8_mask() {
14623        let a = _mm256_set1_epi8(2);
14624        let b = _mm256_set1_epi8(-1);
14625        let m = _mm256_cmpgt_epi8_mask(a, b);
14626        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
14627    }
14628
14629    #[simd_test(enable = "avx512bw,avx512vl")]
14630    unsafe fn test_mm256_mask_cmpgt_epi8_mask() {
14631        let a = _mm256_set1_epi8(2);
14632        let b = _mm256_set1_epi8(-1);
14633        let mask = 0b01010101_01010101_01010101_01010101;
14634        let r = _mm256_mask_cmpgt_epi8_mask(mask, a, b);
14635        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
14636    }
14637
14638    #[simd_test(enable = "avx512bw,avx512vl")]
14639    unsafe fn test_mm_cmpgt_epi8_mask() {
14640        let a = _mm_set1_epi8(2);
14641        let b = _mm_set1_epi8(-1);
14642        let m = _mm_cmpgt_epi8_mask(a, b);
14643        assert_eq!(m, 0b11111111_11111111);
14644    }
14645
14646    #[simd_test(enable = "avx512bw,avx512vl")]
14647    unsafe fn test_mm_mask_cmpgt_epi8_mask() {
14648        let a = _mm_set1_epi8(2);
14649        let b = _mm_set1_epi8(-1);
14650        let mask = 0b01010101_01010101;
14651        let r = _mm_mask_cmpgt_epi8_mask(mask, a, b);
14652        assert_eq!(r, 0b01010101_01010101);
14653    }
14654
14655    #[simd_test(enable = "avx512bw")]
14656    unsafe fn test_mm512_cmple_epu16_mask() {
14657        let a = _mm512_set1_epi16(-1);
14658        let b = _mm512_set1_epi16(-1);
14659        let m = _mm512_cmple_epu16_mask(a, b);
14660        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
14661    }
14662
14663    #[simd_test(enable = "avx512bw")]
14664    unsafe fn test_mm512_mask_cmple_epu16_mask() {
14665        let a = _mm512_set1_epi16(-1);
14666        let b = _mm512_set1_epi16(-1);
14667        let mask = 0b01010101_01010101_01010101_01010101;
14668        let r = _mm512_mask_cmple_epu16_mask(mask, a, b);
14669        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
14670    }
14671
14672    #[simd_test(enable = "avx512bw,avx512vl")]
14673    unsafe fn test_mm256_cmple_epu16_mask() {
14674        let a = _mm256_set1_epi16(-1);
14675        let b = _mm256_set1_epi16(-1);
14676        let m = _mm256_cmple_epu16_mask(a, b);
14677        assert_eq!(m, 0b11111111_11111111);
14678    }
14679
14680    #[simd_test(enable = "avx512bw,avx512vl")]
14681    unsafe fn test_mm256_mask_cmple_epu16_mask() {
14682        let a = _mm256_set1_epi16(-1);
14683        let b = _mm256_set1_epi16(-1);
14684        let mask = 0b01010101_01010101;
14685        let r = _mm256_mask_cmple_epu16_mask(mask, a, b);
14686        assert_eq!(r, 0b01010101_01010101);
14687    }
14688
14689    #[simd_test(enable = "avx512bw,avx512vl")]
14690    unsafe fn test_mm_cmple_epu16_mask() {
14691        let a = _mm_set1_epi16(-1);
14692        let b = _mm_set1_epi16(-1);
14693        let m = _mm_cmple_epu16_mask(a, b);
14694        assert_eq!(m, 0b11111111);
14695    }
14696
14697    #[simd_test(enable = "avx512bw,avx512vl")]
14698    unsafe fn test_mm_mask_cmple_epu16_mask() {
14699        let a = _mm_set1_epi16(-1);
14700        let b = _mm_set1_epi16(-1);
14701        let mask = 0b01010101;
14702        let r = _mm_mask_cmple_epu16_mask(mask, a, b);
14703        assert_eq!(r, 0b01010101);
14704    }
14705
14706    #[simd_test(enable = "avx512bw")]
14707    unsafe fn test_mm512_cmple_epu8_mask() {
14708        let a = _mm512_set1_epi8(-1);
14709        let b = _mm512_set1_epi8(-1);
14710        let m = _mm512_cmple_epu8_mask(a, b);
14711        assert_eq!(
14712            m,
14713            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
14714        );
14715    }
14716
14717    #[simd_test(enable = "avx512bw")]
14718    unsafe fn test_mm512_mask_cmple_epu8_mask() {
14719        let a = _mm512_set1_epi8(-1);
14720        let b = _mm512_set1_epi8(-1);
14721        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
14722        let r = _mm512_mask_cmple_epu8_mask(mask, a, b);
14723        assert_eq!(
14724            r,
14725            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
14726        );
14727    }
14728
14729    #[simd_test(enable = "avx512bw,avx512vl")]
14730    unsafe fn test_mm256_cmple_epu8_mask() {
14731        let a = _mm256_set1_epi8(-1);
14732        let b = _mm256_set1_epi8(-1);
14733        let m = _mm256_cmple_epu8_mask(a, b);
14734        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
14735    }
14736
14737    #[simd_test(enable = "avx512bw,avx512vl")]
14738    unsafe fn test_mm256_mask_cmple_epu8_mask() {
14739        let a = _mm256_set1_epi8(-1);
14740        let b = _mm256_set1_epi8(-1);
14741        let mask = 0b01010101_01010101_01010101_01010101;
14742        let r = _mm256_mask_cmple_epu8_mask(mask, a, b);
14743        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
14744    }
14745
14746    #[simd_test(enable = "avx512bw,avx512vl")]
14747    unsafe fn test_mm_cmple_epu8_mask() {
14748        let a = _mm_set1_epi8(-1);
14749        let b = _mm_set1_epi8(-1);
14750        let m = _mm_cmple_epu8_mask(a, b);
14751        assert_eq!(m, 0b11111111_11111111);
14752    }
14753
14754    #[simd_test(enable = "avx512bw,avx512vl")]
14755    unsafe fn test_mm_mask_cmple_epu8_mask() {
14756        let a = _mm_set1_epi8(-1);
14757        let b = _mm_set1_epi8(-1);
14758        let mask = 0b01010101_01010101;
14759        let r = _mm_mask_cmple_epu8_mask(mask, a, b);
14760        assert_eq!(r, 0b01010101_01010101);
14761    }
14762
14763    #[simd_test(enable = "avx512bw")]
14764    unsafe fn test_mm512_cmple_epi16_mask() {
14765        let a = _mm512_set1_epi16(-1);
14766        let b = _mm512_set1_epi16(-1);
14767        let m = _mm512_cmple_epi16_mask(a, b);
14768        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
14769    }
14770
14771    #[simd_test(enable = "avx512bw")]
14772    unsafe fn test_mm512_mask_cmple_epi16_mask() {
14773        let a = _mm512_set1_epi16(-1);
14774        let b = _mm512_set1_epi16(-1);
14775        let mask = 0b01010101_01010101_01010101_01010101;
14776        let r = _mm512_mask_cmple_epi16_mask(mask, a, b);
14777        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
14778    }
14779
14780    #[simd_test(enable = "avx512bw,avx512vl")]
14781    unsafe fn test_mm256_cmple_epi16_mask() {
14782        let a = _mm256_set1_epi16(-1);
14783        let b = _mm256_set1_epi16(-1);
14784        let m = _mm256_cmple_epi16_mask(a, b);
14785        assert_eq!(m, 0b11111111_11111111);
14786    }
14787
14788    #[simd_test(enable = "avx512bw,avx512vl")]
14789    unsafe fn test_mm256_mask_cmple_epi16_mask() {
14790        let a = _mm256_set1_epi16(-1);
14791        let b = _mm256_set1_epi16(-1);
14792        let mask = 0b01010101_01010101;
14793        let r = _mm256_mask_cmple_epi16_mask(mask, a, b);
14794        assert_eq!(r, 0b01010101_01010101);
14795    }
14796
14797    #[simd_test(enable = "avx512bw,avx512vl")]
14798    unsafe fn test_mm_cmple_epi16_mask() {
14799        let a = _mm_set1_epi16(-1);
14800        let b = _mm_set1_epi16(-1);
14801        let m = _mm_cmple_epi16_mask(a, b);
14802        assert_eq!(m, 0b11111111);
14803    }
14804
14805    #[simd_test(enable = "avx512bw,avx512vl")]
14806    unsafe fn test_mm_mask_cmple_epi16_mask() {
14807        let a = _mm_set1_epi16(-1);
14808        let b = _mm_set1_epi16(-1);
14809        let mask = 0b01010101;
14810        let r = _mm_mask_cmple_epi16_mask(mask, a, b);
14811        assert_eq!(r, 0b01010101);
14812    }
14813
14814    #[simd_test(enable = "avx512bw")]
14815    unsafe fn test_mm512_cmple_epi8_mask() {
14816        let a = _mm512_set1_epi8(-1);
14817        let b = _mm512_set1_epi8(-1);
14818        let m = _mm512_cmple_epi8_mask(a, b);
14819        assert_eq!(
14820            m,
14821            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
14822        );
14823    }
14824
14825    #[simd_test(enable = "avx512bw")]
14826    unsafe fn test_mm512_mask_cmple_epi8_mask() {
14827        let a = _mm512_set1_epi8(-1);
14828        let b = _mm512_set1_epi8(-1);
14829        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
14830        let r = _mm512_mask_cmple_epi8_mask(mask, a, b);
14831        assert_eq!(
14832            r,
14833            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
14834        );
14835    }
14836
14837    #[simd_test(enable = "avx512bw,avx512vl")]
14838    unsafe fn test_mm256_cmple_epi8_mask() {
14839        let a = _mm256_set1_epi8(-1);
14840        let b = _mm256_set1_epi8(-1);
14841        let m = _mm256_cmple_epi8_mask(a, b);
14842        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
14843    }
14844
14845    #[simd_test(enable = "avx512bw,avx512vl")]
14846    unsafe fn test_mm256_mask_cmple_epi8_mask() {
14847        let a = _mm256_set1_epi8(-1);
14848        let b = _mm256_set1_epi8(-1);
14849        let mask = 0b01010101_01010101_01010101_01010101;
14850        let r = _mm256_mask_cmple_epi8_mask(mask, a, b);
14851        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
14852    }
14853
14854    #[simd_test(enable = "avx512bw,avx512vl")]
14855    unsafe fn test_mm_cmple_epi8_mask() {
14856        let a = _mm_set1_epi8(-1);
14857        let b = _mm_set1_epi8(-1);
14858        let m = _mm_cmple_epi8_mask(a, b);
14859        assert_eq!(m, 0b11111111_11111111);
14860    }
14861
14862    #[simd_test(enable = "avx512bw,avx512vl")]
14863    unsafe fn test_mm_mask_cmple_epi8_mask() {
14864        let a = _mm_set1_epi8(-1);
14865        let b = _mm_set1_epi8(-1);
14866        let mask = 0b01010101_01010101;
14867        let r = _mm_mask_cmple_epi8_mask(mask, a, b);
14868        assert_eq!(r, 0b01010101_01010101);
14869    }
14870
14871    #[simd_test(enable = "avx512bw")]
14872    unsafe fn test_mm512_cmpge_epu16_mask() {
14873        let a = _mm512_set1_epi16(1);
14874        let b = _mm512_set1_epi16(1);
14875        let m = _mm512_cmpge_epu16_mask(a, b);
14876        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
14877    }
14878
14879    #[simd_test(enable = "avx512bw")]
14880    unsafe fn test_mm512_mask_cmpge_epu16_mask() {
14881        let a = _mm512_set1_epi16(1);
14882        let b = _mm512_set1_epi16(1);
14883        let mask = 0b01010101_01010101_01010101_01010101;
14884        let r = _mm512_mask_cmpge_epu16_mask(mask, a, b);
14885        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
14886    }
14887
14888    #[simd_test(enable = "avx512bw,avx512vl")]
14889    unsafe fn test_mm256_cmpge_epu16_mask() {
14890        let a = _mm256_set1_epi16(1);
14891        let b = _mm256_set1_epi16(1);
14892        let m = _mm256_cmpge_epu16_mask(a, b);
14893        assert_eq!(m, 0b11111111_11111111);
14894    }
14895
14896    #[simd_test(enable = "avx512bw,avx512vl")]
14897    unsafe fn test_mm256_mask_cmpge_epu16_mask() {
14898        let a = _mm256_set1_epi16(1);
14899        let b = _mm256_set1_epi16(1);
14900        let mask = 0b01010101_01010101;
14901        let r = _mm256_mask_cmpge_epu16_mask(mask, a, b);
14902        assert_eq!(r, 0b01010101_01010101);
14903    }
14904
14905    #[simd_test(enable = "avx512bw,avx512vl")]
14906    unsafe fn test_mm_cmpge_epu16_mask() {
14907        let a = _mm_set1_epi16(1);
14908        let b = _mm_set1_epi16(1);
14909        let m = _mm_cmpge_epu16_mask(a, b);
14910        assert_eq!(m, 0b11111111);
14911    }
14912
14913    #[simd_test(enable = "avx512bw,avx512vl")]
14914    unsafe fn test_mm_mask_cmpge_epu16_mask() {
14915        let a = _mm_set1_epi16(1);
14916        let b = _mm_set1_epi16(1);
14917        let mask = 0b01010101;
14918        let r = _mm_mask_cmpge_epu16_mask(mask, a, b);
14919        assert_eq!(r, 0b01010101);
14920    }
14921
14922    #[simd_test(enable = "avx512bw")]
14923    unsafe fn test_mm512_cmpge_epu8_mask() {
14924        let a = _mm512_set1_epi8(1);
14925        let b = _mm512_set1_epi8(1);
14926        let m = _mm512_cmpge_epu8_mask(a, b);
14927        assert_eq!(
14928            m,
14929            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
14930        );
14931    }
14932
14933    #[simd_test(enable = "avx512bw")]
14934    unsafe fn test_mm512_mask_cmpge_epu8_mask() {
14935        let a = _mm512_set1_epi8(1);
14936        let b = _mm512_set1_epi8(1);
14937        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
14938        let r = _mm512_mask_cmpge_epu8_mask(mask, a, b);
14939        assert_eq!(
14940            r,
14941            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
14942        );
14943    }
14944
14945    #[simd_test(enable = "avx512bw,avx512vl")]
14946    unsafe fn test_mm256_cmpge_epu8_mask() {
14947        let a = _mm256_set1_epi8(1);
14948        let b = _mm256_set1_epi8(1);
14949        let m = _mm256_cmpge_epu8_mask(a, b);
14950        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
14951    }
14952
14953    #[simd_test(enable = "avx512bw,avx512vl")]
14954    unsafe fn test_mm256_mask_cmpge_epu8_mask() {
14955        let a = _mm256_set1_epi8(1);
14956        let b = _mm256_set1_epi8(1);
14957        let mask = 0b01010101_01010101_01010101_01010101;
14958        let r = _mm256_mask_cmpge_epu8_mask(mask, a, b);
14959        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
14960    }
14961
14962    #[simd_test(enable = "avx512bw,avx512vl")]
14963    unsafe fn test_mm_cmpge_epu8_mask() {
14964        let a = _mm_set1_epi8(1);
14965        let b = _mm_set1_epi8(1);
14966        let m = _mm_cmpge_epu8_mask(a, b);
14967        assert_eq!(m, 0b11111111_11111111);
14968    }
14969
14970    #[simd_test(enable = "avx512bw,avx512vl")]
14971    unsafe fn test_mm_mask_cmpge_epu8_mask() {
14972        let a = _mm_set1_epi8(1);
14973        let b = _mm_set1_epi8(1);
14974        let mask = 0b01010101_01010101;
14975        let r = _mm_mask_cmpge_epu8_mask(mask, a, b);
14976        assert_eq!(r, 0b01010101_01010101);
14977    }
14978
14979    #[simd_test(enable = "avx512bw")]
14980    unsafe fn test_mm512_cmpge_epi16_mask() {
14981        let a = _mm512_set1_epi16(-1);
14982        let b = _mm512_set1_epi16(-1);
14983        let m = _mm512_cmpge_epi16_mask(a, b);
14984        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
14985    }
14986
14987    #[simd_test(enable = "avx512bw")]
14988    unsafe fn test_mm512_mask_cmpge_epi16_mask() {
14989        let a = _mm512_set1_epi16(-1);
14990        let b = _mm512_set1_epi16(-1);
14991        let mask = 0b01010101_01010101_01010101_01010101;
14992        let r = _mm512_mask_cmpge_epi16_mask(mask, a, b);
14993        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
14994    }
14995
14996    #[simd_test(enable = "avx512bw,avx512vl")]
14997    unsafe fn test_mm256_cmpge_epi16_mask() {
14998        let a = _mm256_set1_epi16(-1);
14999        let b = _mm256_set1_epi16(-1);
15000        let m = _mm256_cmpge_epi16_mask(a, b);
15001        assert_eq!(m, 0b11111111_11111111);
15002    }
15003
15004    #[simd_test(enable = "avx512bw,avx512vl")]
15005    unsafe fn test_mm256_mask_cmpge_epi16_mask() {
15006        let a = _mm256_set1_epi16(-1);
15007        let b = _mm256_set1_epi16(-1);
15008        let mask = 0b01010101_01010101;
15009        let r = _mm256_mask_cmpge_epi16_mask(mask, a, b);
15010        assert_eq!(r, 0b01010101_01010101);
15011    }
15012
15013    #[simd_test(enable = "avx512bw,avx512vl")]
15014    unsafe fn test_mm_cmpge_epi16_mask() {
15015        let a = _mm_set1_epi16(-1);
15016        let b = _mm_set1_epi16(-1);
15017        let m = _mm_cmpge_epi16_mask(a, b);
15018        assert_eq!(m, 0b11111111);
15019    }
15020
15021    #[simd_test(enable = "avx512bw,avx512vl")]
15022    unsafe fn test_mm_mask_cmpge_epi16_mask() {
15023        let a = _mm_set1_epi16(-1);
15024        let b = _mm_set1_epi16(-1);
15025        let mask = 0b01010101;
15026        let r = _mm_mask_cmpge_epi16_mask(mask, a, b);
15027        assert_eq!(r, 0b01010101);
15028    }
15029
15030    #[simd_test(enable = "avx512bw")]
15031    unsafe fn test_mm512_cmpge_epi8_mask() {
15032        let a = _mm512_set1_epi8(-1);
15033        let b = _mm512_set1_epi8(-1);
15034        let m = _mm512_cmpge_epi8_mask(a, b);
15035        assert_eq!(
15036            m,
15037            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
15038        );
15039    }
15040
15041    #[simd_test(enable = "avx512bw")]
15042    unsafe fn test_mm512_mask_cmpge_epi8_mask() {
15043        let a = _mm512_set1_epi8(-1);
15044        let b = _mm512_set1_epi8(-1);
15045        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
15046        let r = _mm512_mask_cmpge_epi8_mask(mask, a, b);
15047        assert_eq!(
15048            r,
15049            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
15050        );
15051    }
15052
15053    #[simd_test(enable = "avx512bw,avx512vl")]
15054    unsafe fn test_mm256_cmpge_epi8_mask() {
15055        let a = _mm256_set1_epi8(-1);
15056        let b = _mm256_set1_epi8(-1);
15057        let m = _mm256_cmpge_epi8_mask(a, b);
15058        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15059    }
15060
15061    #[simd_test(enable = "avx512bw,avx512vl")]
15062    unsafe fn test_mm256_mask_cmpge_epi8_mask() {
15063        let a = _mm256_set1_epi8(-1);
15064        let b = _mm256_set1_epi8(-1);
15065        let mask = 0b01010101_01010101_01010101_01010101;
15066        let r = _mm256_mask_cmpge_epi8_mask(mask, a, b);
15067        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15068    }
15069
15070    #[simd_test(enable = "avx512bw,avx512vl")]
15071    unsafe fn test_mm_cmpge_epi8_mask() {
15072        let a = _mm_set1_epi8(-1);
15073        let b = _mm_set1_epi8(-1);
15074        let m = _mm_cmpge_epi8_mask(a, b);
15075        assert_eq!(m, 0b11111111_11111111);
15076    }
15077
15078    #[simd_test(enable = "avx512bw,avx512vl")]
15079    unsafe fn test_mm_mask_cmpge_epi8_mask() {
15080        let a = _mm_set1_epi8(-1);
15081        let b = _mm_set1_epi8(-1);
15082        let mask = 0b01010101_01010101;
15083        let r = _mm_mask_cmpge_epi8_mask(mask, a, b);
15084        assert_eq!(r, 0b01010101_01010101);
15085    }
15086
15087    #[simd_test(enable = "avx512bw")]
15088    unsafe fn test_mm512_cmpeq_epu16_mask() {
15089        let a = _mm512_set1_epi16(1);
15090        let b = _mm512_set1_epi16(1);
15091        let m = _mm512_cmpeq_epu16_mask(a, b);
15092        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15093    }
15094
15095    #[simd_test(enable = "avx512bw")]
15096    unsafe fn test_mm512_mask_cmpeq_epu16_mask() {
15097        let a = _mm512_set1_epi16(1);
15098        let b = _mm512_set1_epi16(1);
15099        let mask = 0b01010101_01010101_01010101_01010101;
15100        let r = _mm512_mask_cmpeq_epu16_mask(mask, a, b);
15101        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15102    }
15103
15104    #[simd_test(enable = "avx512bw,avx512vl")]
15105    unsafe fn test_mm256_cmpeq_epu16_mask() {
15106        let a = _mm256_set1_epi16(1);
15107        let b = _mm256_set1_epi16(1);
15108        let m = _mm256_cmpeq_epu16_mask(a, b);
15109        assert_eq!(m, 0b11111111_11111111);
15110    }
15111
15112    #[simd_test(enable = "avx512bw,avx512vl")]
15113    unsafe fn test_mm256_mask_cmpeq_epu16_mask() {
15114        let a = _mm256_set1_epi16(1);
15115        let b = _mm256_set1_epi16(1);
15116        let mask = 0b01010101_01010101;
15117        let r = _mm256_mask_cmpeq_epu16_mask(mask, a, b);
15118        assert_eq!(r, 0b01010101_01010101);
15119    }
15120
15121    #[simd_test(enable = "avx512bw,avx512vl")]
15122    unsafe fn test_mm_cmpeq_epu16_mask() {
15123        let a = _mm_set1_epi16(1);
15124        let b = _mm_set1_epi16(1);
15125        let m = _mm_cmpeq_epu16_mask(a, b);
15126        assert_eq!(m, 0b11111111);
15127    }
15128
15129    #[simd_test(enable = "avx512bw,avx512vl")]
15130    unsafe fn test_mm_mask_cmpeq_epu16_mask() {
15131        let a = _mm_set1_epi16(1);
15132        let b = _mm_set1_epi16(1);
15133        let mask = 0b01010101;
15134        let r = _mm_mask_cmpeq_epu16_mask(mask, a, b);
15135        assert_eq!(r, 0b01010101);
15136    }
15137
15138    #[simd_test(enable = "avx512bw")]
15139    unsafe fn test_mm512_cmpeq_epu8_mask() {
15140        let a = _mm512_set1_epi8(1);
15141        let b = _mm512_set1_epi8(1);
15142        let m = _mm512_cmpeq_epu8_mask(a, b);
15143        assert_eq!(
15144            m,
15145            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
15146        );
15147    }
15148
15149    #[simd_test(enable = "avx512bw")]
15150    unsafe fn test_mm512_mask_cmpeq_epu8_mask() {
15151        let a = _mm512_set1_epi8(1);
15152        let b = _mm512_set1_epi8(1);
15153        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
15154        let r = _mm512_mask_cmpeq_epu8_mask(mask, a, b);
15155        assert_eq!(
15156            r,
15157            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
15158        );
15159    }
15160
15161    #[simd_test(enable = "avx512bw,avx512vl")]
15162    unsafe fn test_mm256_cmpeq_epu8_mask() {
15163        let a = _mm256_set1_epi8(1);
15164        let b = _mm256_set1_epi8(1);
15165        let m = _mm256_cmpeq_epu8_mask(a, b);
15166        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15167    }
15168
15169    #[simd_test(enable = "avx512bw,avx512vl")]
15170    unsafe fn test_mm256_mask_cmpeq_epu8_mask() {
15171        let a = _mm256_set1_epi8(1);
15172        let b = _mm256_set1_epi8(1);
15173        let mask = 0b01010101_01010101_01010101_01010101;
15174        let r = _mm256_mask_cmpeq_epu8_mask(mask, a, b);
15175        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15176    }
15177
15178    #[simd_test(enable = "avx512bw,avx512vl")]
15179    unsafe fn test_mm_cmpeq_epu8_mask() {
15180        let a = _mm_set1_epi8(1);
15181        let b = _mm_set1_epi8(1);
15182        let m = _mm_cmpeq_epu8_mask(a, b);
15183        assert_eq!(m, 0b11111111_11111111);
15184    }
15185
15186    #[simd_test(enable = "avx512bw,avx512vl")]
15187    unsafe fn test_mm_mask_cmpeq_epu8_mask() {
15188        let a = _mm_set1_epi8(1);
15189        let b = _mm_set1_epi8(1);
15190        let mask = 0b01010101_01010101;
15191        let r = _mm_mask_cmpeq_epu8_mask(mask, a, b);
15192        assert_eq!(r, 0b01010101_01010101);
15193    }
15194
15195    #[simd_test(enable = "avx512bw")]
15196    unsafe fn test_mm512_cmpeq_epi16_mask() {
15197        let a = _mm512_set1_epi16(-1);
15198        let b = _mm512_set1_epi16(-1);
15199        let m = _mm512_cmpeq_epi16_mask(a, b);
15200        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15201    }
15202
15203    #[simd_test(enable = "avx512bw")]
15204    unsafe fn test_mm512_mask_cmpeq_epi16_mask() {
15205        let a = _mm512_set1_epi16(-1);
15206        let b = _mm512_set1_epi16(-1);
15207        let mask = 0b01010101_01010101_01010101_01010101;
15208        let r = _mm512_mask_cmpeq_epi16_mask(mask, a, b);
15209        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15210    }
15211
15212    #[simd_test(enable = "avx512bw,avx512vl")]
15213    unsafe fn test_mm256_cmpeq_epi16_mask() {
15214        let a = _mm256_set1_epi16(-1);
15215        let b = _mm256_set1_epi16(-1);
15216        let m = _mm256_cmpeq_epi16_mask(a, b);
15217        assert_eq!(m, 0b11111111_11111111);
15218    }
15219
15220    #[simd_test(enable = "avx512bw,avx512vl")]
15221    unsafe fn test_mm256_mask_cmpeq_epi16_mask() {
15222        let a = _mm256_set1_epi16(-1);
15223        let b = _mm256_set1_epi16(-1);
15224        let mask = 0b01010101_01010101;
15225        let r = _mm256_mask_cmpeq_epi16_mask(mask, a, b);
15226        assert_eq!(r, 0b01010101_01010101);
15227    }
15228
15229    #[simd_test(enable = "avx512bw,avx512vl")]
15230    unsafe fn test_mm_cmpeq_epi16_mask() {
15231        let a = _mm_set1_epi16(-1);
15232        let b = _mm_set1_epi16(-1);
15233        let m = _mm_cmpeq_epi16_mask(a, b);
15234        assert_eq!(m, 0b11111111);
15235    }
15236
15237    #[simd_test(enable = "avx512bw,avx512vl")]
15238    unsafe fn test_mm_mask_cmpeq_epi16_mask() {
15239        let a = _mm_set1_epi16(-1);
15240        let b = _mm_set1_epi16(-1);
15241        let mask = 0b01010101;
15242        let r = _mm_mask_cmpeq_epi16_mask(mask, a, b);
15243        assert_eq!(r, 0b01010101);
15244    }
15245
15246    #[simd_test(enable = "avx512bw")]
15247    unsafe fn test_mm512_cmpeq_epi8_mask() {
15248        let a = _mm512_set1_epi8(-1);
15249        let b = _mm512_set1_epi8(-1);
15250        let m = _mm512_cmpeq_epi8_mask(a, b);
15251        assert_eq!(
15252            m,
15253            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
15254        );
15255    }
15256
15257    #[simd_test(enable = "avx512bw")]
15258    unsafe fn test_mm512_mask_cmpeq_epi8_mask() {
15259        let a = _mm512_set1_epi8(-1);
15260        let b = _mm512_set1_epi8(-1);
15261        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
15262        let r = _mm512_mask_cmpeq_epi8_mask(mask, a, b);
15263        assert_eq!(
15264            r,
15265            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
15266        );
15267    }
15268
15269    #[simd_test(enable = "avx512bw,avx512vl")]
15270    unsafe fn test_mm256_cmpeq_epi8_mask() {
15271        let a = _mm256_set1_epi8(-1);
15272        let b = _mm256_set1_epi8(-1);
15273        let m = _mm256_cmpeq_epi8_mask(a, b);
15274        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15275    }
15276
15277    #[simd_test(enable = "avx512bw,avx512vl")]
15278    unsafe fn test_mm256_mask_cmpeq_epi8_mask() {
15279        let a = _mm256_set1_epi8(-1);
15280        let b = _mm256_set1_epi8(-1);
15281        let mask = 0b01010101_01010101_01010101_01010101;
15282        let r = _mm256_mask_cmpeq_epi8_mask(mask, a, b);
15283        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15284    }
15285
15286    #[simd_test(enable = "avx512bw,avx512vl")]
15287    unsafe fn test_mm_cmpeq_epi8_mask() {
15288        let a = _mm_set1_epi8(-1);
15289        let b = _mm_set1_epi8(-1);
15290        let m = _mm_cmpeq_epi8_mask(a, b);
15291        assert_eq!(m, 0b11111111_11111111);
15292    }
15293
15294    #[simd_test(enable = "avx512bw,avx512vl")]
15295    unsafe fn test_mm_mask_cmpeq_epi8_mask() {
15296        let a = _mm_set1_epi8(-1);
15297        let b = _mm_set1_epi8(-1);
15298        let mask = 0b01010101_01010101;
15299        let r = _mm_mask_cmpeq_epi8_mask(mask, a, b);
15300        assert_eq!(r, 0b01010101_01010101);
15301    }
15302
15303    #[simd_test(enable = "avx512bw")]
15304    unsafe fn test_mm512_cmpneq_epu16_mask() {
15305        let a = _mm512_set1_epi16(2);
15306        let b = _mm512_set1_epi16(1);
15307        let m = _mm512_cmpneq_epu16_mask(a, b);
15308        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15309    }
15310
15311    #[simd_test(enable = "avx512bw")]
15312    unsafe fn test_mm512_mask_cmpneq_epu16_mask() {
15313        let a = _mm512_set1_epi16(2);
15314        let b = _mm512_set1_epi16(1);
15315        let mask = 0b01010101_01010101_01010101_01010101;
15316        let r = _mm512_mask_cmpneq_epu16_mask(mask, a, b);
15317        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15318    }
15319
15320    #[simd_test(enable = "avx512bw,avx512vl")]
15321    unsafe fn test_mm256_cmpneq_epu16_mask() {
15322        let a = _mm256_set1_epi16(2);
15323        let b = _mm256_set1_epi16(1);
15324        let m = _mm256_cmpneq_epu16_mask(a, b);
15325        assert_eq!(m, 0b11111111_11111111);
15326    }
15327
15328    #[simd_test(enable = "avx512bw,avx512vl")]
15329    unsafe fn test_mm256_mask_cmpneq_epu16_mask() {
15330        let a = _mm256_set1_epi16(2);
15331        let b = _mm256_set1_epi16(1);
15332        let mask = 0b01010101_01010101;
15333        let r = _mm256_mask_cmpneq_epu16_mask(mask, a, b);
15334        assert_eq!(r, 0b01010101_01010101);
15335    }
15336
15337    #[simd_test(enable = "avx512bw,avx512vl")]
15338    unsafe fn test_mm_cmpneq_epu16_mask() {
15339        let a = _mm_set1_epi16(2);
15340        let b = _mm_set1_epi16(1);
15341        let m = _mm_cmpneq_epu16_mask(a, b);
15342        assert_eq!(m, 0b11111111);
15343    }
15344
15345    #[simd_test(enable = "avx512bw,avx512vl")]
15346    unsafe fn test_mm_mask_cmpneq_epu16_mask() {
15347        let a = _mm_set1_epi16(2);
15348        let b = _mm_set1_epi16(1);
15349        let mask = 0b01010101;
15350        let r = _mm_mask_cmpneq_epu16_mask(mask, a, b);
15351        assert_eq!(r, 0b01010101);
15352    }
15353
15354    #[simd_test(enable = "avx512bw")]
15355    unsafe fn test_mm512_cmpneq_epu8_mask() {
15356        let a = _mm512_set1_epi8(2);
15357        let b = _mm512_set1_epi8(1);
15358        let m = _mm512_cmpneq_epu8_mask(a, b);
15359        assert_eq!(
15360            m,
15361            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
15362        );
15363    }
15364
15365    #[simd_test(enable = "avx512bw")]
15366    unsafe fn test_mm512_mask_cmpneq_epu8_mask() {
15367        let a = _mm512_set1_epi8(2);
15368        let b = _mm512_set1_epi8(1);
15369        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
15370        let r = _mm512_mask_cmpneq_epu8_mask(mask, a, b);
15371        assert_eq!(
15372            r,
15373            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
15374        );
15375    }
15376
15377    #[simd_test(enable = "avx512bw,avx512vl")]
15378    unsafe fn test_mm256_cmpneq_epu8_mask() {
15379        let a = _mm256_set1_epi8(2);
15380        let b = _mm256_set1_epi8(1);
15381        let m = _mm256_cmpneq_epu8_mask(a, b);
15382        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15383    }
15384
15385    #[simd_test(enable = "avx512bw,avx512vl")]
15386    unsafe fn test_mm256_mask_cmpneq_epu8_mask() {
15387        let a = _mm256_set1_epi8(2);
15388        let b = _mm256_set1_epi8(1);
15389        let mask = 0b01010101_01010101_01010101_01010101;
15390        let r = _mm256_mask_cmpneq_epu8_mask(mask, a, b);
15391        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15392    }
15393
15394    #[simd_test(enable = "avx512bw,avx512vl")]
15395    unsafe fn test_mm_cmpneq_epu8_mask() {
15396        let a = _mm_set1_epi8(2);
15397        let b = _mm_set1_epi8(1);
15398        let m = _mm_cmpneq_epu8_mask(a, b);
15399        assert_eq!(m, 0b11111111_11111111);
15400    }
15401
15402    #[simd_test(enable = "avx512bw,avx512vl")]
15403    unsafe fn test_mm_mask_cmpneq_epu8_mask() {
15404        let a = _mm_set1_epi8(2);
15405        let b = _mm_set1_epi8(1);
15406        let mask = 0b01010101_01010101;
15407        let r = _mm_mask_cmpneq_epu8_mask(mask, a, b);
15408        assert_eq!(r, 0b01010101_01010101);
15409    }
15410
15411    #[simd_test(enable = "avx512bw")]
15412    unsafe fn test_mm512_cmpneq_epi16_mask() {
15413        let a = _mm512_set1_epi16(1);
15414        let b = _mm512_set1_epi16(-1);
15415        let m = _mm512_cmpneq_epi16_mask(a, b);
15416        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15417    }
15418
15419    #[simd_test(enable = "avx512bw")]
15420    unsafe fn test_mm512_mask_cmpneq_epi16_mask() {
15421        let a = _mm512_set1_epi16(1);
15422        let b = _mm512_set1_epi16(-1);
15423        let mask = 0b01010101_01010101_01010101_01010101;
15424        let r = _mm512_mask_cmpneq_epi16_mask(mask, a, b);
15425        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15426    }
15427
15428    #[simd_test(enable = "avx512bw,avx512vl")]
15429    unsafe fn test_mm256_cmpneq_epi16_mask() {
15430        let a = _mm256_set1_epi16(1);
15431        let b = _mm256_set1_epi16(-1);
15432        let m = _mm256_cmpneq_epi16_mask(a, b);
15433        assert_eq!(m, 0b11111111_11111111);
15434    }
15435
15436    #[simd_test(enable = "avx512bw,avx512vl")]
15437    unsafe fn test_mm256_mask_cmpneq_epi16_mask() {
15438        let a = _mm256_set1_epi16(1);
15439        let b = _mm256_set1_epi16(-1);
15440        let mask = 0b01010101_01010101;
15441        let r = _mm256_mask_cmpneq_epi16_mask(mask, a, b);
15442        assert_eq!(r, 0b01010101_01010101);
15443    }
15444
15445    #[simd_test(enable = "avx512bw,avx512vl")]
15446    unsafe fn test_mm_cmpneq_epi16_mask() {
15447        let a = _mm_set1_epi16(1);
15448        let b = _mm_set1_epi16(-1);
15449        let m = _mm_cmpneq_epi16_mask(a, b);
15450        assert_eq!(m, 0b11111111);
15451    }
15452
15453    #[simd_test(enable = "avx512bw,avx512vl")]
15454    unsafe fn test_mm_mask_cmpneq_epi16_mask() {
15455        let a = _mm_set1_epi16(1);
15456        let b = _mm_set1_epi16(-1);
15457        let mask = 0b01010101;
15458        let r = _mm_mask_cmpneq_epi16_mask(mask, a, b);
15459        assert_eq!(r, 0b01010101);
15460    }
15461
15462    #[simd_test(enable = "avx512bw")]
15463    unsafe fn test_mm512_cmpneq_epi8_mask() {
15464        let a = _mm512_set1_epi8(1);
15465        let b = _mm512_set1_epi8(-1);
15466        let m = _mm512_cmpneq_epi8_mask(a, b);
15467        assert_eq!(
15468            m,
15469            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
15470        );
15471    }
15472
15473    #[simd_test(enable = "avx512bw")]
15474    unsafe fn test_mm512_mask_cmpneq_epi8_mask() {
15475        let a = _mm512_set1_epi8(1);
15476        let b = _mm512_set1_epi8(-1);
15477        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
15478        let r = _mm512_mask_cmpneq_epi8_mask(mask, a, b);
15479        assert_eq!(
15480            r,
15481            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
15482        );
15483    }
15484
15485    #[simd_test(enable = "avx512bw,avx512vl")]
15486    unsafe fn test_mm256_cmpneq_epi8_mask() {
15487        let a = _mm256_set1_epi8(1);
15488        let b = _mm256_set1_epi8(-1);
15489        let m = _mm256_cmpneq_epi8_mask(a, b);
15490        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15491    }
15492
15493    #[simd_test(enable = "avx512bw,avx512vl")]
15494    unsafe fn test_mm256_mask_cmpneq_epi8_mask() {
15495        let a = _mm256_set1_epi8(1);
15496        let b = _mm256_set1_epi8(-1);
15497        let mask = 0b01010101_01010101_01010101_01010101;
15498        let r = _mm256_mask_cmpneq_epi8_mask(mask, a, b);
15499        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15500    }
15501
15502    #[simd_test(enable = "avx512bw,avx512vl")]
15503    unsafe fn test_mm_cmpneq_epi8_mask() {
15504        let a = _mm_set1_epi8(1);
15505        let b = _mm_set1_epi8(-1);
15506        let m = _mm_cmpneq_epi8_mask(a, b);
15507        assert_eq!(m, 0b11111111_11111111);
15508    }
15509
15510    #[simd_test(enable = "avx512bw,avx512vl")]
15511    unsafe fn test_mm_mask_cmpneq_epi8_mask() {
15512        let a = _mm_set1_epi8(1);
15513        let b = _mm_set1_epi8(-1);
15514        let mask = 0b01010101_01010101;
15515        let r = _mm_mask_cmpneq_epi8_mask(mask, a, b);
15516        assert_eq!(r, 0b01010101_01010101);
15517    }
15518
15519    #[simd_test(enable = "avx512bw")]
15520    unsafe fn test_mm512_cmp_epu16_mask() {
15521        let a = _mm512_set1_epi16(0);
15522        let b = _mm512_set1_epi16(1);
15523        let m = _mm512_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
15524        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15525    }
15526
15527    #[simd_test(enable = "avx512bw")]
15528    unsafe fn test_mm512_mask_cmp_epu16_mask() {
15529        let a = _mm512_set1_epi16(0);
15530        let b = _mm512_set1_epi16(1);
15531        let mask = 0b01010101_01010101_01010101_01010101;
15532        let r = _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
15533        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15534    }
15535
15536    #[simd_test(enable = "avx512bw,avx512vl")]
15537    unsafe fn test_mm256_cmp_epu16_mask() {
15538        let a = _mm256_set1_epi16(0);
15539        let b = _mm256_set1_epi16(1);
15540        let m = _mm256_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
15541        assert_eq!(m, 0b11111111_11111111);
15542    }
15543
15544    #[simd_test(enable = "avx512bw,avx512vl")]
15545    unsafe fn test_mm256_mask_cmp_epu16_mask() {
15546        let a = _mm256_set1_epi16(0);
15547        let b = _mm256_set1_epi16(1);
15548        let mask = 0b01010101_01010101;
15549        let r = _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
15550        assert_eq!(r, 0b01010101_01010101);
15551    }
15552
15553    #[simd_test(enable = "avx512bw,avx512vl")]
15554    unsafe fn test_mm_cmp_epu16_mask() {
15555        let a = _mm_set1_epi16(0);
15556        let b = _mm_set1_epi16(1);
15557        let m = _mm_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
15558        assert_eq!(m, 0b11111111);
15559    }
15560
15561    #[simd_test(enable = "avx512bw,avx512vl")]
15562    unsafe fn test_mm_mask_cmp_epu16_mask() {
15563        let a = _mm_set1_epi16(0);
15564        let b = _mm_set1_epi16(1);
15565        let mask = 0b01010101;
15566        let r = _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
15567        assert_eq!(r, 0b01010101);
15568    }
15569
15570    #[simd_test(enable = "avx512bw")]
15571    unsafe fn test_mm512_cmp_epu8_mask() {
15572        let a = _mm512_set1_epi8(0);
15573        let b = _mm512_set1_epi8(1);
15574        let m = _mm512_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
15575        assert_eq!(
15576            m,
15577            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
15578        );
15579    }
15580
15581    #[simd_test(enable = "avx512bw")]
15582    unsafe fn test_mm512_mask_cmp_epu8_mask() {
15583        let a = _mm512_set1_epi8(0);
15584        let b = _mm512_set1_epi8(1);
15585        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
15586        let r = _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
15587        assert_eq!(
15588            r,
15589            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
15590        );
15591    }
15592
15593    #[simd_test(enable = "avx512bw,avx512vl")]
15594    unsafe fn test_mm256_cmp_epu8_mask() {
15595        let a = _mm256_set1_epi8(0);
15596        let b = _mm256_set1_epi8(1);
15597        let m = _mm256_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
15598        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15599    }
15600
15601    #[simd_test(enable = "avx512bw,avx512vl")]
15602    unsafe fn test_mm256_mask_cmp_epu8_mask() {
15603        let a = _mm256_set1_epi8(0);
15604        let b = _mm256_set1_epi8(1);
15605        let mask = 0b01010101_01010101_01010101_01010101;
15606        let r = _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
15607        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15608    }
15609
15610    #[simd_test(enable = "avx512bw,avx512vl")]
15611    unsafe fn test_mm_cmp_epu8_mask() {
15612        let a = _mm_set1_epi8(0);
15613        let b = _mm_set1_epi8(1);
15614        let m = _mm_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
15615        assert_eq!(m, 0b11111111_11111111);
15616    }
15617
15618    #[simd_test(enable = "avx512bw,avx512vl")]
15619    unsafe fn test_mm_mask_cmp_epu8_mask() {
15620        let a = _mm_set1_epi8(0);
15621        let b = _mm_set1_epi8(1);
15622        let mask = 0b01010101_01010101;
15623        let r = _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
15624        assert_eq!(r, 0b01010101_01010101);
15625    }
15626
15627    #[simd_test(enable = "avx512bw")]
15628    unsafe fn test_mm512_cmp_epi16_mask() {
15629        let a = _mm512_set1_epi16(0);
15630        let b = _mm512_set1_epi16(1);
15631        let m = _mm512_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
15632        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15633    }
15634
15635    #[simd_test(enable = "avx512bw")]
15636    unsafe fn test_mm512_mask_cmp_epi16_mask() {
15637        let a = _mm512_set1_epi16(0);
15638        let b = _mm512_set1_epi16(1);
15639        let mask = 0b01010101_01010101_01010101_01010101;
15640        let r = _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
15641        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15642    }
15643
15644    #[simd_test(enable = "avx512bw,avx512vl")]
15645    unsafe fn test_mm256_cmp_epi16_mask() {
15646        let a = _mm256_set1_epi16(0);
15647        let b = _mm256_set1_epi16(1);
15648        let m = _mm256_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
15649        assert_eq!(m, 0b11111111_11111111);
15650    }
15651
15652    #[simd_test(enable = "avx512bw,avx512vl")]
15653    unsafe fn test_mm256_mask_cmp_epi16_mask() {
15654        let a = _mm256_set1_epi16(0);
15655        let b = _mm256_set1_epi16(1);
15656        let mask = 0b01010101_01010101;
15657        let r = _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
15658        assert_eq!(r, 0b01010101_01010101);
15659    }
15660
15661    #[simd_test(enable = "avx512bw,avx512vl")]
15662    unsafe fn test_mm_cmp_epi16_mask() {
15663        let a = _mm_set1_epi16(0);
15664        let b = _mm_set1_epi16(1);
15665        let m = _mm_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
15666        assert_eq!(m, 0b11111111);
15667    }
15668
15669    #[simd_test(enable = "avx512bw,avx512vl")]
15670    unsafe fn test_mm_mask_cmp_epi16_mask() {
15671        let a = _mm_set1_epi16(0);
15672        let b = _mm_set1_epi16(1);
15673        let mask = 0b01010101;
15674        let r = _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
15675        assert_eq!(r, 0b01010101);
15676    }
15677
15678    #[simd_test(enable = "avx512bw")]
15679    unsafe fn test_mm512_cmp_epi8_mask() {
15680        let a = _mm512_set1_epi8(0);
15681        let b = _mm512_set1_epi8(1);
15682        let m = _mm512_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
15683        assert_eq!(
15684            m,
15685            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
15686        );
15687    }
15688
15689    #[simd_test(enable = "avx512bw")]
15690    unsafe fn test_mm512_mask_cmp_epi8_mask() {
15691        let a = _mm512_set1_epi8(0);
15692        let b = _mm512_set1_epi8(1);
15693        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
15694        let r = _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
15695        assert_eq!(
15696            r,
15697            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
15698        );
15699    }
15700
15701    #[simd_test(enable = "avx512bw,avx512vl")]
15702    unsafe fn test_mm256_cmp_epi8_mask() {
15703        let a = _mm256_set1_epi8(0);
15704        let b = _mm256_set1_epi8(1);
15705        let m = _mm256_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
15706        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15707    }
15708
15709    #[simd_test(enable = "avx512bw,avx512vl")]
15710    unsafe fn test_mm256_mask_cmp_epi8_mask() {
15711        let a = _mm256_set1_epi8(0);
15712        let b = _mm256_set1_epi8(1);
15713        let mask = 0b01010101_01010101_01010101_01010101;
15714        let r = _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
15715        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15716    }
15717
15718    #[simd_test(enable = "avx512bw,avx512vl")]
15719    unsafe fn test_mm_cmp_epi8_mask() {
15720        let a = _mm_set1_epi8(0);
15721        let b = _mm_set1_epi8(1);
15722        let m = _mm_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
15723        assert_eq!(m, 0b11111111_11111111);
15724    }
15725
15726    #[simd_test(enable = "avx512bw,avx512vl")]
15727    unsafe fn test_mm_mask_cmp_epi8_mask() {
15728        let a = _mm_set1_epi8(0);
15729        let b = _mm_set1_epi8(1);
15730        let mask = 0b01010101_01010101;
15731        let r = _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
15732        assert_eq!(r, 0b01010101_01010101);
15733    }
15734
15735    #[simd_test(enable = "avx512bw,avx512vl")]
15736    unsafe fn test_mm256_reduce_add_epi16() {
15737        let a = _mm256_set1_epi16(1);
15738        let e = _mm256_reduce_add_epi16(a);
15739        assert_eq!(16, e);
15740    }
15741
15742    #[simd_test(enable = "avx512bw,avx512vl")]
15743    unsafe fn test_mm256_mask_reduce_add_epi16() {
15744        let a = _mm256_set1_epi16(1);
15745        let e = _mm256_mask_reduce_add_epi16(0b11111111_00000000, a);
15746        assert_eq!(8, e);
15747    }
15748
15749    #[simd_test(enable = "avx512bw,avx512vl")]
15750    unsafe fn test_mm_reduce_add_epi16() {
15751        let a = _mm_set1_epi16(1);
15752        let e = _mm_reduce_add_epi16(a);
15753        assert_eq!(8, e);
15754    }
15755
15756    #[simd_test(enable = "avx512bw,avx512vl")]
15757    unsafe fn test_mm_mask_reduce_add_epi16() {
15758        let a = _mm_set1_epi16(1);
15759        let e = _mm_mask_reduce_add_epi16(0b11110000, a);
15760        assert_eq!(4, e);
15761    }
15762
15763    #[simd_test(enable = "avx512bw,avx512vl")]
15764    unsafe fn test_mm256_reduce_add_epi8() {
15765        let a = _mm256_set1_epi8(1);
15766        let e = _mm256_reduce_add_epi8(a);
15767        assert_eq!(32, e);
15768    }
15769
15770    #[simd_test(enable = "avx512bw,avx512vl")]
15771    unsafe fn test_mm256_mask_reduce_add_epi8() {
15772        let a = _mm256_set1_epi8(1);
15773        let e = _mm256_mask_reduce_add_epi8(0b11111111_00000000_11111111_00000000, a);
15774        assert_eq!(16, e);
15775    }
15776
15777    #[simd_test(enable = "avx512bw,avx512vl")]
15778    unsafe fn test_mm_reduce_add_epi8() {
15779        let a = _mm_set1_epi8(1);
15780        let e = _mm_reduce_add_epi8(a);
15781        assert_eq!(16, e);
15782    }
15783
15784    #[simd_test(enable = "avx512bw,avx512vl")]
15785    unsafe fn test_mm_mask_reduce_add_epi8() {
15786        let a = _mm_set1_epi8(1);
15787        let e = _mm_mask_reduce_add_epi8(0b11111111_00000000, a);
15788        assert_eq!(8, e);
15789    }
15790
15791    #[simd_test(enable = "avx512bw,avx512vl")]
15792    unsafe fn test_mm256_reduce_and_epi16() {
15793        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
15794        let e = _mm256_reduce_and_epi16(a);
15795        assert_eq!(0, e);
15796    }
15797
15798    #[simd_test(enable = "avx512bw,avx512vl")]
15799    unsafe fn test_mm256_mask_reduce_and_epi16() {
15800        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
15801        let e = _mm256_mask_reduce_and_epi16(0b11111111_00000000, a);
15802        assert_eq!(1, e);
15803    }
15804
15805    #[simd_test(enable = "avx512bw,avx512vl")]
15806    unsafe fn test_mm_reduce_and_epi16() {
15807        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
15808        let e = _mm_reduce_and_epi16(a);
15809        assert_eq!(0, e);
15810    }
15811
15812    #[simd_test(enable = "avx512bw,avx512vl")]
15813    unsafe fn test_mm_mask_reduce_and_epi16() {
15814        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
15815        let e = _mm_mask_reduce_and_epi16(0b11110000, a);
15816        assert_eq!(1, e);
15817    }
15818
15819    #[simd_test(enable = "avx512bw,avx512vl")]
15820    unsafe fn test_mm256_reduce_and_epi8() {
15821        let a = _mm256_set_epi8(
15822            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
15823            2, 2, 2,
15824        );
15825        let e = _mm256_reduce_and_epi8(a);
15826        assert_eq!(0, e);
15827    }
15828
15829    #[simd_test(enable = "avx512bw,avx512vl")]
15830    unsafe fn test_mm256_mask_reduce_and_epi8() {
15831        let a = _mm256_set_epi8(
15832            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
15833            2, 2, 2,
15834        );
15835        let e = _mm256_mask_reduce_and_epi8(0b11111111_00000000_11111111_00000000, a);
15836        assert_eq!(1, e);
15837    }
15838
15839    #[simd_test(enable = "avx512bw,avx512vl")]
15840    unsafe fn test_mm_reduce_and_epi8() {
15841        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
15842        let e = _mm_reduce_and_epi8(a);
15843        assert_eq!(0, e);
15844    }
15845
15846    #[simd_test(enable = "avx512bw,avx512vl")]
15847    unsafe fn test_mm_mask_reduce_and_epi8() {
15848        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
15849        let e = _mm_mask_reduce_and_epi8(0b11111111_00000000, a);
15850        assert_eq!(1, e);
15851    }
15852
15853    #[simd_test(enable = "avx512bw,avx512vl")]
15854    unsafe fn test_mm256_reduce_mul_epi16() {
15855        let a = _mm256_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
15856        let e = _mm256_reduce_mul_epi16(a);
15857        assert_eq!(256, e);
15858    }
15859
15860    #[simd_test(enable = "avx512bw,avx512vl")]
15861    unsafe fn test_mm256_mask_reduce_mul_epi16() {
15862        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
15863        let e = _mm256_mask_reduce_mul_epi16(0b11111111_00000000, a);
15864        assert_eq!(1, e);
15865    }
15866
15867    #[simd_test(enable = "avx512bw,avx512vl")]
15868    unsafe fn test_mm_reduce_mul_epi16() {
15869        let a = _mm_set_epi16(2, 2, 2, 2, 1, 1, 1, 1);
15870        let e = _mm_reduce_mul_epi16(a);
15871        assert_eq!(16, e);
15872    }
15873
15874    #[simd_test(enable = "avx512bw,avx512vl")]
15875    unsafe fn test_mm_mask_reduce_mul_epi16() {
15876        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
15877        let e = _mm_mask_reduce_mul_epi16(0b11110000, a);
15878        assert_eq!(1, e);
15879    }
15880
15881    #[simd_test(enable = "avx512bw,avx512vl")]
15882    unsafe fn test_mm256_reduce_mul_epi8() {
15883        let a = _mm256_set_epi8(
15884            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
15885            2, 2, 2,
15886        );
15887        let e = _mm256_reduce_mul_epi8(a);
15888        assert_eq!(64, e);
15889    }
15890
15891    #[simd_test(enable = "avx512bw,avx512vl")]
15892    unsafe fn test_mm256_mask_reduce_mul_epi8() {
15893        let a = _mm256_set_epi8(
15894            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
15895            2, 2, 2,
15896        );
15897        let e = _mm256_mask_reduce_mul_epi8(0b11111111_00000000_11111111_00000000, a);
15898        assert_eq!(1, e);
15899    }
15900
15901    #[simd_test(enable = "avx512bw,avx512vl")]
15902    unsafe fn test_mm_reduce_mul_epi8() {
15903        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2);
15904        let e = _mm_reduce_mul_epi8(a);
15905        assert_eq!(8, e);
15906    }
15907
15908    #[simd_test(enable = "avx512bw,avx512vl")]
15909    unsafe fn test_mm_mask_reduce_mul_epi8() {
15910        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2);
15911        let e = _mm_mask_reduce_mul_epi8(0b11111111_00000000, a);
15912        assert_eq!(1, e);
15913    }
15914
15915    #[simd_test(enable = "avx512bw,avx512vl")]
15916    unsafe fn test_mm256_reduce_max_epi16() {
15917        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
15918        let e: i16 = _mm256_reduce_max_epi16(a);
15919        assert_eq!(15, e);
15920    }
15921
15922    #[simd_test(enable = "avx512bw,avx512vl")]
15923    unsafe fn test_mm256_mask_reduce_max_epi16() {
15924        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
15925        let e: i16 = _mm256_mask_reduce_max_epi16(0b11111111_00000000, a);
15926        assert_eq!(7, e);
15927    }
15928
15929    #[simd_test(enable = "avx512bw,avx512vl")]
15930    unsafe fn test_mm_reduce_max_epi16() {
15931        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
15932        let e: i16 = _mm_reduce_max_epi16(a);
15933        assert_eq!(7, e);
15934    }
15935
15936    #[simd_test(enable = "avx512bw,avx512vl")]
15937    unsafe fn test_mm_mask_reduce_max_epi16() {
15938        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
15939        let e: i16 = _mm_mask_reduce_max_epi16(0b11110000, a);
15940        assert_eq!(3, e);
15941    }
15942
15943    #[simd_test(enable = "avx512bw,avx512vl")]
15944    unsafe fn test_mm256_reduce_max_epi8() {
15945        let a = _mm256_set_epi8(
15946            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
15947            24, 25, 26, 27, 28, 29, 30, 31,
15948        );
15949        let e: i8 = _mm256_reduce_max_epi8(a);
15950        assert_eq!(31, e);
15951    }
15952
15953    #[simd_test(enable = "avx512bw,avx512vl")]
15954    unsafe fn test_mm256_mask_reduce_max_epi8() {
15955        let a = _mm256_set_epi8(
15956            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
15957            24, 25, 26, 27, 28, 29, 30, 31,
15958        );
15959        let e: i8 = _mm256_mask_reduce_max_epi8(0b1111111111111111_0000000000000000, a);
15960        assert_eq!(15, e);
15961    }
15962
15963    #[simd_test(enable = "avx512bw,avx512vl")]
15964    unsafe fn test_mm_reduce_max_epi8() {
15965        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
15966        let e: i8 = _mm_reduce_max_epi8(a);
15967        assert_eq!(15, e);
15968    }
15969
15970    #[simd_test(enable = "avx512bw,avx512vl")]
15971    unsafe fn test_mm_mask_reduce_max_epi8() {
15972        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
15973        let e: i8 = _mm_mask_reduce_max_epi8(0b11111111_00000000, a);
15974        assert_eq!(7, e);
15975    }
15976
15977    #[simd_test(enable = "avx512bw,avx512vl")]
15978    unsafe fn test_mm256_reduce_max_epu16() {
15979        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
15980        let e: u16 = _mm256_reduce_max_epu16(a);
15981        assert_eq!(15, e);
15982    }
15983
15984    #[simd_test(enable = "avx512bw,avx512vl")]
15985    unsafe fn test_mm256_mask_reduce_max_epu16() {
15986        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
15987        let e: u16 = _mm256_mask_reduce_max_epu16(0b11111111_00000000, a);
15988        assert_eq!(7, e);
15989    }
15990
15991    #[simd_test(enable = "avx512bw,avx512vl")]
15992    unsafe fn test_mm_reduce_max_epu16() {
15993        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
15994        let e: u16 = _mm_reduce_max_epu16(a);
15995        assert_eq!(7, e);
15996    }
15997
15998    #[simd_test(enable = "avx512bw,avx512vl")]
15999    unsafe fn test_mm_mask_reduce_max_epu16() {
16000        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
16001        let e: u16 = _mm_mask_reduce_max_epu16(0b11110000, a);
16002        assert_eq!(3, e);
16003    }
16004
16005    #[simd_test(enable = "avx512bw,avx512vl")]
16006    unsafe fn test_mm256_reduce_max_epu8() {
16007        let a = _mm256_set_epi8(
16008            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
16009            24, 25, 26, 27, 28, 29, 30, 31,
16010        );
16011        let e: u8 = _mm256_reduce_max_epu8(a);
16012        assert_eq!(31, e);
16013    }
16014
16015    #[simd_test(enable = "avx512bw,avx512vl")]
16016    unsafe fn test_mm256_mask_reduce_max_epu8() {
16017        let a = _mm256_set_epi8(
16018            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
16019            24, 25, 26, 27, 28, 29, 30, 31,
16020        );
16021        let e: u8 = _mm256_mask_reduce_max_epu8(0b1111111111111111_0000000000000000, a);
16022        assert_eq!(15, e);
16023    }
16024
16025    #[simd_test(enable = "avx512bw,avx512vl")]
16026    unsafe fn test_mm_reduce_max_epu8() {
16027        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16028        let e: u8 = _mm_reduce_max_epu8(a);
16029        assert_eq!(15, e);
16030    }
16031
16032    #[simd_test(enable = "avx512bw,avx512vl")]
16033    unsafe fn test_mm_mask_reduce_max_epu8() {
16034        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16035        let e: u8 = _mm_mask_reduce_max_epu8(0b11111111_00000000, a);
16036        assert_eq!(7, e);
16037    }
16038
16039    #[simd_test(enable = "avx512bw,avx512vl")]
16040    unsafe fn test_mm256_reduce_min_epi16() {
16041        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16042        let e: i16 = _mm256_reduce_min_epi16(a);
16043        assert_eq!(0, e);
16044    }
16045
16046    #[simd_test(enable = "avx512bw,avx512vl")]
16047    unsafe fn test_mm256_mask_reduce_min_epi16() {
16048        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16049        let e: i16 = _mm256_mask_reduce_min_epi16(0b11111111_00000000, a);
16050        assert_eq!(0, e);
16051    }
16052
16053    #[simd_test(enable = "avx512bw,avx512vl")]
16054    unsafe fn test_mm_reduce_min_epi16() {
16055        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
16056        let e: i16 = _mm_reduce_min_epi16(a);
16057        assert_eq!(0, e);
16058    }
16059
16060    #[simd_test(enable = "avx512bw,avx512vl")]
16061    unsafe fn test_mm_mask_reduce_min_epi16() {
16062        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
16063        let e: i16 = _mm_mask_reduce_min_epi16(0b11110000, a);
16064        assert_eq!(0, e);
16065    }
16066
16067    #[simd_test(enable = "avx512bw,avx512vl")]
16068    unsafe fn test_mm256_reduce_min_epi8() {
16069        let a = _mm256_set_epi8(
16070            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
16071            24, 25, 26, 27, 28, 29, 30, 31,
16072        );
16073        let e: i8 = _mm256_reduce_min_epi8(a);
16074        assert_eq!(0, e);
16075    }
16076
16077    #[simd_test(enable = "avx512bw,avx512vl")]
16078    unsafe fn test_mm256_mask_reduce_min_epi8() {
16079        let a = _mm256_set_epi8(
16080            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
16081            24, 25, 26, 27, 28, 29, 30, 31,
16082        );
16083        let e: i8 = _mm256_mask_reduce_min_epi8(0b1111111111111111_0000000000000000, a);
16084        assert_eq!(0, e);
16085    }
16086
16087    #[simd_test(enable = "avx512bw,avx512vl")]
16088    unsafe fn test_mm_reduce_min_epi8() {
16089        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16090        let e: i8 = _mm_reduce_min_epi8(a);
16091        assert_eq!(0, e);
16092    }
16093
16094    #[simd_test(enable = "avx512bw,avx512vl")]
16095    unsafe fn test_mm_mask_reduce_min_epi8() {
16096        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16097        let e: i8 = _mm_mask_reduce_min_epi8(0b11111111_00000000, a);
16098        assert_eq!(0, e);
16099    }
16100
16101    #[simd_test(enable = "avx512bw,avx512vl")]
    // `a` holds the values 0..=15, so the unsigned-minimum reduction is 0.
    unsafe fn test_mm256_reduce_min_epu16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u16 = _mm256_reduce_min_epu16(a);
        assert_eq!(0, e);
    }

    // Masked variant: the active lanes still include the value 0, so the
    // reduction result is unchanged.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_min_epu16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u16 = _mm256_mask_reduce_min_epu16(0b11111111_00000000, a);
        assert_eq!(0, e);
    }

    // 128-bit unsigned-minimum reduction over 0..=7.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_min_epu16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: u16 = _mm_reduce_min_epu16(a);
        assert_eq!(0, e);
    }

    // Masked 128-bit variant; the selected lanes contain the minimum 0.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_min_epu16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: u16 = _mm_mask_reduce_min_epu16(0b11110000, a);
        assert_eq!(0, e);
    }

    // Byte-wide unsigned-minimum reduction over 0..=31.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_min_epu8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: u8 = _mm256_reduce_min_epu8(a);
        assert_eq!(0, e);
    }

    // Masked byte-wide variant; the active half still contains 0.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_min_epu8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: u8 = _mm256_mask_reduce_min_epu8(0b1111111111111111_0000000000000000, a);
        assert_eq!(0, e);
    }

    // 128-bit byte-wide unsigned-minimum reduction over 0..=15.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_min_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u8 = _mm_reduce_min_epu8(a);
        assert_eq!(0, e);
    }

    // Masked 128-bit byte-wide variant; the active half still contains 0.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_min_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u8 = _mm_mask_reduce_min_epu8(0b11111111_00000000, a);
        assert_eq!(0, e);
    }
16162
    // OR-reduction: half the lanes hold 1, the other half hold 2, so the
    // OR over all lanes is 3.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_or_epi16() {
        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm256_reduce_or_epi16(a);
        assert_eq!(3, e);
    }

    // Masked OR-reduction: the mask selects only the lanes holding 1.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_or_epi16() {
        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm256_mask_reduce_or_epi16(0b11111111_00000000, a);
        assert_eq!(1, e);
    }

    // 128-bit OR-reduction of 1s and 2s is 3.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_or_epi16() {
        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
        let e = _mm_reduce_or_epi16(a);
        assert_eq!(3, e);
    }

    // Masked 128-bit OR-reduction: only the lanes holding 1 are active.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_or_epi16() {
        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
        let e = _mm_mask_reduce_or_epi16(0b11110000, a);
        assert_eq!(1, e);
    }

    // Byte-wide OR-reduction over alternating groups of 1s and 2s is 3.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_or_epi8() {
        let a = _mm256_set_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
            2, 2, 2,
        );
        let e = _mm256_reduce_or_epi8(a);
        assert_eq!(3, e);
    }

    // Masked byte-wide OR-reduction: the mask picks out only the 1-valued lanes.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_or_epi8() {
        let a = _mm256_set_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
            2, 2, 2,
        );
        let e = _mm256_mask_reduce_or_epi8(0b11111111_00000000_11111111_00000000, a);
        assert_eq!(1, e);
    }

    // 128-bit byte-wide OR-reduction of 1s and 2s is 3.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_or_epi8() {
        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm_reduce_or_epi8(a);
        assert_eq!(3, e);
    }

    // Masked 128-bit byte-wide OR-reduction: only the 1-valued lanes are active.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_or_epi8() {
        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm_mask_reduce_or_epi8(0b11111111_00000000, a);
        assert_eq!(1, e);
    }
16224
    // Unaligned 512-bit load of 32 i16 elements. `_mm512_set_epi16` takes its
    // arguments from the highest lane down to lane 0, hence the reversed
    // argument order in the expected vector.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_loadu_epi16() {
        #[rustfmt::skip]
        let a: [i16; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
        let r = _mm512_loadu_epi16(&a[0]);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m512i(r, e);
    }

    // Unaligned 256-bit load of 16 i16 elements (reversed order in `set`).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_loadu_epi16() {
        let a: [i16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let r = _mm256_loadu_epi16(&a[0]);
        let e = _mm256_set_epi16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m256i(r, e);
    }

    // Unaligned 128-bit load of 8 i16 elements (reversed order in `set`).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_loadu_epi16() {
        let a: [i16; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
        let r = _mm_loadu_epi16(&a[0]);
        let e = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m128i(r, e);
    }

    // Unaligned 512-bit load of 64 i8 elements (reversed order in `set`).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_loadu_epi8() {
        #[rustfmt::skip]
        let a: [i8; 64] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                           1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
        let r = _mm512_loadu_epi8(&a[0]);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
                                32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m512i(r, e);
    }

    // Unaligned 256-bit load of 32 i8 elements (reversed order in `set`).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_loadu_epi8() {
        #[rustfmt::skip]
        let a: [i8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
        let r = _mm256_loadu_epi8(&a[0]);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m256i(r, e);
    }

    // Unaligned 128-bit load of 16 i8 elements (reversed order in `set`).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_loadu_epi8() {
        let a: [i8; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let r = _mm_loadu_epi8(&a[0]);
        let e = _mm_set_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m128i(r, e);
    }
16280
    // Unaligned 512-bit store: writes every element, so the destination's
    // initial (undefined) contents do not matter.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_storeu_epi16() {
        let a = _mm512_set1_epi16(9);
        let mut r = _mm512_undefined_epi32();
        _mm512_storeu_epi16(&mut r as *mut _ as *mut i16, a);
        assert_eq_m512i(r, a);
    }

    // Unaligned 256-bit i16 store into a zeroed destination.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_storeu_epi16() {
        let a = _mm256_set1_epi16(9);
        let mut r = _mm256_set1_epi32(0);
        _mm256_storeu_epi16(&mut r as *mut _ as *mut i16, a);
        assert_eq_m256i(r, a);
    }

    // Unaligned 128-bit i16 store into a zeroed destination.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_storeu_epi16() {
        let a = _mm_set1_epi16(9);
        let mut r = _mm_set1_epi32(0);
        _mm_storeu_epi16(&mut r as *mut _ as *mut i16, a);
        assert_eq_m128i(r, a);
    }

    // Unaligned 512-bit i8 store over an undefined destination.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_storeu_epi8() {
        let a = _mm512_set1_epi8(9);
        let mut r = _mm512_undefined_epi32();
        _mm512_storeu_epi8(&mut r as *mut _ as *mut i8, a);
        assert_eq_m512i(r, a);
    }

    // Unaligned 256-bit i8 store into a zeroed destination.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_storeu_epi8() {
        let a = _mm256_set1_epi8(9);
        let mut r = _mm256_set1_epi32(0);
        _mm256_storeu_epi8(&mut r as *mut _ as *mut i8, a);
        assert_eq_m256i(r, a);
    }

    // Unaligned 128-bit i8 store into a zeroed destination.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_storeu_epi8() {
        let a = _mm_set1_epi8(9);
        let mut r = _mm_set1_epi32(0);
        _mm_storeu_epi8(&mut r as *mut _ as *mut i8, a);
        assert_eq_m128i(r, a);
    }
16328
    // Masked unaligned load: mask bit i controls element i, so the mask
    // literal reads right-to-left relative to the `e` array below. Masked-off
    // elements come from `src` (42).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_loadu_epi16() {
        let src = _mm512_set1_epi16(42);
        let a = &[
            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b10101010_11001100_11101000_11001010;
        // black_box keeps the pointer opaque so the masked load is not folded away.
        let r = _mm512_mask_loadu_epi16(src, m, black_box(p));
        let e = &[
            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
        ];
        let e = _mm512_loadu_epi16(e.as_ptr());
        assert_eq_m512i(r, e);
    }

    // Zero-masked unaligned load: masked-off elements become 0.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_loadu_epi16() {
        let a = &[
            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b10101010_11001100_11101000_11001010;
        let r = _mm512_maskz_loadu_epi16(m, black_box(p));
        let e = &[
            0_i16, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
            26, 0, 28, 0, 30, 0, 32,
        ];
        let e = _mm512_loadu_epi16(e.as_ptr());
        assert_eq_m512i(r, e);
    }

    // Masked unaligned store: masked-off destination slots keep their
    // original value (42).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_storeu_epi16() {
        let mut r = [42_i16; 32];
        let a = &[
            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let a = _mm512_loadu_epi16(a.as_ptr());
        let m = 0b10101010_11001100_11101000_11001010;
        _mm512_mask_storeu_epi16(r.as_mut_ptr(), m, a);
        let e = &[
            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
        ];
        let e = _mm512_loadu_epi16(e.as_ptr());
        assert_eq_m512i(_mm512_loadu_epi16(r.as_ptr()), e);
    }

    // Byte-wide masked load (64 lanes, 64-bit mask).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_loadu_epi8() {
        let src = _mm512_set1_epi8(42);
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        ];
        let p = a.as_ptr();
        let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
        let r = _mm512_mask_loadu_epi8(src, m, black_box(p));
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32, 42, 42, 42, 42, 42, 42, 42, 42, 41, 42, 43, 44,
            45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 42, 42, 42, 42, 42, 42, 42, 42,
        ];
        let e = _mm512_loadu_epi8(e.as_ptr());
        assert_eq_m512i(r, e);
    }

    // Byte-wide zero-masked load: masked-off elements become 0.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_loadu_epi8() {
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        ];
        let p = a.as_ptr();
        let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
        let r = _mm512_maskz_loadu_epi8(m, black_box(p));
        let e = &[
            0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
            26, 0, 28, 0, 30, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 41, 42, 43, 44, 45, 46, 47, 48, 49,
            50, 51, 52, 53, 54, 55, 56, 0, 0, 0, 0, 0, 0, 0, 0,
        ];
        let e = _mm512_loadu_epi8(e.as_ptr());
        assert_eq_m512i(r, e);
    }

    // Byte-wide masked store: masked-off destination bytes keep 42.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_storeu_epi8() {
        let mut r = [42_i8; 64];
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        ];
        let a = _mm512_loadu_epi8(a.as_ptr());
        let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
        _mm512_mask_storeu_epi8(r.as_mut_ptr(), m, a);
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32, 42, 42, 42, 42, 42, 42, 42, 42, 41, 42, 43, 44,
            45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 42, 42, 42, 42, 42, 42, 42, 42,
        ];
        let e = _mm512_loadu_epi8(e.as_ptr());
        assert_eq_m512i(_mm512_loadu_epi8(r.as_ptr()), e);
    }
16440
    // 256-bit masked load: mask bit i controls element i; masked-off
    // elements come from `src` (42).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_loadu_epi16() {
        let src = _mm256_set1_epi16(42);
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm256_mask_loadu_epi16(src, m, black_box(p));
        let e = &[
            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
        ];
        let e = _mm256_loadu_epi16(e.as_ptr());
        assert_eq_m256i(r, e);
    }

    // 256-bit zero-masked load: masked-off elements become 0.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_loadu_epi16() {
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm256_maskz_loadu_epi16(m, black_box(p));
        let e = &[0_i16, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16];
        let e = _mm256_loadu_epi16(e.as_ptr());
        assert_eq_m256i(r, e);
    }

    // 256-bit masked store: masked-off destination slots keep 42.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_storeu_epi16() {
        let mut r = [42_i16; 16];
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let a = _mm256_loadu_epi16(a.as_ptr());
        let m = 0b11101000_11001010;
        _mm256_mask_storeu_epi16(r.as_mut_ptr(), m, a);
        let e = &[
            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
        ];
        let e = _mm256_loadu_epi16(e.as_ptr());
        assert_eq_m256i(_mm256_loadu_epi16(r.as_ptr()), e);
    }

    // 256-bit byte-wide masked load; masked-off bytes come from `src` (42).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_loadu_epi8() {
        let src = _mm256_set1_epi8(42);
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b10101010_11001100_11101000_11001010;
        let r = _mm256_mask_loadu_epi8(src, m, black_box(p));
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
        ];
        let e = _mm256_loadu_epi8(e.as_ptr());
        assert_eq_m256i(r, e);
    }

    // 256-bit byte-wide zero-masked load; masked-off bytes become 0.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_loadu_epi8() {
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b10101010_11001100_11101000_11001010;
        let r = _mm256_maskz_loadu_epi8(m, black_box(p));
        let e = &[
            0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
            26, 0, 28, 0, 30, 0, 32,
        ];
        let e = _mm256_loadu_epi8(e.as_ptr());
        assert_eq_m256i(r, e);
    }

    // 256-bit byte-wide masked store; masked-off destination bytes keep 42.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_storeu_epi8() {
        let mut r = [42_i8; 32];
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let a = _mm256_loadu_epi8(a.as_ptr());
        let m = 0b10101010_11001100_11101000_11001010;
        _mm256_mask_storeu_epi8(r.as_mut_ptr(), m, a);
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
        ];
        let e = _mm256_loadu_epi8(e.as_ptr());
        assert_eq_m256i(_mm256_loadu_epi8(r.as_ptr()), e);
    }
16532
    // 128-bit masked load: mask bit i controls element i; masked-off
    // elements come from `src` (42).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_loadu_epi16() {
        let src = _mm_set1_epi16(42);
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11001010;
        let r = _mm_mask_loadu_epi16(src, m, black_box(p));
        let e = &[42_i16, 2, 42, 4, 42, 42, 7, 8];
        let e = _mm_loadu_epi16(e.as_ptr());
        assert_eq_m128i(r, e);
    }

    // 128-bit zero-masked load: masked-off elements become 0.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_loadu_epi16() {
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11001010;
        let r = _mm_maskz_loadu_epi16(m, black_box(p));
        let e = &[0_i16, 2, 0, 4, 0, 0, 7, 8];
        let e = _mm_loadu_epi16(e.as_ptr());
        assert_eq_m128i(r, e);
    }

    // 128-bit masked store: masked-off destination slots keep 42.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_storeu_epi16() {
        let mut r = [42_i16; 8];
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
        let a = _mm_loadu_epi16(a.as_ptr());
        let m = 0b11001010;
        _mm_mask_storeu_epi16(r.as_mut_ptr(), m, a);
        let e = &[42_i16, 2, 42, 4, 42, 42, 7, 8];
        let e = _mm_loadu_epi16(e.as_ptr());
        assert_eq_m128i(_mm_loadu_epi16(r.as_ptr()), e);
    }

    // 128-bit byte-wide masked load; masked-off bytes come from `src` (42).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_loadu_epi8() {
        let src = _mm_set1_epi8(42);
        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm_mask_loadu_epi8(src, m, black_box(p));
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
        ];
        let e = _mm_loadu_epi8(e.as_ptr());
        assert_eq_m128i(r, e);
    }

    // 128-bit byte-wide zero-masked load; masked-off bytes become 0.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_loadu_epi8() {
        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm_maskz_loadu_epi8(m, black_box(p));
        let e = &[0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16];
        let e = _mm_loadu_epi8(e.as_ptr());
        assert_eq_m128i(r, e);
    }

    // 128-bit byte-wide masked store; masked-off destination bytes keep 42.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_storeu_epi8() {
        let mut r = [42_i8; 16];
        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let a = _mm_loadu_epi8(a.as_ptr());
        let m = 0b11101000_11001010;
        _mm_mask_storeu_epi8(r.as_mut_ptr(), m, a);
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
        ];
        let e = _mm_loadu_epi8(e.as_ptr());
        assert_eq_m128i(_mm_loadu_epi8(r.as_ptr()), e);
    }
16606
    // madd multiplies adjacent pairs of signed 16-bit elements and sums each
    // pair into a 32-bit lane: with all-ones inputs each lane is 1*1 + 1*1 = 2.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_madd_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_madd_epi16(a, b);
        let e = _mm512_set1_epi32(2);
        assert_eq_m512i(r, e);
    }

    // Masked madd: inactive 32-bit lanes copy `src` (= a); an all-ones epi16
    // vector reinterpreted as epi32 reads as 1 << 16 | 1 per lane.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_madd_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mask_madd_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_madd_epi16(a, 0b00000000_00001111, a, b);
        let e = _mm512_set_epi32(
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            2,
            2,
            2,
            2,
        );
        assert_eq_m512i(r, e);
    }

    // Zero-masked madd: inactive 32-bit lanes become 0.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_madd_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_madd_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_madd_epi16(0b00000000_00001111, a, b);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2);
        assert_eq_m512i(r, e);
    }

    // 256-bit masked madd; inactive lanes copy `src` (= a, i.e. 1 << 16 | 1).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_madd_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_mask_madd_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_madd_epi16(a, 0b00001111, a, b);
        let e = _mm256_set_epi32(
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            2,
            2,
            2,
            2,
        );
        assert_eq_m256i(r, e);
    }

    // 256-bit zero-masked madd; inactive lanes become 0.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_madd_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_madd_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_madd_epi16(0b00001111, a, b);
        let e = _mm256_set_epi32(0, 0, 0, 0, 2, 2, 2, 2);
        assert_eq_m256i(r, e);
    }

    // 128-bit masked madd; all four lanes are active so every lane is 2.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_madd_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_mask_madd_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_madd_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi32(2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

    // 128-bit zero-masked madd; all four lanes are active so every lane is 2.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_madd_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_madd_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_madd_epi16(0b00001111, a, b);
        let e = _mm_set_epi32(2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }
16707
    // maddubs multiplies unsigned bytes of `a` with signed bytes of `b` and
    // sums adjacent products into 16-bit lanes: 1*1 + 1*1 = 2 per lane.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maddubs_epi16() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_maddubs_epi16(a, b);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }
16716
16717    #[simd_test(enable = "avx512bw")]
16718    unsafe fn test_mm512_mask_maddubs_epi16() {
16719        let a = _mm512_set1_epi8(1);
16720        let b = _mm512_set1_epi8(1);
16721        let src = _mm512_set1_epi16(1);
16722        let r = _mm512_mask_maddubs_epi16(src, 0, a, b);
16723        assert_eq_m512i(r, src);
16724        let r = _mm512_mask_add_epi16(src, 0b00000000_00000000_00000000_00000001, a, b);
16725        #[rustfmt::skip]
16726        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
16727                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1<<9|2);
16728        assert_eq_m512i(r, e);
16729    }
16730
    // Zero-masked maddubs: active 16-bit lanes hold 1*1 + 1*1 = 2, inactive
    // lanes become 0.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_maddubs_epi16() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_maskz_maddubs_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_maddubs_epi16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2,
                                 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m512i(r, e);
    }
16743
16744    #[simd_test(enable = "avx512bw,avx512vl")]
16745    unsafe fn test_mm256_mask_maddubs_epi16() {
16746        let a = _mm256_set1_epi8(1);
16747        let b = _mm256_set1_epi8(1);
16748        let src = _mm256_set1_epi16(1);
16749        let r = _mm256_mask_maddubs_epi16(src, 0, a, b);
16750        assert_eq_m256i(r, src);
16751        let r = _mm256_mask_add_epi16(src, 0b00000000_00000001, a, b);
16752        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 9 | 2);
16753        assert_eq_m256i(r, e);
16754    }
16755
    // 256-bit zero-masked maddubs: active lanes hold 2, inactive lanes 0.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_maddubs_epi16() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_maskz_maddubs_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_maddubs_epi16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m256i(r, e);
    }
16766
16767    #[simd_test(enable = "avx512bw,avx512vl")]
16768    unsafe fn test_mm_mask_maddubs_epi16() {
16769        let a = _mm_set1_epi8(1);
16770        let b = _mm_set1_epi8(1);
16771        let src = _mm_set1_epi16(1);
16772        let r = _mm_mask_maddubs_epi16(src, 0, a, b);
16773        assert_eq_m128i(r, src);
16774        let r = _mm_mask_add_epi16(src, 0b00000001, a, b);
16775        let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1 << 9 | 2);
16776        assert_eq_m128i(r, e);
16777    }
16778
    // 128-bit zero-masked maddubs: active lanes hold 2, inactive lanes 0.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_maddubs_epi16() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let r = _mm_maskz_maddubs_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_maddubs_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }
16789
16790    #[simd_test(enable = "avx512bw")]
16791    unsafe fn test_mm512_packs_epi32() {
16792        let a = _mm512_set1_epi32(i32::MAX);
16793        let b = _mm512_set1_epi32(1);
16794        let r = _mm512_packs_epi32(a, b);
16795        #[rustfmt::skip]
16796        let e = _mm512_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX,
16797                                 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
16798        assert_eq_m512i(r, e);
16799    }
16800
16801    #[simd_test(enable = "avx512bw")]
16802    unsafe fn test_mm512_mask_packs_epi32() {
16803        let a = _mm512_set1_epi32(i32::MAX);
16804        let b = _mm512_set1_epi32(1 << 16 | 1);
16805        let r = _mm512_mask_packs_epi32(a, 0, a, b);
16806        assert_eq_m512i(r, a);
16807        let r = _mm512_mask_packs_epi32(b, 0b00000000_00000000_00000000_00001111, a, b);
16808        #[rustfmt::skip]
16809        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
16810                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
16811        assert_eq_m512i(r, e);
16812    }
16813
16814    #[simd_test(enable = "avx512bw")]
16815    unsafe fn test_mm512_maskz_packs_epi32() {
16816        let a = _mm512_set1_epi32(i32::MAX);
16817        let b = _mm512_set1_epi32(1);
16818        let r = _mm512_maskz_packs_epi32(0, a, b);
16819        assert_eq_m512i(r, _mm512_setzero_si512());
16820        let r = _mm512_maskz_packs_epi32(0b00000000_00000000_00000000_00001111, a, b);
16821        #[rustfmt::skip]
16822        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
16823                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
16824        assert_eq_m512i(r, e);
16825    }
16826
16827    #[simd_test(enable = "avx512bw,avx512vl")]
16828    unsafe fn test_mm256_mask_packs_epi32() {
16829        let a = _mm256_set1_epi32(i32::MAX);
16830        let b = _mm256_set1_epi32(1 << 16 | 1);
16831        let r = _mm256_mask_packs_epi32(a, 0, a, b);
16832        assert_eq_m256i(r, a);
16833        let r = _mm256_mask_packs_epi32(b, 0b00000000_00001111, a, b);
16834        #[rustfmt::skip]
16835        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
16836        assert_eq_m256i(r, e);
16837    }
16838
16839    #[simd_test(enable = "avx512bw,avx512vl")]
16840    unsafe fn test_mm256_maskz_packs_epi32() {
16841        let a = _mm256_set1_epi32(i32::MAX);
16842        let b = _mm256_set1_epi32(1);
16843        let r = _mm256_maskz_packs_epi32(0, a, b);
16844        assert_eq_m256i(r, _mm256_setzero_si256());
16845        let r = _mm256_maskz_packs_epi32(0b00000000_00001111, a, b);
16846        #[rustfmt::skip]
16847        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
16848        assert_eq_m256i(r, e);
16849    }
16850
16851    #[simd_test(enable = "avx512bw,avx512vl")]
16852    unsafe fn test_mm_mask_packs_epi32() {
16853        let a = _mm_set1_epi32(i32::MAX);
16854        let b = _mm_set1_epi32(1 << 16 | 1);
16855        let r = _mm_mask_packs_epi32(a, 0, a, b);
16856        assert_eq_m128i(r, a);
16857        let r = _mm_mask_packs_epi32(b, 0b00001111, a, b);
16858        let e = _mm_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
16859        assert_eq_m128i(r, e);
16860    }
16861
    // 128-bit zero-masked saturating pack (i32 -> i16): mask 0 gives all zeros;
    // low four bits compute lanes 0..=3 and zero the rest.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_packs_epi32() {
        let a = _mm_set1_epi32(i32::MAX);
        let b = _mm_set1_epi32(1);
        let r = _mm_maskz_packs_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_packs_epi32(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m128i(r, e);
    }
16872
    // Saturating pack (i16 -> i8): within each 128-bit lane the low 8 bytes come
    // from `a` (i16::MAX saturates to i8::MAX) and the high 8 bytes from `b`.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_packs_epi16() {
        let a = _mm512_set1_epi16(i16::MAX);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_packs_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
                                1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
                                1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
                                1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m512i(r, e);
    }
16885
    // Masked saturating pack (i16 -> i8): mask 0 returns `src` unchanged; with only
    // the low four bits of the 64-lane mask set, lanes 0..=3 take the packed
    // (saturated) result and every other lane is copied from `src` (= b).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_packs_epi16() {
        let a = _mm512_set1_epi16(i16::MAX);
        let b = _mm512_set1_epi16(1 << 8 | 1);
        let r = _mm512_mask_packs_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_packs_epi16(
            b,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m512i(r, e);
    }
16905
    // Zero-masked saturating pack (i16 -> i8): mask 0 yields all zeros; with only
    // the low four bits set, lanes 0..=3 are computed and the rest are zeroed.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_packs_epi16() {
        let a = _mm512_set1_epi16(i16::MAX);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_packs_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_packs_epi16(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m512i(r, e);
    }
16924
    // 256-bit masked saturating pack (i16 -> i8): mask 0 returns `src`; low four
    // mask bits apply the packed result to lanes 0..=3 only.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_packs_epi16() {
        let a = _mm256_set1_epi16(i16::MAX);
        let b = _mm256_set1_epi16(1 << 8 | 1);
        let r = _mm256_mask_packs_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_packs_epi16(b, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m256i(r, e);
    }
16937
    // 256-bit zero-masked saturating pack (i16 -> i8): mask 0 gives all zeros;
    // low four bits compute lanes 0..=3 and zero the rest.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_packs_epi16() {
        let a = _mm256_set1_epi16(i16::MAX);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_packs_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_packs_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m256i(r, e);
    }
16950
    // 128-bit masked saturating pack (i16 -> i8): mask 0 returns `src`; low four
    // mask bits apply the packed (saturated) result to lanes 0..=3 only.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_packs_epi16() {
        let a = _mm_set1_epi16(i16::MAX);
        let b = _mm_set1_epi16(1 << 8 | 1);
        let r = _mm_mask_packs_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_packs_epi16(b, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }
16962
    // 128-bit zero-masked saturating pack (i16 -> i8): mask 0 gives all zeros;
    // low four bits compute lanes 0..=3 and zero the rest.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_packs_epi16() {
        let a = _mm_set1_epi16(i16::MAX);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_packs_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_packs_epi16(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }
16974
    // Unsigned saturating pack (i32 -> u16): -1 saturates to 0 and 1 stays 1;
    // within each 128-bit lane the low four words come from `a`, the high four
    // from `b`.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_packus_epi32() {
        let a = _mm512_set1_epi32(-1);
        let b = _mm512_set1_epi32(1);
        let r = _mm512_packus_epi32(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
                                 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
16985
    // Masked unsigned saturating pack (i32 -> u16): mask 0 returns `src`; with the
    // low four bits set, lanes 0..=3 take the packed result (-1 saturates to 0)
    // and all other lanes are copied from `src` (= b).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_packus_epi32() {
        let a = _mm512_set1_epi32(-1);
        let b = _mm512_set1_epi32(1 << 16 | 1);
        let r = _mm512_mask_packus_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_packus_epi32(b, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
16998
    // Zero-masked unsigned saturating pack (i32 -> u16): mask 0 gives all zeros;
    // with the low four bits set, lanes 0..=3 are computed (-1 saturates to 0,
    // so the whole expected vector is still zero).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_packus_epi32() {
        let a = _mm512_set1_epi32(-1);
        let b = _mm512_set1_epi32(1);
        let r = _mm512_maskz_packus_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_packus_epi32(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
17011
    // 256-bit masked unsigned saturating pack (i32 -> u16): mask 0 returns `src`;
    // low four mask bits compute lanes 0..=3 (-1 saturates to 0).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_packus_epi32() {
        let a = _mm256_set1_epi32(-1);
        let b = _mm256_set1_epi32(1 << 16 | 1);
        let r = _mm256_mask_packus_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_packus_epi32(b, 0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
17022
    // 256-bit zero-masked unsigned saturating pack (i32 -> u16): mask 0 gives all
    // zeros; low four bits compute lanes 0..=3 (-1 saturates to 0 as well).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_packus_epi32() {
        let a = _mm256_set1_epi32(-1);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_maskz_packus_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_packus_epi32(0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
17033
    // 128-bit masked unsigned saturating pack (i32 -> u16): mask 0 returns `src`;
    // low four mask bits compute lanes 0..=3 (-1 saturates to 0).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_packus_epi32() {
        let a = _mm_set1_epi32(-1);
        let b = _mm_set1_epi32(1 << 16 | 1);
        let r = _mm_mask_packus_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_packus_epi32(b, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
17044
    // 128-bit zero-masked unsigned saturating pack (i32 -> u16): mask 0 gives all
    // zeros; low four bits compute lanes 0..=3 (-1 saturates to 0 as well).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_packus_epi32() {
        let a = _mm_set1_epi32(-1);
        let b = _mm_set1_epi32(1);
        let r = _mm_maskz_packus_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_packus_epi32(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
17055
    // Unsigned saturating pack (i16 -> u8): -1 saturates to 0, 1 stays 1; within
    // each 128-bit lane the low 8 bytes come from `a`, the high 8 from `b`.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_packus_epi16() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_packus_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
                                1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
                                1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
                                1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
17068
    // Masked unsigned saturating pack (i16 -> u8): mask 0 returns `src`; with only
    // the low four bits of the 64-lane mask set, lanes 0..=3 take the packed
    // result (-1 saturates to 0) and the rest are copied from `src` (= b).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_packus_epi16() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(1 << 8 | 1);
        let r = _mm512_mask_packus_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_packus_epi16(
            b,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
17088
    // Zero-masked unsigned saturating pack (i16 -> u8): mask 0 gives all zeros;
    // with the low four bits set, lanes 0..=3 are computed (-1 saturates to 0,
    // so the expected vector is still entirely zero).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_packus_epi16() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_packus_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_packus_epi16(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
17107
    // 256-bit masked unsigned saturating pack (i16 -> u8): mask 0 returns `src`;
    // low four mask bits compute lanes 0..=3 (-1 saturates to 0).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_packus_epi16() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(1 << 8 | 1);
        let r = _mm256_mask_packus_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_packus_epi16(b, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
17120
    // 256-bit zero-masked unsigned saturating pack (i16 -> u8): mask 0 gives all
    // zeros; low four bits compute lanes 0..=3 (-1 saturates to 0 as well).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_packus_epi16() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_packus_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_packus_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
17133
    // 128-bit masked unsigned saturating pack (i16 -> u8): mask 0 returns `src`;
    // low four mask bits compute lanes 0..=3 (-1 saturates to 0).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_packus_epi16() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(1 << 8 | 1);
        let r = _mm_mask_packus_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_packus_epi16(b, 0b00000000_00001111, a, b);
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
17144
    // 128-bit zero-masked unsigned saturating pack (i16 -> u8): mask 0 gives all
    // zeros; low four bits compute lanes 0..=3 (-1 saturates to 0 as well).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_packus_epi16() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_packus_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_packus_epi16(0b00000000_00001111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
17155
17156    #[simd_test(enable = "avx512bw")]
17157    unsafe fn test_mm512_avg_epu16() {
17158        let a = _mm512_set1_epi16(1);
17159        let b = _mm512_set1_epi16(1);
17160        let r = _mm512_avg_epu16(a, b);
17161        let e = _mm512_set1_epi16(1);
17162        assert_eq_m512i(r, e);
17163    }
17164
    // Masked unsigned average: mask 0 returns `src`; since avg(1, 1) == 1 and
    // src is also all ones, the partially-masked result is all ones too.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_avg_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mask_avg_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_avg_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
17177
    // Zero-masked unsigned average: mask 0 yields all zeros; with the low four
    // bits set, lanes 0..=3 hold avg(1, 1) == 1 and the rest are zeroed.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_avg_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_avg_epu16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_avg_epu16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
17190
    // 256-bit masked unsigned average: mask 0 returns `src`; with ones everywhere,
    // masked and unmasked lanes both end up 1.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_avg_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_mask_avg_epu16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_avg_epu16(a, 0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }
17201
    // 256-bit zero-masked unsigned average: mask 0 gives all zeros; low four bits
    // compute avg(1, 1) == 1 in lanes 0..=3 and zero the rest.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_avg_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_avg_epu16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_avg_epu16(0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }
17212
    // 128-bit masked unsigned average: mask 0 returns `src`; with ones everywhere,
    // masked and unmasked lanes both end up 1.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_avg_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_mask_avg_epu16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_avg_epu16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }
17223
    // 128-bit zero-masked unsigned average: mask 0 gives all zeros; low four bits
    // compute avg(1, 1) == 1 in lanes 0..=3 and zero the rest.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_avg_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_avg_epu16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_avg_epu16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }
17234
17235    #[simd_test(enable = "avx512bw")]
17236    unsafe fn test_mm512_avg_epu8() {
17237        let a = _mm512_set1_epi8(1);
17238        let b = _mm512_set1_epi8(1);
17239        let r = _mm512_avg_epu8(a, b);
17240        let e = _mm512_set1_epi8(1);
17241        assert_eq_m512i(r, e);
17242    }
17243
    // Masked unsigned byte average: mask 0 returns `src`; since avg(1, 1) == 1 and
    // src is also all ones, the partially-masked result is all ones too.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_avg_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_mask_avg_epu8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_avg_epu8(
            a,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
17263
17264    #[simd_test(enable = "avx512bw")]
17265    unsafe fn test_mm512_maskz_avg_epu8() {
17266        let a = _mm512_set1_epi8(1);
17267        let b = _mm512_set1_epi8(1);
17268        let r = _mm512_maskz_avg_epu8(0, a, b);
17269        assert_eq_m512i(r, _mm512_setzero_si512());
17270        let r = _mm512_maskz_avg_epu8(
17271            0b00000000_000000000_00000000_00000000_00000000_0000000_00000000_00001111,
17272            a,
17273            b,
17274        );
17275        #[rustfmt::skip]
17276        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17277                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17278                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17279                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
17280        assert_eq_m512i(r, e);
17281    }
17282
    // 256-bit masked unsigned byte average: mask 0 returns `src`; with ones
    // everywhere, masked and unmasked lanes both end up 1.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_avg_epu8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_mask_avg_epu8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_avg_epu8(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }
17295
17296    #[simd_test(enable = "avx512bw,avx512vl")]
17297    unsafe fn test_mm256_maskz_avg_epu8() {
17298        let a = _mm256_set1_epi8(1);
17299        let b = _mm256_set1_epi8(1);
17300        let r = _mm256_maskz_avg_epu8(0, a, b);
17301        assert_eq_m256i(r, _mm256_setzero_si256());
17302        let r = _mm256_maskz_avg_epu8(0b00000000_0000000_00000000_00001111, a, b);
17303        #[rustfmt::skip]
17304        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17305                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
17306        assert_eq_m256i(r, e);
17307    }
17308
    // 128-bit masked unsigned byte average: mask 0 returns `src`; with ones
    // everywhere, masked and unmasked lanes both end up 1.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_avg_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let r = _mm_mask_avg_epu8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_avg_epu8(a, 0b00000000_00001111, a, b);
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }
17319
    // 128-bit zero-masked unsigned byte average: mask 0 gives all zeros; low four
    // bits compute avg(1, 1) == 1 in lanes 0..=3 and zero the rest.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_avg_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let r = _mm_maskz_avg_epu8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_avg_epu8(0b00000000_00001111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }
17330
17331    #[simd_test(enable = "avx512bw")]
17332    unsafe fn test_mm512_sll_epi16() {
17333        let a = _mm512_set1_epi16(1 << 15);
17334        let count = _mm_set1_epi16(2);
17335        let r = _mm512_sll_epi16(a, count);
17336        let e = _mm512_set1_epi16(0);
17337        assert_eq_m512i(r, e);
17338    }
17339
    // Masked shift-left: mask 0 returns `src`; an all-ones mask shifts every lane
    // (bit 15 << 2 leaves the lane, giving 0).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_sll_epi16() {
        let a = _mm512_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm512_mask_sll_epi16(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sll_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }
17350
    // Zero-masked shift-left: mask 0 yields all zeros; an all-ones mask shifts
    // every lane (bit 15 << 2 leaves the lane, giving 0).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_sll_epi16() {
        let a = _mm512_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm512_maskz_sll_epi16(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sll_epi16(0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }
17361
    // 256-bit masked shift-left: mask 0 returns `src`; all-ones mask shifts every
    // lane (bit 15 << 2 leaves the lane, giving 0).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_sll_epi16() {
        let a = _mm256_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm256_mask_sll_epi16(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sll_epi16(a, 0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }
17372
    // 256-bit zero-masked shift-left: mask 0 yields all zeros; all-ones mask
    // shifts every lane (bit 15 << 2 leaves the lane, giving 0).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_sll_epi16() {
        let a = _mm256_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm256_maskz_sll_epi16(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sll_epi16(0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }
17383
    // 128-bit masked shift-left: mask 0 returns `src`; all-ones mask shifts every
    // lane (bit 15 << 2 leaves the lane, giving 0).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_sll_epi16() {
        let a = _mm_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm_mask_sll_epi16(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sll_epi16(a, 0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }
17394
    // 128-bit zero-masked shift-left: mask 0 yields all zeros; all-ones mask
    // shifts every lane (bit 15 << 2 leaves the lane, giving 0).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_sll_epi16() {
        let a = _mm_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm_maskz_sll_epi16(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sll_epi16(0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }
17405
17406    #[simd_test(enable = "avx512bw")]
17407    unsafe fn test_mm512_slli_epi16() {
17408        let a = _mm512_set1_epi16(1 << 15);
17409        let r = _mm512_slli_epi16::<1>(a);
17410        let e = _mm512_set1_epi16(0);
17411        assert_eq_m512i(r, e);
17412    }
17413
    // Masked immediate shift-left: mask 0 returns `src`; all-ones mask shifts
    // every lane (bit 15 << 1 leaves the lane, giving 0).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_slli_epi16() {
        let a = _mm512_set1_epi16(1 << 15);
        let r = _mm512_mask_slli_epi16::<1>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_slli_epi16::<1>(a, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }
17423
    // Zero-masked immediate shift-left: mask 0 yields all zeros; all-ones mask
    // shifts every lane (bit 15 << 1 leaves the lane, giving 0).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_slli_epi16() {
        let a = _mm512_set1_epi16(1 << 15);
        let r = _mm512_maskz_slli_epi16::<1>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_slli_epi16::<1>(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }
17433
    // 256-bit masked immediate shift-left: mask 0 returns `src`; all-ones mask
    // shifts every lane (bit 15 << 1 leaves the lane, giving 0).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_slli_epi16() {
        let a = _mm256_set1_epi16(1 << 15);
        let r = _mm256_mask_slli_epi16::<1>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_slli_epi16::<1>(a, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }
17443
    // 256-bit zero-masked immediate shift-left: mask 0 yields all zeros; all-ones
    // mask shifts every lane (bit 15 << 1 leaves the lane, giving 0).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_slli_epi16() {
        let a = _mm256_set1_epi16(1 << 15);
        let r = _mm256_maskz_slli_epi16::<1>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_slli_epi16::<1>(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }
17453
    // 128-bit masked immediate shift-left: mask 0 returns `src`; all-ones mask
    // shifts every lane (bit 15 << 1 leaves the lane, giving 0).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_slli_epi16() {
        let a = _mm_set1_epi16(1 << 15);
        let r = _mm_mask_slli_epi16::<1>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_slli_epi16::<1>(a, 0b11111111, a);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }
17463
    // 128-bit zero-masked immediate shift-left: mask 0 yields all zeros; all-ones
    // mask shifts every lane (bit 15 << 1 leaves the lane, giving 0).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_slli_epi16() {
        let a = _mm_set1_epi16(1 << 15);
        let r = _mm_maskz_slli_epi16::<1>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_slli_epi16::<1>(0b11111111, a);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }
17473
17474    #[simd_test(enable = "avx512bw")]
17475    unsafe fn test_mm512_sllv_epi16() {
17476        let a = _mm512_set1_epi16(1 << 15);
17477        let count = _mm512_set1_epi16(2);
17478        let r = _mm512_sllv_epi16(a, count);
17479        let e = _mm512_set1_epi16(0);
17480        assert_eq_m512i(r, e);
17481    }
17482
    // Masked variable shift-left: mask 0 returns `src`; all-ones mask shifts
    // every lane (bit 15 << 2 leaves the lane, giving 0).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_sllv_epi16() {
        let a = _mm512_set1_epi16(1 << 15);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_mask_sllv_epi16(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sllv_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }
17493
    // Zero-masked variable shift-left: mask 0 yields all zeros; all-ones mask
    // shifts every lane (bit 15 << 2 leaves the lane, giving 0).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_sllv_epi16() {
        let a = _mm512_set1_epi16(1 << 15);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_maskz_sllv_epi16(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sllv_epi16(0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }
17504
17505    #[simd_test(enable = "avx512bw,avx512vl")]
17506    unsafe fn test_mm256_sllv_epi16() {
17507        let a = _mm256_set1_epi16(1 << 15);
17508        let count = _mm256_set1_epi16(2);
17509        let r = _mm256_sllv_epi16(a, count);
17510        let e = _mm256_set1_epi16(0);
17511        assert_eq_m256i(r, e);
17512    }
17513
    // 256-bit masked variable shift-left: mask 0 returns `src`; all-ones mask
    // shifts every lane (bit 15 << 2 leaves the lane, giving 0).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_sllv_epi16() {
        let a = _mm256_set1_epi16(1 << 15);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_mask_sllv_epi16(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sllv_epi16(a, 0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }
17524
    // 256-bit zero-masked variable shift-left: mask 0 yields all zeros; all-ones
    // mask shifts every lane (bit 15 << 2 leaves the lane, giving 0).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_sllv_epi16() {
        let a = _mm256_set1_epi16(1 << 15);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_maskz_sllv_epi16(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sllv_epi16(0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }
17535
17536    #[simd_test(enable = "avx512bw,avx512vl")]
17537    unsafe fn test_mm_sllv_epi16() {
17538        let a = _mm_set1_epi16(1 << 15);
17539        let count = _mm_set1_epi16(2);
17540        let r = _mm_sllv_epi16(a, count);
17541        let e = _mm_set1_epi16(0);
17542        assert_eq_m128i(r, e);
17543    }
17544
    // 128-bit masked variable shift-left: mask 0 returns `src`; all-ones mask
    // shifts every lane (bit 15 << 2 leaves the lane, giving 0).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_sllv_epi16() {
        let a = _mm_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm_mask_sllv_epi16(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sllv_epi16(a, 0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }
17555
    // 128-bit zero-masked variable shift-left: mask 0 yields all zeros; all-ones
    // mask shifts every lane (bit 15 << 2 leaves the lane, giving 0).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_sllv_epi16() {
        let a = _mm_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm_maskz_sllv_epi16(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sllv_epi16(0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }
17566
17567    #[simd_test(enable = "avx512bw")]
17568    unsafe fn test_mm512_srl_epi16() {
17569        let a = _mm512_set1_epi16(1 << 1);
17570        let count = _mm_set1_epi16(2);
17571        let r = _mm512_srl_epi16(a, count);
17572        let e = _mm512_set1_epi16(0);
17573        assert_eq_m512i(r, e);
17574    }
17575
17576    #[simd_test(enable = "avx512bw")]
17577    unsafe fn test_mm512_mask_srl_epi16() {
17578        let a = _mm512_set1_epi16(1 << 1);
17579        let count = _mm_set1_epi16(2);
17580        let r = _mm512_mask_srl_epi16(a, 0, a, count);
17581        assert_eq_m512i(r, a);
17582        let r = _mm512_mask_srl_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
17583        let e = _mm512_set1_epi16(0);
17584        assert_eq_m512i(r, e);
17585    }
17586
17587    #[simd_test(enable = "avx512bw")]
17588    unsafe fn test_mm512_maskz_srl_epi16() {
17589        let a = _mm512_set1_epi16(1 << 1);
17590        let count = _mm_set1_epi16(2);
17591        let r = _mm512_maskz_srl_epi16(0, a, count);
17592        assert_eq_m512i(r, _mm512_setzero_si512());
17593        let r = _mm512_maskz_srl_epi16(0b11111111_11111111_11111111_11111111, a, count);
17594        let e = _mm512_set1_epi16(0);
17595        assert_eq_m512i(r, e);
17596    }
17597
17598    #[simd_test(enable = "avx512bw,avx512vl")]
17599    unsafe fn test_mm256_mask_srl_epi16() {
17600        let a = _mm256_set1_epi16(1 << 1);
17601        let count = _mm_set1_epi16(2);
17602        let r = _mm256_mask_srl_epi16(a, 0, a, count);
17603        assert_eq_m256i(r, a);
17604        let r = _mm256_mask_srl_epi16(a, 0b11111111_11111111, a, count);
17605        let e = _mm256_set1_epi16(0);
17606        assert_eq_m256i(r, e);
17607    }
17608
17609    #[simd_test(enable = "avx512bw,avx512vl")]
17610    unsafe fn test_mm256_maskz_srl_epi16() {
17611        let a = _mm256_set1_epi16(1 << 1);
17612        let count = _mm_set1_epi16(2);
17613        let r = _mm256_maskz_srl_epi16(0, a, count);
17614        assert_eq_m256i(r, _mm256_setzero_si256());
17615        let r = _mm256_maskz_srl_epi16(0b11111111_11111111, a, count);
17616        let e = _mm256_set1_epi16(0);
17617        assert_eq_m256i(r, e);
17618    }
17619
17620    #[simd_test(enable = "avx512bw,avx512vl")]
17621    unsafe fn test_mm_mask_srl_epi16() {
17622        let a = _mm_set1_epi16(1 << 1);
17623        let count = _mm_set1_epi16(2);
17624        let r = _mm_mask_srl_epi16(a, 0, a, count);
17625        assert_eq_m128i(r, a);
17626        let r = _mm_mask_srl_epi16(a, 0b11111111, a, count);
17627        let e = _mm_set1_epi16(0);
17628        assert_eq_m128i(r, e);
17629    }
17630
17631    #[simd_test(enable = "avx512bw,avx512vl")]
17632    unsafe fn test_mm_maskz_srl_epi16() {
17633        let a = _mm_set1_epi16(1 << 1);
17634        let count = _mm_set1_epi16(2);
17635        let r = _mm_maskz_srl_epi16(0, a, count);
17636        assert_eq_m128i(r, _mm_setzero_si128());
17637        let r = _mm_maskz_srl_epi16(0b11111111, a, count);
17638        let e = _mm_set1_epi16(0);
17639        assert_eq_m128i(r, e);
17640    }
17641
17642    #[simd_test(enable = "avx512bw")]
17643    unsafe fn test_mm512_srli_epi16() {
17644        let a = _mm512_set1_epi16(1 << 1);
17645        let r = _mm512_srli_epi16::<2>(a);
17646        let e = _mm512_set1_epi16(0);
17647        assert_eq_m512i(r, e);
17648    }
17649
17650    #[simd_test(enable = "avx512bw")]
17651    unsafe fn test_mm512_mask_srli_epi16() {
17652        let a = _mm512_set1_epi16(1 << 1);
17653        let r = _mm512_mask_srli_epi16::<2>(a, 0, a);
17654        assert_eq_m512i(r, a);
17655        let r = _mm512_mask_srli_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a);
17656        let e = _mm512_set1_epi16(0);
17657        assert_eq_m512i(r, e);
17658    }
17659
17660    #[simd_test(enable = "avx512bw")]
17661    unsafe fn test_mm512_maskz_srli_epi16() {
17662        let a = _mm512_set1_epi16(1 << 1);
17663        let r = _mm512_maskz_srli_epi16::<2>(0, a);
17664        assert_eq_m512i(r, _mm512_setzero_si512());
17665        let r = _mm512_maskz_srli_epi16::<2>(0b11111111_11111111_11111111_11111111, a);
17666        let e = _mm512_set1_epi16(0);
17667        assert_eq_m512i(r, e);
17668    }
17669
17670    #[simd_test(enable = "avx512bw,avx512vl")]
17671    unsafe fn test_mm256_mask_srli_epi16() {
17672        let a = _mm256_set1_epi16(1 << 1);
17673        let r = _mm256_mask_srli_epi16::<2>(a, 0, a);
17674        assert_eq_m256i(r, a);
17675        let r = _mm256_mask_srli_epi16::<2>(a, 0b11111111_11111111, a);
17676        let e = _mm256_set1_epi16(0);
17677        assert_eq_m256i(r, e);
17678    }
17679
17680    #[simd_test(enable = "avx512bw,avx512vl")]
17681    unsafe fn test_mm256_maskz_srli_epi16() {
17682        let a = _mm256_set1_epi16(1 << 1);
17683        let r = _mm256_maskz_srli_epi16::<2>(0, a);
17684        assert_eq_m256i(r, _mm256_setzero_si256());
17685        let r = _mm256_maskz_srli_epi16::<2>(0b11111111_11111111, a);
17686        let e = _mm256_set1_epi16(0);
17687        assert_eq_m256i(r, e);
17688    }
17689
17690    #[simd_test(enable = "avx512bw,avx512vl")]
17691    unsafe fn test_mm_mask_srli_epi16() {
17692        let a = _mm_set1_epi16(1 << 1);
17693        let r = _mm_mask_srli_epi16::<2>(a, 0, a);
17694        assert_eq_m128i(r, a);
17695        let r = _mm_mask_srli_epi16::<2>(a, 0b11111111, a);
17696        let e = _mm_set1_epi16(0);
17697        assert_eq_m128i(r, e);
17698    }
17699
17700    #[simd_test(enable = "avx512bw,avx512vl")]
17701    unsafe fn test_mm_maskz_srli_epi16() {
17702        let a = _mm_set1_epi16(1 << 1);
17703        let r = _mm_maskz_srli_epi16::<2>(0, a);
17704        assert_eq_m128i(r, _mm_setzero_si128());
17705        let r = _mm_maskz_srli_epi16::<2>(0b11111111, a);
17706        let e = _mm_set1_epi16(0);
17707        assert_eq_m128i(r, e);
17708    }
17709
17710    #[simd_test(enable = "avx512bw")]
17711    unsafe fn test_mm512_srlv_epi16() {
17712        let a = _mm512_set1_epi16(1 << 1);
17713        let count = _mm512_set1_epi16(2);
17714        let r = _mm512_srlv_epi16(a, count);
17715        let e = _mm512_set1_epi16(0);
17716        assert_eq_m512i(r, e);
17717    }
17718
17719    #[simd_test(enable = "avx512bw")]
17720    unsafe fn test_mm512_mask_srlv_epi16() {
17721        let a = _mm512_set1_epi16(1 << 1);
17722        let count = _mm512_set1_epi16(2);
17723        let r = _mm512_mask_srlv_epi16(a, 0, a, count);
17724        assert_eq_m512i(r, a);
17725        let r = _mm512_mask_srlv_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
17726        let e = _mm512_set1_epi16(0);
17727        assert_eq_m512i(r, e);
17728    }
17729
17730    #[simd_test(enable = "avx512bw")]
17731    unsafe fn test_mm512_maskz_srlv_epi16() {
17732        let a = _mm512_set1_epi16(1 << 1);
17733        let count = _mm512_set1_epi16(2);
17734        let r = _mm512_maskz_srlv_epi16(0, a, count);
17735        assert_eq_m512i(r, _mm512_setzero_si512());
17736        let r = _mm512_maskz_srlv_epi16(0b11111111_11111111_11111111_11111111, a, count);
17737        let e = _mm512_set1_epi16(0);
17738        assert_eq_m512i(r, e);
17739    }
17740
17741    #[simd_test(enable = "avx512bw,avx512vl")]
17742    unsafe fn test_mm256_srlv_epi16() {
17743        let a = _mm256_set1_epi16(1 << 1);
17744        let count = _mm256_set1_epi16(2);
17745        let r = _mm256_srlv_epi16(a, count);
17746        let e = _mm256_set1_epi16(0);
17747        assert_eq_m256i(r, e);
17748    }
17749
17750    #[simd_test(enable = "avx512bw,avx512vl")]
17751    unsafe fn test_mm256_mask_srlv_epi16() {
17752        let a = _mm256_set1_epi16(1 << 1);
17753        let count = _mm256_set1_epi16(2);
17754        let r = _mm256_mask_srlv_epi16(a, 0, a, count);
17755        assert_eq_m256i(r, a);
17756        let r = _mm256_mask_srlv_epi16(a, 0b11111111_11111111, a, count);
17757        let e = _mm256_set1_epi16(0);
17758        assert_eq_m256i(r, e);
17759    }
17760
17761    #[simd_test(enable = "avx512bw,avx512vl")]
17762    unsafe fn test_mm256_maskz_srlv_epi16() {
17763        let a = _mm256_set1_epi16(1 << 1);
17764        let count = _mm256_set1_epi16(2);
17765        let r = _mm256_maskz_srlv_epi16(0, a, count);
17766        assert_eq_m256i(r, _mm256_setzero_si256());
17767        let r = _mm256_maskz_srlv_epi16(0b11111111_11111111, a, count);
17768        let e = _mm256_set1_epi16(0);
17769        assert_eq_m256i(r, e);
17770    }
17771
17772    #[simd_test(enable = "avx512bw,avx512vl")]
17773    unsafe fn test_mm_srlv_epi16() {
17774        let a = _mm_set1_epi16(1 << 1);
17775        let count = _mm_set1_epi16(2);
17776        let r = _mm_srlv_epi16(a, count);
17777        let e = _mm_set1_epi16(0);
17778        assert_eq_m128i(r, e);
17779    }
17780
17781    #[simd_test(enable = "avx512bw,avx512vl")]
17782    unsafe fn test_mm_mask_srlv_epi16() {
17783        let a = _mm_set1_epi16(1 << 1);
17784        let count = _mm_set1_epi16(2);
17785        let r = _mm_mask_srlv_epi16(a, 0, a, count);
17786        assert_eq_m128i(r, a);
17787        let r = _mm_mask_srlv_epi16(a, 0b11111111, a, count);
17788        let e = _mm_set1_epi16(0);
17789        assert_eq_m128i(r, e);
17790    }
17791
17792    #[simd_test(enable = "avx512bw,avx512vl")]
17793    unsafe fn test_mm_maskz_srlv_epi16() {
17794        let a = _mm_set1_epi16(1 << 1);
17795        let count = _mm_set1_epi16(2);
17796        let r = _mm_maskz_srlv_epi16(0, a, count);
17797        assert_eq_m128i(r, _mm_setzero_si128());
17798        let r = _mm_maskz_srlv_epi16(0b11111111, a, count);
17799        let e = _mm_set1_epi16(0);
17800        assert_eq_m128i(r, e);
17801    }
17802
17803    #[simd_test(enable = "avx512bw")]
17804    unsafe fn test_mm512_sra_epi16() {
17805        let a = _mm512_set1_epi16(8);
17806        let count = _mm_set1_epi16(1);
17807        let r = _mm512_sra_epi16(a, count);
17808        let e = _mm512_set1_epi16(0);
17809        assert_eq_m512i(r, e);
17810    }
17811
17812    #[simd_test(enable = "avx512bw")]
17813    unsafe fn test_mm512_mask_sra_epi16() {
17814        let a = _mm512_set1_epi16(8);
17815        let count = _mm_set1_epi16(1);
17816        let r = _mm512_mask_sra_epi16(a, 0, a, count);
17817        assert_eq_m512i(r, a);
17818        let r = _mm512_mask_sra_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
17819        let e = _mm512_set1_epi16(0);
17820        assert_eq_m512i(r, e);
17821    }
17822
17823    #[simd_test(enable = "avx512bw")]
17824    unsafe fn test_mm512_maskz_sra_epi16() {
17825        let a = _mm512_set1_epi16(8);
17826        let count = _mm_set1_epi16(1);
17827        let r = _mm512_maskz_sra_epi16(0, a, count);
17828        assert_eq_m512i(r, _mm512_setzero_si512());
17829        let r = _mm512_maskz_sra_epi16(0b11111111_11111111_11111111_11111111, a, count);
17830        let e = _mm512_set1_epi16(0);
17831        assert_eq_m512i(r, e);
17832    }
17833
17834    #[simd_test(enable = "avx512bw,avx512vl")]
17835    unsafe fn test_mm256_mask_sra_epi16() {
17836        let a = _mm256_set1_epi16(8);
17837        let count = _mm_set1_epi16(1);
17838        let r = _mm256_mask_sra_epi16(a, 0, a, count);
17839        assert_eq_m256i(r, a);
17840        let r = _mm256_mask_sra_epi16(a, 0b11111111_11111111, a, count);
17841        let e = _mm256_set1_epi16(0);
17842        assert_eq_m256i(r, e);
17843    }
17844
17845    #[simd_test(enable = "avx512bw,avx512vl")]
17846    unsafe fn test_mm256_maskz_sra_epi16() {
17847        let a = _mm256_set1_epi16(8);
17848        let count = _mm_set1_epi16(1);
17849        let r = _mm256_maskz_sra_epi16(0, a, count);
17850        assert_eq_m256i(r, _mm256_setzero_si256());
17851        let r = _mm256_maskz_sra_epi16(0b11111111_11111111, a, count);
17852        let e = _mm256_set1_epi16(0);
17853        assert_eq_m256i(r, e);
17854    }
17855
17856    #[simd_test(enable = "avx512bw,avx512vl")]
17857    unsafe fn test_mm_mask_sra_epi16() {
17858        let a = _mm_set1_epi16(8);
17859        let count = _mm_set1_epi16(1);
17860        let r = _mm_mask_sra_epi16(a, 0, a, count);
17861        assert_eq_m128i(r, a);
17862        let r = _mm_mask_sra_epi16(a, 0b11111111, a, count);
17863        let e = _mm_set1_epi16(0);
17864        assert_eq_m128i(r, e);
17865    }
17866
17867    #[simd_test(enable = "avx512bw,avx512vl")]
17868    unsafe fn test_mm_maskz_sra_epi16() {
17869        let a = _mm_set1_epi16(8);
17870        let count = _mm_set1_epi16(1);
17871        let r = _mm_maskz_sra_epi16(0, a, count);
17872        assert_eq_m128i(r, _mm_setzero_si128());
17873        let r = _mm_maskz_sra_epi16(0b11111111, a, count);
17874        let e = _mm_set1_epi16(0);
17875        assert_eq_m128i(r, e);
17876    }
17877
17878    #[simd_test(enable = "avx512bw")]
17879    unsafe fn test_mm512_srai_epi16() {
17880        let a = _mm512_set1_epi16(8);
17881        let r = _mm512_srai_epi16::<2>(a);
17882        let e = _mm512_set1_epi16(2);
17883        assert_eq_m512i(r, e);
17884    }
17885
17886    #[simd_test(enable = "avx512bw")]
17887    unsafe fn test_mm512_mask_srai_epi16() {
17888        let a = _mm512_set1_epi16(8);
17889        let r = _mm512_mask_srai_epi16::<2>(a, 0, a);
17890        assert_eq_m512i(r, a);
17891        let r = _mm512_mask_srai_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a);
17892        let e = _mm512_set1_epi16(2);
17893        assert_eq_m512i(r, e);
17894    }
17895
17896    #[simd_test(enable = "avx512bw")]
17897    unsafe fn test_mm512_maskz_srai_epi16() {
17898        let a = _mm512_set1_epi16(8);
17899        let r = _mm512_maskz_srai_epi16::<2>(0, a);
17900        assert_eq_m512i(r, _mm512_setzero_si512());
17901        let r = _mm512_maskz_srai_epi16::<2>(0b11111111_11111111_11111111_11111111, a);
17902        let e = _mm512_set1_epi16(2);
17903        assert_eq_m512i(r, e);
17904    }
17905
17906    #[simd_test(enable = "avx512bw,avx512vl")]
17907    unsafe fn test_mm256_mask_srai_epi16() {
17908        let a = _mm256_set1_epi16(8);
17909        let r = _mm256_mask_srai_epi16::<2>(a, 0, a);
17910        assert_eq_m256i(r, a);
17911        let r = _mm256_mask_srai_epi16::<2>(a, 0b11111111_11111111, a);
17912        let e = _mm256_set1_epi16(2);
17913        assert_eq_m256i(r, e);
17914    }
17915
17916    #[simd_test(enable = "avx512bw,avx512vl")]
17917    unsafe fn test_mm256_maskz_srai_epi16() {
17918        let a = _mm256_set1_epi16(8);
17919        let r = _mm256_maskz_srai_epi16::<2>(0, a);
17920        assert_eq_m256i(r, _mm256_setzero_si256());
17921        let r = _mm256_maskz_srai_epi16::<2>(0b11111111_11111111, a);
17922        let e = _mm256_set1_epi16(2);
17923        assert_eq_m256i(r, e);
17924    }
17925
17926    #[simd_test(enable = "avx512bw,avx512vl")]
17927    unsafe fn test_mm_mask_srai_epi16() {
17928        let a = _mm_set1_epi16(8);
17929        let r = _mm_mask_srai_epi16::<2>(a, 0, a);
17930        assert_eq_m128i(r, a);
17931        let r = _mm_mask_srai_epi16::<2>(a, 0b11111111, a);
17932        let e = _mm_set1_epi16(2);
17933        assert_eq_m128i(r, e);
17934    }
17935
17936    #[simd_test(enable = "avx512bw,avx512vl")]
17937    unsafe fn test_mm_maskz_srai_epi16() {
17938        let a = _mm_set1_epi16(8);
17939        let r = _mm_maskz_srai_epi16::<2>(0, a);
17940        assert_eq_m128i(r, _mm_setzero_si128());
17941        let r = _mm_maskz_srai_epi16::<2>(0b11111111, a);
17942        let e = _mm_set1_epi16(2);
17943        assert_eq_m128i(r, e);
17944    }
17945
17946    #[simd_test(enable = "avx512bw")]
17947    unsafe fn test_mm512_srav_epi16() {
17948        let a = _mm512_set1_epi16(8);
17949        let count = _mm512_set1_epi16(2);
17950        let r = _mm512_srav_epi16(a, count);
17951        let e = _mm512_set1_epi16(2);
17952        assert_eq_m512i(r, e);
17953    }
17954
17955    #[simd_test(enable = "avx512bw")]
17956    unsafe fn test_mm512_mask_srav_epi16() {
17957        let a = _mm512_set1_epi16(8);
17958        let count = _mm512_set1_epi16(2);
17959        let r = _mm512_mask_srav_epi16(a, 0, a, count);
17960        assert_eq_m512i(r, a);
17961        let r = _mm512_mask_srav_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
17962        let e = _mm512_set1_epi16(2);
17963        assert_eq_m512i(r, e);
17964    }
17965
17966    #[simd_test(enable = "avx512bw")]
17967    unsafe fn test_mm512_maskz_srav_epi16() {
17968        let a = _mm512_set1_epi16(8);
17969        let count = _mm512_set1_epi16(2);
17970        let r = _mm512_maskz_srav_epi16(0, a, count);
17971        assert_eq_m512i(r, _mm512_setzero_si512());
17972        let r = _mm512_maskz_srav_epi16(0b11111111_11111111_11111111_11111111, a, count);
17973        let e = _mm512_set1_epi16(2);
17974        assert_eq_m512i(r, e);
17975    }
17976
17977    #[simd_test(enable = "avx512bw,avx512vl")]
17978    unsafe fn test_mm256_srav_epi16() {
17979        let a = _mm256_set1_epi16(8);
17980        let count = _mm256_set1_epi16(2);
17981        let r = _mm256_srav_epi16(a, count);
17982        let e = _mm256_set1_epi16(2);
17983        assert_eq_m256i(r, e);
17984    }
17985
17986    #[simd_test(enable = "avx512bw,avx512vl")]
17987    unsafe fn test_mm256_mask_srav_epi16() {
17988        let a = _mm256_set1_epi16(8);
17989        let count = _mm256_set1_epi16(2);
17990        let r = _mm256_mask_srav_epi16(a, 0, a, count);
17991        assert_eq_m256i(r, a);
17992        let r = _mm256_mask_srav_epi16(a, 0b11111111_11111111, a, count);
17993        let e = _mm256_set1_epi16(2);
17994        assert_eq_m256i(r, e);
17995    }
17996
17997    #[simd_test(enable = "avx512bw,avx512vl")]
17998    unsafe fn test_mm256_maskz_srav_epi16() {
17999        let a = _mm256_set1_epi16(8);
18000        let count = _mm256_set1_epi16(2);
18001        let r = _mm256_maskz_srav_epi16(0, a, count);
18002        assert_eq_m256i(r, _mm256_setzero_si256());
18003        let r = _mm256_maskz_srav_epi16(0b11111111_11111111, a, count);
18004        let e = _mm256_set1_epi16(2);
18005        assert_eq_m256i(r, e);
18006    }
18007
18008    #[simd_test(enable = "avx512bw,avx512vl")]
18009    unsafe fn test_mm_srav_epi16() {
18010        let a = _mm_set1_epi16(8);
18011        let count = _mm_set1_epi16(2);
18012        let r = _mm_srav_epi16(a, count);
18013        let e = _mm_set1_epi16(2);
18014        assert_eq_m128i(r, e);
18015    }
18016
18017    #[simd_test(enable = "avx512bw,avx512vl")]
18018    unsafe fn test_mm_mask_srav_epi16() {
18019        let a = _mm_set1_epi16(8);
18020        let count = _mm_set1_epi16(2);
18021        let r = _mm_mask_srav_epi16(a, 0, a, count);
18022        assert_eq_m128i(r, a);
18023        let r = _mm_mask_srav_epi16(a, 0b11111111, a, count);
18024        let e = _mm_set1_epi16(2);
18025        assert_eq_m128i(r, e);
18026    }
18027
18028    #[simd_test(enable = "avx512bw,avx512vl")]
18029    unsafe fn test_mm_maskz_srav_epi16() {
18030        let a = _mm_set1_epi16(8);
18031        let count = _mm_set1_epi16(2);
18032        let r = _mm_maskz_srav_epi16(0, a, count);
18033        assert_eq_m128i(r, _mm_setzero_si128());
18034        let r = _mm_maskz_srav_epi16(0b11111111, a, count);
18035        let e = _mm_set1_epi16(2);
18036        assert_eq_m128i(r, e);
18037    }
18038
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_permutex2var_epi16() {
        // Lane i of `a` holds the value 31 - i (set_epi16 lists the highest lane first).
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        // Index bit 5 (1 << 5) selects the second operand `b`; indices without it
        // pick the numbered lane of `a`, so the output alternates a[k] and 100.
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        let r = _mm512_permutex2var_epi16(a, idx, b);
        // Expected: a[k] (= 31 - k) interleaved with 100 taken from `b`.
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }
18056
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_permutex2var_epi16() {
        // Lane i of `a` holds 31 - i; index bit 5 (1 << 5) selects `b` (all 100).
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        // Zero writemask: src (`a`) is returned unchanged.
        let r = _mm512_mask_permutex2var_epi16(a, 0, idx, b);
        assert_eq_m512i(r, a);
        // Full writemask: every lane receives the permute result.
        let r = _mm512_mask_permutex2var_epi16(a, 0b11111111_11111111_11111111_11111111, idx, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }
18076
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_permutex2var_epi16() {
        // Lane i of `a` holds 31 - i; index bit 5 (1 << 5) selects `b` (all 100).
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        // Zero mask zeroes the destination entirely.
        let r = _mm512_maskz_permutex2var_epi16(0, a, idx, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Full mask: every lane receives the permute result.
        let r = _mm512_maskz_permutex2var_epi16(0b11111111_11111111_11111111_11111111, a, idx, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }
18096
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask2_permutex2var_epi16() {
        // Lane i of `a` holds 31 - i; index bit 5 (1 << 5) selects `b` (all 100).
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        // In the mask2 variant `idx` doubles as src: a zero mask returns `idx` itself.
        let r = _mm512_mask2_permutex2var_epi16(a, idx, 0, b);
        assert_eq_m512i(r, idx);
        // Full mask: every lane receives the permute result.
        let r = _mm512_mask2_permutex2var_epi16(a, idx, 0b11111111_11111111_11111111_11111111, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }
18116
18117    #[simd_test(enable = "avx512bw,avx512vl")]
18118    unsafe fn test_mm256_permutex2var_epi16() {
18119        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
18120        #[rustfmt::skip]
18121        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
18122        let b = _mm256_set1_epi16(100);
18123        let r = _mm256_permutex2var_epi16(a, idx, b);
18124        let e = _mm256_set_epi16(
18125            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
18126        );
18127        assert_eq_m256i(r, e);
18128    }
18129
18130    #[simd_test(enable = "avx512bw,avx512vl")]
18131    unsafe fn test_mm256_mask_permutex2var_epi16() {
18132        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
18133        #[rustfmt::skip]
18134        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
18135        let b = _mm256_set1_epi16(100);
18136        let r = _mm256_mask_permutex2var_epi16(a, 0, idx, b);
18137        assert_eq_m256i(r, a);
18138        let r = _mm256_mask_permutex2var_epi16(a, 0b11111111_11111111, idx, b);
18139        let e = _mm256_set_epi16(
18140            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
18141        );
18142        assert_eq_m256i(r, e);
18143    }
18144
18145    #[simd_test(enable = "avx512bw,avx512vl")]
18146    unsafe fn test_mm256_maskz_permutex2var_epi16() {
18147        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
18148        #[rustfmt::skip]
18149        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
18150        let b = _mm256_set1_epi16(100);
18151        let r = _mm256_maskz_permutex2var_epi16(0, a, idx, b);
18152        assert_eq_m256i(r, _mm256_setzero_si256());
18153        let r = _mm256_maskz_permutex2var_epi16(0b11111111_11111111, a, idx, b);
18154        let e = _mm256_set_epi16(
18155            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
18156        );
18157        assert_eq_m256i(r, e);
18158    }
18159
18160    #[simd_test(enable = "avx512bw,avx512vl")]
18161    unsafe fn test_mm256_mask2_permutex2var_epi16() {
18162        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
18163        #[rustfmt::skip]
18164        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
18165        let b = _mm256_set1_epi16(100);
18166        let r = _mm256_mask2_permutex2var_epi16(a, idx, 0, b);
18167        assert_eq_m256i(r, idx);
18168        let r = _mm256_mask2_permutex2var_epi16(a, idx, 0b11111111_11111111, b);
18169        #[rustfmt::skip]
18170        let e = _mm256_set_epi16(
18171            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
18172        );
18173        assert_eq_m256i(r, e);
18174    }
18175
18176    #[simd_test(enable = "avx512bw,avx512vl")]
18177    unsafe fn test_mm_permutex2var_epi16() {
18178        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
18179        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
18180        let b = _mm_set1_epi16(100);
18181        let r = _mm_permutex2var_epi16(a, idx, b);
18182        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
18183        assert_eq_m128i(r, e);
18184    }
18185
18186    #[simd_test(enable = "avx512bw,avx512vl")]
18187    unsafe fn test_mm_mask_permutex2var_epi16() {
18188        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
18189        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
18190        let b = _mm_set1_epi16(100);
18191        let r = _mm_mask_permutex2var_epi16(a, 0, idx, b);
18192        assert_eq_m128i(r, a);
18193        let r = _mm_mask_permutex2var_epi16(a, 0b11111111, idx, b);
18194        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
18195        assert_eq_m128i(r, e);
18196    }
18197
18198    #[simd_test(enable = "avx512bw,avx512vl")]
18199    unsafe fn test_mm_maskz_permutex2var_epi16() {
18200        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
18201        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
18202        let b = _mm_set1_epi16(100);
18203        let r = _mm_maskz_permutex2var_epi16(0, a, idx, b);
18204        assert_eq_m128i(r, _mm_setzero_si128());
18205        let r = _mm_maskz_permutex2var_epi16(0b11111111, a, idx, b);
18206        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
18207        assert_eq_m128i(r, e);
18208    }
18209
18210    #[simd_test(enable = "avx512bw,avx512vl")]
18211    unsafe fn test_mm_mask2_permutex2var_epi16() {
18212        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
18213        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
18214        let b = _mm_set1_epi16(100);
18215        let r = _mm_mask2_permutex2var_epi16(a, idx, 0, b);
18216        assert_eq_m128i(r, idx);
18217        let r = _mm_mask2_permutex2var_epi16(a, idx, 0b11111111, b);
18218        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
18219        assert_eq_m128i(r, e);
18220    }
18221
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_permutexvar_epi16() {
        // Every index is 1, so each output lane is a copy of a[1].
        let idx = _mm512_set1_epi16(1);
        // Lane i of `a` holds 31 - i (set_epi16 lists the highest lane first), so a[1] == 30.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm512_permutexvar_epi16(idx, a);
        let e = _mm512_set1_epi16(30);
        assert_eq_m512i(r, e);
    }
18232
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_permutexvar_epi16() {
        // Every index is 1, so each selected output lane copies a[1] (== 30).
        let idx = _mm512_set1_epi16(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        // Zero writemask: src (`a`) is returned unchanged.
        let r = _mm512_mask_permutexvar_epi16(a, 0, idx, a);
        assert_eq_m512i(r, a);
        // Full writemask: every lane receives a[1].
        let r = _mm512_mask_permutexvar_epi16(a, 0b11111111_11111111_11111111_11111111, idx, a);
        let e = _mm512_set1_epi16(30);
        assert_eq_m512i(r, e);
    }
18245
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_permutexvar_epi16() {
        // Every index is 1, so each selected output lane copies a[1] (== 30).
        let idx = _mm512_set1_epi16(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        // Zero mask zeroes the destination entirely.
        let r = _mm512_maskz_permutexvar_epi16(0, idx, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Full mask: every lane receives a[1].
        let r = _mm512_maskz_permutexvar_epi16(0b11111111_11111111_11111111_11111111, idx, a);
        let e = _mm512_set1_epi16(30);
        assert_eq_m512i(r, e);
    }
18258
18259    #[simd_test(enable = "avx512bw,avx512vl")]
18260    unsafe fn test_mm256_permutexvar_epi16() {
18261        let idx = _mm256_set1_epi16(1);
18262        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
18263        let r = _mm256_permutexvar_epi16(idx, a);
18264        let e = _mm256_set1_epi16(14);
18265        assert_eq_m256i(r, e);
18266    }
18267
18268    #[simd_test(enable = "avx512bw,avx512vl")]
18269    unsafe fn test_mm256_mask_permutexvar_epi16() {
18270        let idx = _mm256_set1_epi16(1);
18271        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
18272        let r = _mm256_mask_permutexvar_epi16(a, 0, idx, a);
18273        assert_eq_m256i(r, a);
18274        let r = _mm256_mask_permutexvar_epi16(a, 0b11111111_11111111, idx, a);
18275        let e = _mm256_set1_epi16(14);
18276        assert_eq_m256i(r, e);
18277    }
18278
18279    #[simd_test(enable = "avx512bw,avx512vl")]
18280    unsafe fn test_mm256_maskz_permutexvar_epi16() {
18281        let idx = _mm256_set1_epi16(1);
18282        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
18283        let r = _mm256_maskz_permutexvar_epi16(0, idx, a);
18284        assert_eq_m256i(r, _mm256_setzero_si256());
18285        let r = _mm256_maskz_permutexvar_epi16(0b11111111_11111111, idx, a);
18286        let e = _mm256_set1_epi16(14);
18287        assert_eq_m256i(r, e);
18288    }
18289
18290    #[simd_test(enable = "avx512bw,avx512vl")]
18291    unsafe fn test_mm_permutexvar_epi16() {
18292        let idx = _mm_set1_epi16(1);
18293        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
18294        let r = _mm_permutexvar_epi16(idx, a);
18295        let e = _mm_set1_epi16(6);
18296        assert_eq_m128i(r, e);
18297    }
18298
18299    #[simd_test(enable = "avx512bw,avx512vl")]
18300    unsafe fn test_mm_mask_permutexvar_epi16() {
18301        let idx = _mm_set1_epi16(1);
18302        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
18303        let r = _mm_mask_permutexvar_epi16(a, 0, idx, a);
18304        assert_eq_m128i(r, a);
18305        let r = _mm_mask_permutexvar_epi16(a, 0b11111111, idx, a);
18306        let e = _mm_set1_epi16(6);
18307        assert_eq_m128i(r, e);
18308    }
18309
18310    #[simd_test(enable = "avx512bw,avx512vl")]
18311    unsafe fn test_mm_maskz_permutexvar_epi16() {
18312        let idx = _mm_set1_epi16(1);
18313        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
18314        let r = _mm_maskz_permutexvar_epi16(0, idx, a);
18315        assert_eq_m128i(r, _mm_setzero_si128());
18316        let r = _mm_maskz_permutexvar_epi16(0b11111111, idx, a);
18317        let e = _mm_set1_epi16(6);
18318        assert_eq_m128i(r, e);
18319    }
18320
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_blend_epi16() {
        // A set mask bit selects the lane from `b` (2), a clear bit from
        // `a` (1); mask bit i corresponds to element i (lowest lane last
        // in `set_epi16` argument order).
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_mask_blend_epi16(0b11111111_00000000_11111111_00000000, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
18331
18332    #[simd_test(enable = "avx512bw,avx512vl")]
18333    unsafe fn test_mm256_mask_blend_epi16() {
18334        let a = _mm256_set1_epi16(1);
18335        let b = _mm256_set1_epi16(2);
18336        let r = _mm256_mask_blend_epi16(0b11111111_00000000, a, b);
18337        let e = _mm256_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
18338        assert_eq_m256i(r, e);
18339    }
18340
18341    #[simd_test(enable = "avx512bw,avx512vl")]
18342    unsafe fn test_mm_mask_blend_epi16() {
18343        let a = _mm_set1_epi16(1);
18344        let b = _mm_set1_epi16(2);
18345        let r = _mm_mask_blend_epi16(0b11110000, a, b);
18346        let e = _mm_set_epi16(2, 2, 2, 2, 1, 1, 1, 1);
18347        assert_eq_m128i(r, e);
18348    }
18349
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_blend_epi8() {
        // A set mask bit selects the byte from `b` (2), a clear bit from
        // `a` (1); mask bit i corresponds to element i.
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_mask_blend_epi8(
            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
18366
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_blend_epi8() {
        // A set mask bit selects the byte from `b` (2), a clear bit from `a` (1).
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_mask_blend_epi8(0b11111111_00000000_11111111_00000000, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }
18377
18378    #[simd_test(enable = "avx512bw,avx512vl")]
18379    unsafe fn test_mm_mask_blend_epi8() {
18380        let a = _mm_set1_epi8(1);
18381        let b = _mm_set1_epi8(2);
18382        let r = _mm_mask_blend_epi8(0b11111111_00000000, a, b);
18383        let e = _mm_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
18384        assert_eq_m128i(r, e);
18385    }
18386
18387    #[simd_test(enable = "avx512bw")]
18388    unsafe fn test_mm512_broadcastw_epi16() {
18389        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
18390        let r = _mm512_broadcastw_epi16(a);
18391        let e = _mm512_set1_epi16(24);
18392        assert_eq_m512i(r, e);
18393    }
18394
18395    #[simd_test(enable = "avx512bw")]
18396    unsafe fn test_mm512_mask_broadcastw_epi16() {
18397        let src = _mm512_set1_epi16(1);
18398        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
18399        let r = _mm512_mask_broadcastw_epi16(src, 0, a);
18400        assert_eq_m512i(r, src);
18401        let r = _mm512_mask_broadcastw_epi16(src, 0b11111111_11111111_11111111_11111111, a);
18402        let e = _mm512_set1_epi16(24);
18403        assert_eq_m512i(r, e);
18404    }
18405
18406    #[simd_test(enable = "avx512bw")]
18407    unsafe fn test_mm512_maskz_broadcastw_epi16() {
18408        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
18409        let r = _mm512_maskz_broadcastw_epi16(0, a);
18410        assert_eq_m512i(r, _mm512_setzero_si512());
18411        let r = _mm512_maskz_broadcastw_epi16(0b11111111_11111111_11111111_11111111, a);
18412        let e = _mm512_set1_epi16(24);
18413        assert_eq_m512i(r, e);
18414    }
18415
18416    #[simd_test(enable = "avx512bw,avx512vl")]
18417    unsafe fn test_mm256_mask_broadcastw_epi16() {
18418        let src = _mm256_set1_epi16(1);
18419        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
18420        let r = _mm256_mask_broadcastw_epi16(src, 0, a);
18421        assert_eq_m256i(r, src);
18422        let r = _mm256_mask_broadcastw_epi16(src, 0b11111111_11111111, a);
18423        let e = _mm256_set1_epi16(24);
18424        assert_eq_m256i(r, e);
18425    }
18426
18427    #[simd_test(enable = "avx512bw,avx512vl")]
18428    unsafe fn test_mm256_maskz_broadcastw_epi16() {
18429        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
18430        let r = _mm256_maskz_broadcastw_epi16(0, a);
18431        assert_eq_m256i(r, _mm256_setzero_si256());
18432        let r = _mm256_maskz_broadcastw_epi16(0b11111111_11111111, a);
18433        let e = _mm256_set1_epi16(24);
18434        assert_eq_m256i(r, e);
18435    }
18436
18437    #[simd_test(enable = "avx512bw,avx512vl")]
18438    unsafe fn test_mm_mask_broadcastw_epi16() {
18439        let src = _mm_set1_epi16(1);
18440        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
18441        let r = _mm_mask_broadcastw_epi16(src, 0, a);
18442        assert_eq_m128i(r, src);
18443        let r = _mm_mask_broadcastw_epi16(src, 0b11111111, a);
18444        let e = _mm_set1_epi16(24);
18445        assert_eq_m128i(r, e);
18446    }
18447
18448    #[simd_test(enable = "avx512bw,avx512vl")]
18449    unsafe fn test_mm_maskz_broadcastw_epi16() {
18450        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
18451        let r = _mm_maskz_broadcastw_epi16(0, a);
18452        assert_eq_m128i(r, _mm_setzero_si128());
18453        let r = _mm_maskz_broadcastw_epi16(0b11111111, a);
18454        let e = _mm_set1_epi16(24);
18455        assert_eq_m128i(r, e);
18456    }
18457
18458    #[simd_test(enable = "avx512bw")]
18459    unsafe fn test_mm512_broadcastb_epi8() {
18460        let a = _mm_set_epi8(
18461            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
18462        );
18463        let r = _mm512_broadcastb_epi8(a);
18464        let e = _mm512_set1_epi8(32);
18465        assert_eq_m512i(r, e);
18466    }
18467
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_broadcastb_epi8() {
        let src = _mm512_set1_epi8(1);
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        // Zero mask: every lane is copied from `src`.
        let r = _mm512_mask_broadcastb_epi8(src, 0, a);
        assert_eq_m512i(r, src);
        // All-ones mask: byte 0 of `a` (the last `set_epi8` argument, 32)
        // fills every lane.
        let r = _mm512_mask_broadcastb_epi8(
            src,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm512_set1_epi8(32);
        assert_eq_m512i(r, e);
    }
18484
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_broadcastb_epi8() {
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        // Zero mask: all destination lanes are zeroed.
        let r = _mm512_maskz_broadcastb_epi8(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // All-ones mask: byte 0 of `a` (32) fills every lane.
        let r = _mm512_maskz_broadcastb_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm512_set1_epi8(32);
        assert_eq_m512i(r, e);
    }
18499
18500    #[simd_test(enable = "avx512bw,avx512vl")]
18501    unsafe fn test_mm256_mask_broadcastb_epi8() {
18502        let src = _mm256_set1_epi8(1);
18503        let a = _mm_set_epi8(
18504            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
18505        );
18506        let r = _mm256_mask_broadcastb_epi8(src, 0, a);
18507        assert_eq_m256i(r, src);
18508        let r = _mm256_mask_broadcastb_epi8(src, 0b11111111_11111111_11111111_11111111, a);
18509        let e = _mm256_set1_epi8(32);
18510        assert_eq_m256i(r, e);
18511    }
18512
18513    #[simd_test(enable = "avx512bw,avx512vl")]
18514    unsafe fn test_mm256_maskz_broadcastb_epi8() {
18515        let a = _mm_set_epi8(
18516            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
18517        );
18518        let r = _mm256_maskz_broadcastb_epi8(0, a);
18519        assert_eq_m256i(r, _mm256_setzero_si256());
18520        let r = _mm256_maskz_broadcastb_epi8(0b11111111_11111111_11111111_11111111, a);
18521        let e = _mm256_set1_epi8(32);
18522        assert_eq_m256i(r, e);
18523    }
18524
18525    #[simd_test(enable = "avx512bw,avx512vl")]
18526    unsafe fn test_mm_mask_broadcastb_epi8() {
18527        let src = _mm_set1_epi8(1);
18528        let a = _mm_set_epi8(
18529            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
18530        );
18531        let r = _mm_mask_broadcastb_epi8(src, 0, a);
18532        assert_eq_m128i(r, src);
18533        let r = _mm_mask_broadcastb_epi8(src, 0b11111111_11111111, a);
18534        let e = _mm_set1_epi8(32);
18535        assert_eq_m128i(r, e);
18536    }
18537
18538    #[simd_test(enable = "avx512bw,avx512vl")]
18539    unsafe fn test_mm_maskz_broadcastb_epi8() {
18540        let a = _mm_set_epi8(
18541            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
18542        );
18543        let r = _mm_maskz_broadcastb_epi8(0, a);
18544        assert_eq_m128i(r, _mm_setzero_si128());
18545        let r = _mm_maskz_broadcastb_epi8(0b11111111_11111111, a);
18546        let e = _mm_set1_epi8(32);
18547        assert_eq_m128i(r, e);
18548    }
18549
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_unpackhi_epi16() {
        // unpackhi interleaves the upper four words of each 128-bit lane of
        // `a` and `b` (b then a, per element pair).
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        let r = _mm512_unpackhi_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(33, 1,  34, 2,  35, 3,  36, 4,  41, 9,  42, 10, 43, 11, 44, 12,
                                 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
        assert_eq_m512i(r, e);
    }
18564
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_unpackhi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        // Zero mask: every lane is copied from `src` (the first argument).
        let r = _mm512_mask_unpackhi_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        // All-ones mask: full interleave of the upper words of each lane.
        let r = _mm512_mask_unpackhi_epi16(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(33, 1,  34, 2,  35, 3,  36, 4,  41, 9,  42, 10, 43, 11, 44, 12,
                                 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
        assert_eq_m512i(r, e);
    }
18581
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_unpackhi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        // Zero mask: all destination lanes are zeroed.
        let r = _mm512_maskz_unpackhi_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // All-ones mask: full interleave of the upper words of each lane.
        let r = _mm512_maskz_unpackhi_epi16(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(33, 1,  34, 2,  35, 3,  36, 4,  41, 9,  42, 10, 43, 11, 44, 12,
                                 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
        assert_eq_m512i(r, e);
    }
18598
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_unpackhi_epi16() {
        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm256_set_epi16(
            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
        );
        // Zero mask: every lane is copied from `src` (the first argument).
        let r = _mm256_mask_unpackhi_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        // All-ones mask: interleave of the upper four words of each 128-bit lane.
        let r = _mm256_mask_unpackhi_epi16(a, 0b11111111_11111111, a, b);
        let e = _mm256_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12);
        assert_eq_m256i(r, e);
    }
18611
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_unpackhi_epi16() {
        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm256_set_epi16(
            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
        );
        // Zero mask: all destination lanes are zeroed.
        let r = _mm256_maskz_unpackhi_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // All-ones mask: interleave of the upper four words of each 128-bit lane.
        let r = _mm256_maskz_unpackhi_epi16(0b11111111_11111111, a, b);
        let e = _mm256_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12);
        assert_eq_m256i(r, e);
    }
18624
18625    #[simd_test(enable = "avx512bw,avx512vl")]
18626    unsafe fn test_mm_mask_unpackhi_epi16() {
18627        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
18628        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
18629        let r = _mm_mask_unpackhi_epi16(a, 0, a, b);
18630        assert_eq_m128i(r, a);
18631        let r = _mm_mask_unpackhi_epi16(a, 0b11111111, a, b);
18632        let e = _mm_set_epi16(33, 1, 34, 2, 35, 3, 36, 4);
18633        assert_eq_m128i(r, e);
18634    }
18635
18636    #[simd_test(enable = "avx512bw,avx512vl")]
18637    unsafe fn test_mm_maskz_unpackhi_epi16() {
18638        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
18639        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
18640        let r = _mm_maskz_unpackhi_epi16(0, a, b);
18641        assert_eq_m128i(r, _mm_setzero_si128());
18642        let r = _mm_maskz_unpackhi_epi16(0b11111111, a, b);
18643        let e = _mm_set_epi16(33, 1, 34, 2, 35, 3, 36, 4);
18644        assert_eq_m128i(r, e);
18645    }
18646
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_unpackhi_epi8() {
        // unpackhi interleaves the upper eight bytes of each 128-bit lane of
        // `a` and `b` (b then a, per element pair).
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_unpackhi_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
                                97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
                                113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
        assert_eq_m512i(r, e);
    }
18667
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        // Zero mask: every lane is copied from `src` (the first argument).
        let r = _mm512_mask_unpackhi_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        // All-ones mask: interleave of the upper eight bytes of each 128-bit lane.
        let r = _mm512_mask_unpackhi_epi8(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
                                97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
                                113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
        assert_eq_m512i(r, e);
    }
18695
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        // Zero mask: all destination lanes are zeroed.
        let r = _mm512_maskz_unpackhi_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // All-ones mask: interleave of the upper eight bytes of each 128-bit lane.
        let r = _mm512_maskz_unpackhi_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
                                97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
                                113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
        assert_eq_m512i(r, e);
    }
18722
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
        // Zero mask: every lane is copied from `src` (the first argument).
        let r = _mm256_mask_unpackhi_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        // All-ones mask: interleave of the upper eight bytes of each 128-bit lane.
        let r = _mm256_mask_unpackhi_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24);
        assert_eq_m256i(r, e);
    }
18739
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
        // Zero mask: all destination lanes are zeroed.
        let r = _mm256_maskz_unpackhi_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // All-ones mask: interleave of the upper eight bytes of each 128-bit lane.
        let r = _mm256_maskz_unpackhi_epi8(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24);
        assert_eq_m256i(r, e);
    }
18756
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_unpackhi_epi8() {
        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm_set_epi8(
            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
        );
        // Zero mask: every lane is copied from `src` (the first argument).
        let r = _mm_mask_unpackhi_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        // All-ones mask: interleave of the upper eight bytes of `a` and `b`.
        let r = _mm_mask_unpackhi_epi8(a, 0b11111111_11111111, a, b);
        let e = _mm_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8);
        assert_eq_m128i(r, e);
    }
18769
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_unpackhi_epi8() {
        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm_set_epi8(
            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
        );
        // Zero mask: all destination lanes are zeroed.
        let r = _mm_maskz_unpackhi_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        // All-ones mask: interleave of the upper eight bytes of `a` and `b`.
        let r = _mm_maskz_unpackhi_epi8(0b11111111_11111111, a, b);
        let e = _mm_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8);
        assert_eq_m128i(r, e);
    }
18782
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_unpacklo_epi16() {
        // unpacklo interleaves the lower four words of each 128-bit lane of
        // `a` and `b` (b then a, per element pair).
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        let r = _mm512_unpacklo_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(37, 5,  38, 6,  39, 7,  40, 8,  45, 13, 46, 14, 47, 15, 48, 16,
                                 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
        assert_eq_m512i(r, e);
    }
18797
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_unpacklo_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        // Zero mask: every lane is copied from `src` (the first argument).
        let r = _mm512_mask_unpacklo_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        // All-ones mask: full interleave of the lower words of each lane.
        let r = _mm512_mask_unpacklo_epi16(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(37, 5,  38, 6,  39, 7,  40, 8,  45, 13, 46, 14, 47, 15, 48, 16,
                                 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
        assert_eq_m512i(r, e);
    }
18814
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_unpacklo_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        // Zero mask: all destination lanes are zeroed.
        let r = _mm512_maskz_unpacklo_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // All-ones mask: full interleave of the lower words of each lane.
        let r = _mm512_maskz_unpacklo_epi16(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(37, 5,  38, 6,  39, 7,  40, 8,  45, 13, 46, 14, 47, 15, 48, 16,
                                 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
        assert_eq_m512i(r, e);
    }
18831
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_unpacklo_epi16() {
        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm256_set_epi16(
            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
        );
        // Zero mask: every lane is copied from `src` (the first argument).
        let r = _mm256_mask_unpacklo_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        // All-ones mask: interleave of the lower four words of each 128-bit lane.
        let r = _mm256_mask_unpacklo_epi16(a, 0b11111111_11111111, a, b);
        let e = _mm256_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16);
        assert_eq_m256i(r, e);
    }
18844
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_unpacklo_epi16() {
        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm256_set_epi16(
            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
        );
        // Zero mask: all destination lanes are zeroed.
        let r = _mm256_maskz_unpacklo_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // All-ones mask: interleave of the lower four words of each 128-bit lane.
        let r = _mm256_maskz_unpacklo_epi16(0b11111111_11111111, a, b);
        let e = _mm256_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16);
        assert_eq_m256i(r, e);
    }
18857
18858    #[simd_test(enable = "avx512bw,avx512vl")]
18859    unsafe fn test_mm_mask_unpacklo_epi16() {
18860        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
18861        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
18862        let r = _mm_mask_unpacklo_epi16(a, 0, a, b);
18863        assert_eq_m128i(r, a);
18864        let r = _mm_mask_unpacklo_epi16(a, 0b11111111, a, b);
18865        let e = _mm_set_epi16(37, 5, 38, 6, 39, 7, 40, 8);
18866        assert_eq_m128i(r, e);
18867    }
18868
18869    #[simd_test(enable = "avx512bw,avx512vl")]
18870    unsafe fn test_mm_maskz_unpacklo_epi16() {
18871        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
18872        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
18873        let r = _mm_maskz_unpacklo_epi16(0, a, b);
18874        assert_eq_m128i(r, _mm_setzero_si128());
18875        let r = _mm_maskz_unpacklo_epi16(0b11111111, a, b);
18876        let e = _mm_set_epi16(37, 5, 38, 6, 39, 7, 40, 8);
18877        assert_eq_m128i(r, e);
18878    }
18879
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_unpacklo_epi8() {
        // unpacklo interleaves the lower eight bytes of each 128-bit lane of
        // `a` and `b` (b then a, per element pair).
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_unpacklo_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32,
                                105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48,
                                121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0,   64);
        assert_eq_m512i(r, e);
    }
18900
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_unpacklo_epi8() {
        // Merge-masked vpunpcklbw: a zero writemask keeps `src` (here `a`)
        // untouched; a full 64-bit mask yields the plain interleave.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_mask_unpacklo_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_unpacklo_epi8(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32,
                                105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48,
                                121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0,   64);
        assert_eq_m512i(r, e);
    }
18928
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_unpacklo_epi8() {
        // Zero-masked vpunpcklbw: zero mask -> all zeros; full 64-bit mask
        // -> the plain per-lane low-byte interleave of `a` and `b`.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_maskz_unpacklo_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_unpacklo_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32,
                                105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48,
                                121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0,   64);
        assert_eq_m512i(r, e);
    }
18955
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_unpacklo_epi8() {
        // Merge-masked vpunpcklbw (256-bit): zero mask keeps `src` (= `a`);
        // full 32-bit mask yields the per-lane low-byte interleave.
        #[rustfmt::skip]
        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
        let r = _mm256_mask_unpacklo_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_unpacklo_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32);
        assert_eq_m256i(r, e);
    }
18972
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_unpacklo_epi8() {
        // Zero-masked vpunpcklbw (256-bit): zero mask -> zeros; full mask ->
        // plain interleave of the low 8 bytes of each 128-bit lane.
        #[rustfmt::skip]
        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
        let r = _mm256_maskz_unpacklo_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_unpacklo_epi8(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32);
        assert_eq_m256i(r, e);
    }
18989
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_unpacklo_epi8() {
        // Merge-masked vpunpcklbw (128-bit): zero mask keeps `src` (= `a`);
        // full 16-bit mask interleaves the low 8 bytes of `a` and `b`.
        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm_set_epi8(
            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
        );
        let r = _mm_mask_unpacklo_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_unpacklo_epi8(a, 0b11111111_11111111, a, b);
        let e = _mm_set_epi8(
            73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16,
        );
        assert_eq_m128i(r, e);
    }
19004
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_unpacklo_epi8() {
        // Zero-masked vpunpcklbw (128-bit): zero mask -> zeros; full mask ->
        // interleave of the low 8 bytes of `a` and `b`.
        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm_set_epi8(
            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
        );
        let r = _mm_maskz_unpacklo_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_unpacklo_epi8(0b11111111_11111111, a, b);
        let e = _mm_set_epi8(
            73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16,
        );
        assert_eq_m128i(r, e);
    }
19019
19020    #[simd_test(enable = "avx512bw")]
19021    unsafe fn test_mm512_mask_mov_epi16() {
19022        let src = _mm512_set1_epi16(1);
19023        let a = _mm512_set1_epi16(2);
19024        let r = _mm512_mask_mov_epi16(src, 0, a);
19025        assert_eq_m512i(r, src);
19026        let r = _mm512_mask_mov_epi16(src, 0b11111111_11111111_11111111_11111111, a);
19027        assert_eq_m512i(r, a);
19028    }
19029
19030    #[simd_test(enable = "avx512bw")]
19031    unsafe fn test_mm512_maskz_mov_epi16() {
19032        let a = _mm512_set1_epi16(2);
19033        let r = _mm512_maskz_mov_epi16(0, a);
19034        assert_eq_m512i(r, _mm512_setzero_si512());
19035        let r = _mm512_maskz_mov_epi16(0b11111111_11111111_11111111_11111111, a);
19036        assert_eq_m512i(r, a);
19037    }
19038
19039    #[simd_test(enable = "avx512bw,avx512vl")]
19040    unsafe fn test_mm256_mask_mov_epi16() {
19041        let src = _mm256_set1_epi16(1);
19042        let a = _mm256_set1_epi16(2);
19043        let r = _mm256_mask_mov_epi16(src, 0, a);
19044        assert_eq_m256i(r, src);
19045        let r = _mm256_mask_mov_epi16(src, 0b11111111_11111111, a);
19046        assert_eq_m256i(r, a);
19047    }
19048
19049    #[simd_test(enable = "avx512bw,avx512vl")]
19050    unsafe fn test_mm256_maskz_mov_epi16() {
19051        let a = _mm256_set1_epi16(2);
19052        let r = _mm256_maskz_mov_epi16(0, a);
19053        assert_eq_m256i(r, _mm256_setzero_si256());
19054        let r = _mm256_maskz_mov_epi16(0b11111111_11111111, a);
19055        assert_eq_m256i(r, a);
19056    }
19057
19058    #[simd_test(enable = "avx512bw,avx512vl")]
19059    unsafe fn test_mm_mask_mov_epi16() {
19060        let src = _mm_set1_epi16(1);
19061        let a = _mm_set1_epi16(2);
19062        let r = _mm_mask_mov_epi16(src, 0, a);
19063        assert_eq_m128i(r, src);
19064        let r = _mm_mask_mov_epi16(src, 0b11111111, a);
19065        assert_eq_m128i(r, a);
19066    }
19067
19068    #[simd_test(enable = "avx512bw,avx512vl")]
19069    unsafe fn test_mm_maskz_mov_epi16() {
19070        let a = _mm_set1_epi16(2);
19071        let r = _mm_maskz_mov_epi16(0, a);
19072        assert_eq_m128i(r, _mm_setzero_si128());
19073        let r = _mm_maskz_mov_epi16(0b11111111, a);
19074        assert_eq_m128i(r, a);
19075    }
19076
19077    #[simd_test(enable = "avx512bw")]
19078    unsafe fn test_mm512_mask_mov_epi8() {
19079        let src = _mm512_set1_epi8(1);
19080        let a = _mm512_set1_epi8(2);
19081        let r = _mm512_mask_mov_epi8(src, 0, a);
19082        assert_eq_m512i(r, src);
19083        let r = _mm512_mask_mov_epi8(
19084            src,
19085            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
19086            a,
19087        );
19088        assert_eq_m512i(r, a);
19089    }
19090
19091    #[simd_test(enable = "avx512bw")]
19092    unsafe fn test_mm512_maskz_mov_epi8() {
19093        let a = _mm512_set1_epi8(2);
19094        let r = _mm512_maskz_mov_epi8(0, a);
19095        assert_eq_m512i(r, _mm512_setzero_si512());
19096        let r = _mm512_maskz_mov_epi8(
19097            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
19098            a,
19099        );
19100        assert_eq_m512i(r, a);
19101    }
19102
19103    #[simd_test(enable = "avx512bw,avx512vl")]
19104    unsafe fn test_mm256_mask_mov_epi8() {
19105        let src = _mm256_set1_epi8(1);
19106        let a = _mm256_set1_epi8(2);
19107        let r = _mm256_mask_mov_epi8(src, 0, a);
19108        assert_eq_m256i(r, src);
19109        let r = _mm256_mask_mov_epi8(src, 0b11111111_11111111_11111111_11111111, a);
19110        assert_eq_m256i(r, a);
19111    }
19112
19113    #[simd_test(enable = "avx512bw,avx512vl")]
19114    unsafe fn test_mm256_maskz_mov_epi8() {
19115        let a = _mm256_set1_epi8(2);
19116        let r = _mm256_maskz_mov_epi8(0, a);
19117        assert_eq_m256i(r, _mm256_setzero_si256());
19118        let r = _mm256_maskz_mov_epi8(0b11111111_11111111_11111111_11111111, a);
19119        assert_eq_m256i(r, a);
19120    }
19121
19122    #[simd_test(enable = "avx512bw,avx512vl")]
19123    unsafe fn test_mm_mask_mov_epi8() {
19124        let src = _mm_set1_epi8(1);
19125        let a = _mm_set1_epi8(2);
19126        let r = _mm_mask_mov_epi8(src, 0, a);
19127        assert_eq_m128i(r, src);
19128        let r = _mm_mask_mov_epi8(src, 0b11111111_11111111, a);
19129        assert_eq_m128i(r, a);
19130    }
19131
19132    #[simd_test(enable = "avx512bw,avx512vl")]
19133    unsafe fn test_mm_maskz_mov_epi8() {
19134        let a = _mm_set1_epi8(2);
19135        let r = _mm_maskz_mov_epi8(0, a);
19136        assert_eq_m128i(r, _mm_setzero_si128());
19137        let r = _mm_maskz_mov_epi8(0b11111111_11111111, a);
19138        assert_eq_m128i(r, a);
19139    }
19140
19141    #[simd_test(enable = "avx512bw")]
19142    unsafe fn test_mm512_mask_set1_epi16() {
19143        let src = _mm512_set1_epi16(2);
19144        let a: i16 = 11;
19145        let r = _mm512_mask_set1_epi16(src, 0, a);
19146        assert_eq_m512i(r, src);
19147        let r = _mm512_mask_set1_epi16(src, 0b11111111_11111111_11111111_11111111, a);
19148        let e = _mm512_set1_epi16(11);
19149        assert_eq_m512i(r, e);
19150    }
19151
19152    #[simd_test(enable = "avx512bw")]
19153    unsafe fn test_mm512_maskz_set1_epi16() {
19154        let a: i16 = 11;
19155        let r = _mm512_maskz_set1_epi16(0, a);
19156        assert_eq_m512i(r, _mm512_setzero_si512());
19157        let r = _mm512_maskz_set1_epi16(0b11111111_11111111_11111111_11111111, a);
19158        let e = _mm512_set1_epi16(11);
19159        assert_eq_m512i(r, e);
19160    }
19161
19162    #[simd_test(enable = "avx512bw,avx512vl")]
19163    unsafe fn test_mm256_mask_set1_epi16() {
19164        let src = _mm256_set1_epi16(2);
19165        let a: i16 = 11;
19166        let r = _mm256_mask_set1_epi16(src, 0, a);
19167        assert_eq_m256i(r, src);
19168        let r = _mm256_mask_set1_epi16(src, 0b11111111_11111111, a);
19169        let e = _mm256_set1_epi16(11);
19170        assert_eq_m256i(r, e);
19171    }
19172
19173    #[simd_test(enable = "avx512bw,avx512vl")]
19174    unsafe fn test_mm256_maskz_set1_epi16() {
19175        let a: i16 = 11;
19176        let r = _mm256_maskz_set1_epi16(0, a);
19177        assert_eq_m256i(r, _mm256_setzero_si256());
19178        let r = _mm256_maskz_set1_epi16(0b11111111_11111111, a);
19179        let e = _mm256_set1_epi16(11);
19180        assert_eq_m256i(r, e);
19181    }
19182
19183    #[simd_test(enable = "avx512bw,avx512vl")]
19184    unsafe fn test_mm_mask_set1_epi16() {
19185        let src = _mm_set1_epi16(2);
19186        let a: i16 = 11;
19187        let r = _mm_mask_set1_epi16(src, 0, a);
19188        assert_eq_m128i(r, src);
19189        let r = _mm_mask_set1_epi16(src, 0b11111111, a);
19190        let e = _mm_set1_epi16(11);
19191        assert_eq_m128i(r, e);
19192    }
19193
19194    #[simd_test(enable = "avx512bw,avx512vl")]
19195    unsafe fn test_mm_maskz_set1_epi16() {
19196        let a: i16 = 11;
19197        let r = _mm_maskz_set1_epi16(0, a);
19198        assert_eq_m128i(r, _mm_setzero_si128());
19199        let r = _mm_maskz_set1_epi16(0b11111111, a);
19200        let e = _mm_set1_epi16(11);
19201        assert_eq_m128i(r, e);
19202    }
19203
19204    #[simd_test(enable = "avx512bw")]
19205    unsafe fn test_mm512_mask_set1_epi8() {
19206        let src = _mm512_set1_epi8(2);
19207        let a: i8 = 11;
19208        let r = _mm512_mask_set1_epi8(src, 0, a);
19209        assert_eq_m512i(r, src);
19210        let r = _mm512_mask_set1_epi8(
19211            src,
19212            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
19213            a,
19214        );
19215        let e = _mm512_set1_epi8(11);
19216        assert_eq_m512i(r, e);
19217    }
19218
19219    #[simd_test(enable = "avx512bw")]
19220    unsafe fn test_mm512_maskz_set1_epi8() {
19221        let a: i8 = 11;
19222        let r = _mm512_maskz_set1_epi8(0, a);
19223        assert_eq_m512i(r, _mm512_setzero_si512());
19224        let r = _mm512_maskz_set1_epi8(
19225            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
19226            a,
19227        );
19228        let e = _mm512_set1_epi8(11);
19229        assert_eq_m512i(r, e);
19230    }
19231
19232    #[simd_test(enable = "avx512bw,avx512vl")]
19233    unsafe fn test_mm256_mask_set1_epi8() {
19234        let src = _mm256_set1_epi8(2);
19235        let a: i8 = 11;
19236        let r = _mm256_mask_set1_epi8(src, 0, a);
19237        assert_eq_m256i(r, src);
19238        let r = _mm256_mask_set1_epi8(src, 0b11111111_11111111_11111111_11111111, a);
19239        let e = _mm256_set1_epi8(11);
19240        assert_eq_m256i(r, e);
19241    }
19242
19243    #[simd_test(enable = "avx512bw,avx512vl")]
19244    unsafe fn test_mm256_maskz_set1_epi8() {
19245        let a: i8 = 11;
19246        let r = _mm256_maskz_set1_epi8(0, a);
19247        assert_eq_m256i(r, _mm256_setzero_si256());
19248        let r = _mm256_maskz_set1_epi8(0b11111111_11111111_11111111_11111111, a);
19249        let e = _mm256_set1_epi8(11);
19250        assert_eq_m256i(r, e);
19251    }
19252
19253    #[simd_test(enable = "avx512bw,avx512vl")]
19254    unsafe fn test_mm_mask_set1_epi8() {
19255        let src = _mm_set1_epi8(2);
19256        let a: i8 = 11;
19257        let r = _mm_mask_set1_epi8(src, 0, a);
19258        assert_eq_m128i(r, src);
19259        let r = _mm_mask_set1_epi8(src, 0b11111111_11111111, a);
19260        let e = _mm_set1_epi8(11);
19261        assert_eq_m128i(r, e);
19262    }
19263
19264    #[simd_test(enable = "avx512bw,avx512vl")]
19265    unsafe fn test_mm_maskz_set1_epi8() {
19266        let a: i8 = 11;
19267        let r = _mm_maskz_set1_epi8(0, a);
19268        assert_eq_m128i(r, _mm_setzero_si128());
19269        let r = _mm_maskz_set1_epi8(0b11111111_11111111, a);
19270        let e = _mm_set1_epi8(11);
19271        assert_eq_m128i(r, e);
19272    }
19273
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_shufflelo_epi16() {
        // vpshuflw: imm8 0b00_01_01_11 permutes the four low words of each
        // 128-bit lane; the four high words pass through unchanged.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12,
            16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28,
        );
        let r = _mm512_shufflelo_epi16::<0b00_01_01_11>(a);
        assert_eq_m512i(r, e);
    }
19289
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_shufflelo_epi16() {
        // Merge-masked vpshuflw: zero mask keeps `src` (= `a`); a full
        // 32-bit mask yields the plain low-word permutation.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm512_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shufflelo_epi16::<0b00_01_01_11>(
            a,
            0b11111111_11111111_11111111_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12,
            16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28,
        );
        assert_eq_m512i(r, e);
    }
19311
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_shufflelo_epi16() {
        // Zero-masked vpshuflw: zero mask -> all zeros; full mask -> the
        // plain low-word permutation of each 128-bit lane.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm512_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r =
            _mm512_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111_11111111_11111111_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12,
            16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28,
        );
        assert_eq_m512i(r, e);
    }
19330
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_shufflelo_epi16() {
        // Merge-masked vpshuflw (256-bit): zero mask keeps `src` (= `a`).
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0b11111111_11111111, a);
        let e = _mm256_set_epi16(0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12);
        assert_eq_m256i(r, e);
    }
19340
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_shufflelo_epi16() {
        // Zero-masked vpshuflw (256-bit): zero mask -> zeros.
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111_11111111, a);
        let e = _mm256_set_epi16(0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12);
        assert_eq_m256i(r, e);
    }
19350
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_shufflelo_epi16() {
        // Merge-masked vpshuflw (128-bit): zero mask keeps `src` (= `a`).
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0b11111111, a);
        let e = _mm_set_epi16(0, 1, 2, 3, 7, 6, 6, 4);
        assert_eq_m128i(r, e);
    }
19360
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_shufflelo_epi16() {
        // Zero-masked vpshuflw (128-bit): zero mask -> zeros.
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111, a);
        let e = _mm_set_epi16(0, 1, 2, 3, 7, 6, 6, 4);
        assert_eq_m128i(r, e);
    }
19370
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_shufflehi_epi16() {
        // vpshufhw: imm8 0b00_01_01_11 permutes the four high words of each
        // 128-bit lane; the four low words pass through unchanged.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15,
            19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31,
        );
        let r = _mm512_shufflehi_epi16::<0b00_01_01_11>(a);
        assert_eq_m512i(r, e);
    }
19386
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_shufflehi_epi16() {
        // Merge-masked vpshufhw: zero mask keeps `src` (= `a`); a full
        // 32-bit mask yields the plain high-word permutation.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm512_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shufflehi_epi16::<0b00_01_01_11>(
            a,
            0b11111111_11111111_11111111_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15,
            19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31,
        );
        assert_eq_m512i(r, e);
    }
19408
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_shufflehi_epi16() {
        // Zero-masked vpshufhw: zero mask -> all zeros; full mask -> the
        // plain high-word permutation of each 128-bit lane.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm512_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r =
            _mm512_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111_11111111_11111111_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15,
            19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31,
        );
        assert_eq_m512i(r, e);
    }
19427
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_shufflehi_epi16() {
        // Merge-masked vpshufhw (256-bit): zero mask keeps `src` (= `a`).
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0b11111111_11111111, a);
        let e = _mm256_set_epi16(3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }
19437
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_shufflehi_epi16() {
        // Zero-masked vpshufhw (256-bit): zero mask -> zeros.
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111_11111111, a);
        let e = _mm256_set_epi16(3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }
19447
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_shufflehi_epi16() {
        // Merge-masked vpshufhw (128-bit): zero mask keeps `src` (= `a`).
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0b11111111, a);
        let e = _mm_set_epi16(3, 2, 2, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
19457
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_shufflehi_epi16() {
        // Zero-masked vpshufhw (128-bit): zero mask -> zeros.
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111, a);
        let e = _mm_set_epi16(3, 2, 2, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
19467
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_shuffle_epi8() {
        // vpshufb (512-bit): every control byte in `b` is 1, so each output
        // byte is byte index 1 of its own 16-byte lane of `a`
        // (14, 30, 46, 62 for the four lanes, given `a` = 0..=63 descending).
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_shuffle_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
                                46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
                                62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62);
        assert_eq_m512i(r, e);
    }
19484
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_shuffle_epi8() {
        // Merge-masked vpshufb: zero mask keeps `src` (= `a`); full 64-bit
        // mask yields the plain shuffle (control byte 1 per lane).
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_mask_shuffle_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shuffle_epi8(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
                                46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
                                62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62);
        assert_eq_m512i(r, e);
    }
19508
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_shuffle_epi8() {
        // Zero-masked vpshufb: zero mask -> all zeros; full 64-bit mask ->
        // the plain shuffle (control byte 1 per lane).
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_maskz_shuffle_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shuffle_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
                                46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
                                62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62);
        assert_eq_m512i(r, e);
    }
19531
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_shuffle_epi8() {
        // Merge-masked vpshufb (256-bit): zero mask keeps `src` (= `a`);
        // full 32-bit mask broadcasts byte index 1 of each 16-byte lane.
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_mask_shuffle_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shuffle_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30);
        assert_eq_m256i(r, e);
    }
19546
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_shuffle_epi8() {
        // Zero-masked vpshufb (256-bit): zero mask -> zeros; full mask ->
        // byte index 1 of each 16-byte lane broadcast across the lane.
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_maskz_shuffle_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shuffle_epi8(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30);
        assert_eq_m256i(r, e);
    }
19561
19562    #[simd_test(enable = "avx512bw,avx512vl")]
19563    unsafe fn test_mm_mask_shuffle_epi8() {
19564        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
19565        let b = _mm_set1_epi8(1);
19566        let r = _mm_mask_shuffle_epi8(a, 0, a, b);
19567        assert_eq_m128i(r, a);
19568        let r = _mm_mask_shuffle_epi8(a, 0b11111111_11111111, a, b);
19569        let e = _mm_set_epi8(
19570            14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
19571        );
19572        assert_eq_m128i(r, e);
19573    }
19574
19575    #[simd_test(enable = "avx512bw,avx512vl")]
19576    unsafe fn test_mm_maskz_shuffle_epi8() {
19577        #[rustfmt::skip]
19578        let a = _mm_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15);
19579        let b = _mm_set1_epi8(1);
19580        let r = _mm_maskz_shuffle_epi8(0, a, b);
19581        assert_eq_m128i(r, _mm_setzero_si128());
19582        let r = _mm_maskz_shuffle_epi8(0b11111111_11111111, a, b);
19583        let e = _mm_set_epi8(
19584            14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
19585        );
19586        assert_eq_m128i(r, e);
19587    }
19588
    // `test_epi*_mask` sets a mask bit when `a AND b` is nonzero for that lane.
    // With a = bit0 and b = bits 0|1, every lane overlaps, so the mask is all ones.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_test_epi16_mask() {
        let a = _mm512_set1_epi16(1 << 0);
        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm512_test_epi16_mask(a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    // Masked variant: a zero writemask must give a zero result mask.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_test_epi16_mask() {
        let a = _mm512_set1_epi16(1 << 0);
        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm512_mask_test_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm512_mask_test_epi16_mask(0b11111111_11111111_11111111_11111111, a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    // 256-bit (16-lane) variant of test_epi16_mask.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_test_epi16_mask() {
        let a = _mm256_set1_epi16(1 << 0);
        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm256_test_epi16_mask(a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

    // 256-bit masked variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_test_epi16_mask() {
        let a = _mm256_set1_epi16(1 << 0);
        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm256_mask_test_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm256_mask_test_epi16_mask(0b11111111_11111111, a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

    // 128-bit (8-lane) variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_test_epi16_mask() {
        let a = _mm_set1_epi16(1 << 0);
        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm_test_epi16_mask(a, b);
        let e: __mmask8 = 0b11111111;
        assert_eq!(r, e);
    }

    // 128-bit masked variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_test_epi16_mask() {
        let a = _mm_set1_epi16(1 << 0);
        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm_mask_test_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm_mask_test_epi16_mask(0b11111111, a, b);
        let e: __mmask8 = 0b11111111;
        assert_eq!(r, e);
    }

    // Byte-granular version: 64 lanes, same overlapping bit patterns.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_test_epi8_mask() {
        let a = _mm512_set1_epi8(1 << 0);
        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm512_test_epi8_mask(a, b);
        let e: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    // 512-bit byte masked variant.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_test_epi8_mask() {
        let a = _mm512_set1_epi8(1 << 0);
        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm512_mask_test_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm512_mask_test_epi8_mask(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        let e: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    // 256-bit byte variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_test_epi8_mask() {
        let a = _mm256_set1_epi8(1 << 0);
        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm256_test_epi8_mask(a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    // 256-bit byte masked variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_test_epi8_mask() {
        let a = _mm256_set1_epi8(1 << 0);
        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm256_mask_test_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm256_mask_test_epi8_mask(0b11111111_11111111_11111111_11111111, a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    // 128-bit byte variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_test_epi8_mask() {
        let a = _mm_set1_epi8(1 << 0);
        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm_test_epi8_mask(a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

    // 128-bit byte masked variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_test_epi8_mask() {
        let a = _mm_set1_epi8(1 << 0);
        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm_mask_test_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm_mask_test_epi8_mask(0b11111111_11111111, a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }
19714
    // `testn_epi*_mask` is the negated test: a mask bit is set when `a AND b` is
    // ZERO for that lane. With a = bit0 and b = bits 0|1 every lane overlaps, so
    // every expected mask below is all zeros.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_testn_epi16_mask() {
        let a = _mm512_set1_epi16(1 << 0);
        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm512_testn_epi16_mask(a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    // Masked variant: zero writemask forces a zero result regardless of lanes.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_testn_epi16_mask() {
        let a = _mm512_set1_epi16(1 << 0);
        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm512_mask_testn_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm512_mask_testn_epi16_mask(0b11111111_11111111_11111111_11111111, a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    // 256-bit variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_testn_epi16_mask() {
        let a = _mm256_set1_epi16(1 << 0);
        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm256_testn_epi16_mask(a, b);
        let e: __mmask16 = 0b00000000_00000000;
        assert_eq!(r, e);
    }

    // 256-bit masked variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_testn_epi16_mask() {
        let a = _mm256_set1_epi16(1 << 0);
        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm256_mask_testn_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm256_mask_testn_epi16_mask(0b11111111_11111111, a, b);
        let e: __mmask16 = 0b00000000_00000000;
        assert_eq!(r, e);
    }

    // 128-bit variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_testn_epi16_mask() {
        let a = _mm_set1_epi16(1 << 0);
        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm_testn_epi16_mask(a, b);
        let e: __mmask8 = 0b00000000;
        assert_eq!(r, e);
    }

    // 128-bit masked variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_testn_epi16_mask() {
        let a = _mm_set1_epi16(1 << 0);
        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm_mask_testn_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm_mask_testn_epi16_mask(0b11111111, a, b);
        let e: __mmask8 = 0b00000000;
        assert_eq!(r, e);
    }

    // Byte-granular negated test, 64 lanes.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_testn_epi8_mask() {
        let a = _mm512_set1_epi8(1 << 0);
        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm512_testn_epi8_mask(a, b);
        let e: __mmask64 =
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    // 512-bit byte masked variant.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_testn_epi8_mask() {
        let a = _mm512_set1_epi8(1 << 0);
        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm512_mask_testn_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm512_mask_testn_epi8_mask(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        let e: __mmask64 =
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    // 256-bit byte variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_testn_epi8_mask() {
        let a = _mm256_set1_epi8(1 << 0);
        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm256_testn_epi8_mask(a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    // 256-bit byte masked variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_testn_epi8_mask() {
        let a = _mm256_set1_epi8(1 << 0);
        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm256_mask_testn_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm256_mask_testn_epi8_mask(0b11111111_11111111_11111111_11111111, a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    // 128-bit byte variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_testn_epi8_mask() {
        let a = _mm_set1_epi8(1 << 0);
        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm_testn_epi8_mask(a, b);
        let e: __mmask16 = 0b00000000_00000000;
        assert_eq!(r, e);
    }

    // 128-bit byte masked variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_testn_epi8_mask() {
        let a = _mm_set1_epi8(1 << 0);
        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm_mask_testn_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm_mask_testn_epi8_mask(0b11111111_11111111, a, b);
        let e: __mmask16 = 0b00000000_00000000;
        assert_eq!(r, e);
    }
19840
    // `_store_mask64` writes a 64-bit mask through a pointer; the stored value
    // must round-trip exactly.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_store_mask64() {
        let a: __mmask64 =
            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
        let mut r = 0;
        _store_mask64(&mut r, a);
        assert_eq!(r, a);
    }

    // Same round-trip check for the 32-bit mask store.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_store_mask32() {
        let a: __mmask32 = 0b11111111_00000000_11111111_00000000;
        let mut r = 0;
        _store_mask32(&mut r, a);
        assert_eq!(r, a);
    }

    // `_load_mask64` reads a 64-bit mask through a pointer without modification.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_load_mask64() {
        let p: __mmask64 =
            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
        let r = _load_mask64(&p);
        let e: __mmask64 =
            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
        assert_eq!(r, e);
    }

    // Same load check for the 32-bit mask.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_load_mask32() {
        let p: __mmask32 = 0b11111111_00000000_11111111_00000000;
        let r = _load_mask32(&p);
        let e: __mmask32 = 0b11111111_00000000_11111111_00000000;
        assert_eq!(r, e);
    }
19875
    // Sum of absolute differences: |2 - 4| = 2 per byte, summed over the 8 bytes
    // of each 64-bit group gives 16.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_sad_epu8() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(4);
        let r = _mm512_sad_epu8(a, b);
        let e = _mm512_set1_epi64(16);
        assert_eq_m512i(r, e);
    }

    // Double-block SAD with IMM8 = 0: each 16-bit result is a four-byte SAD,
    // 4 * |2 - 4| = 8. Uniform inputs make every lane the same regardless of
    // the block selection, so only the arithmetic is checked here.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_dbsad_epu8() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(4);
        let r = _mm512_dbsad_epu8::<0>(a, b);
        let e = _mm512_set1_epi16(8);
        assert_eq_m512i(r, e);
    }

    // Writemask variant: zero mask must return `src` untouched.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_dbsad_epu8() {
        let src = _mm512_set1_epi16(1);
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(4);
        let r = _mm512_mask_dbsad_epu8::<0>(src, 0, a, b);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_dbsad_epu8::<0>(src, 0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm512_set1_epi16(8);
        assert_eq_m512i(r, e);
    }

    // Zeroing variant: zero mask must yield an all-zero vector.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_dbsad_epu8() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(4);
        let r = _mm512_maskz_dbsad_epu8::<0>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_dbsad_epu8::<0>(0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm512_set1_epi16(8);
        assert_eq_m512i(r, e);
    }

    // 256-bit dbsad, same 4 * |2 - 4| = 8 expectation.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_dbsad_epu8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_dbsad_epu8::<0>(a, b);
        let e = _mm256_set1_epi16(8);
        assert_eq_m256i(r, e);
    }

    // 256-bit writemask variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_dbsad_epu8() {
        let src = _mm256_set1_epi16(1);
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_mask_dbsad_epu8::<0>(src, 0, a, b);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_dbsad_epu8::<0>(src, 0b11111111_11111111, a, b);
        let e = _mm256_set1_epi16(8);
        assert_eq_m256i(r, e);
    }

    // 256-bit zeroing variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_dbsad_epu8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_maskz_dbsad_epu8::<0>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_dbsad_epu8::<0>(0b11111111_11111111, a, b);
        let e = _mm256_set1_epi16(8);
        assert_eq_m256i(r, e);
    }

    // 128-bit dbsad.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_dbsad_epu8() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(4);
        let r = _mm_dbsad_epu8::<0>(a, b);
        let e = _mm_set1_epi16(8);
        assert_eq_m128i(r, e);
    }

    // 128-bit writemask variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_dbsad_epu8() {
        let src = _mm_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(4);
        let r = _mm_mask_dbsad_epu8::<0>(src, 0, a, b);
        assert_eq_m128i(r, src);
        let r = _mm_mask_dbsad_epu8::<0>(src, 0b11111111, a, b);
        let e = _mm_set1_epi16(8);
        assert_eq_m128i(r, e);
    }

    // 128-bit zeroing variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_dbsad_epu8() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(4);
        let r = _mm_maskz_dbsad_epu8::<0>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_dbsad_epu8::<0>(0b11111111, a, b);
        let e = _mm_set1_epi16(8);
        assert_eq_m128i(r, e);
    }
19980
    // `movepi16_mask` collects the sign (most significant) bit of each 16-bit
    // lane; with every lane set to 1 << 15, all mask bits come out set.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_movepi16_mask() {
        let a = _mm512_set1_epi16(1 << 15);
        let r = _mm512_movepi16_mask(a);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    // 256-bit variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_movepi16_mask() {
        let a = _mm256_set1_epi16(1 << 15);
        let r = _mm256_movepi16_mask(a);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

    // 128-bit variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_movepi16_mask() {
        let a = _mm_set1_epi16(1 << 15);
        let r = _mm_movepi16_mask(a);
        let e: __mmask8 = 0b11111111;
        assert_eq!(r, e);
    }

    // Byte variant: sign bit of each 8-bit lane (1 << 7) sets all 64 mask bits.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_movepi8_mask() {
        let a = _mm512_set1_epi8(1 << 7);
        let r = _mm512_movepi8_mask(a);
        let e: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    // 256-bit byte variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_movepi8_mask() {
        let a = _mm256_set1_epi8(1 << 7);
        let r = _mm256_movepi8_mask(a);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    // 128-bit byte variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_movepi8_mask() {
        let a = _mm_set1_epi8(1 << 7);
        let r = _mm_movepi8_mask(a);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }
20029
20030    #[simd_test(enable = "avx512bw")]
20031    unsafe fn test_mm512_movm_epi16() {
20032        let a: __mmask32 = 0b11111111_11111111_11111111_11111111;
20033        let r = _mm512_movm_epi16(a);
20034        let e = _mm512_set1_epi16(
20035            1 << 15
20036                | 1 << 14
20037                | 1 << 13
20038                | 1 << 12
20039                | 1 << 11
20040                | 1 << 10
20041                | 1 << 9
20042                | 1 << 8
20043                | 1 << 7
20044                | 1 << 6
20045                | 1 << 5
20046                | 1 << 4
20047                | 1 << 3
20048                | 1 << 2
20049                | 1 << 1
20050                | 1 << 0,
20051        );
20052        assert_eq_m512i(r, e);
20053    }
20054
20055    #[simd_test(enable = "avx512bw,avx512vl")]
20056    unsafe fn test_mm256_movm_epi16() {
20057        let a: __mmask16 = 0b11111111_11111111;
20058        let r = _mm256_movm_epi16(a);
20059        let e = _mm256_set1_epi16(
20060            1 << 15
20061                | 1 << 14
20062                | 1 << 13
20063                | 1 << 12
20064                | 1 << 11
20065                | 1 << 10
20066                | 1 << 9
20067                | 1 << 8
20068                | 1 << 7
20069                | 1 << 6
20070                | 1 << 5
20071                | 1 << 4
20072                | 1 << 3
20073                | 1 << 2
20074                | 1 << 1
20075                | 1 << 0,
20076        );
20077        assert_eq_m256i(r, e);
20078    }
20079
20080    #[simd_test(enable = "avx512bw,avx512vl")]
20081    unsafe fn test_mm_movm_epi16() {
20082        let a: __mmask8 = 0b11111111;
20083        let r = _mm_movm_epi16(a);
20084        let e = _mm_set1_epi16(
20085            1 << 15
20086                | 1 << 14
20087                | 1 << 13
20088                | 1 << 12
20089                | 1 << 11
20090                | 1 << 10
20091                | 1 << 9
20092                | 1 << 8
20093                | 1 << 7
20094                | 1 << 6
20095                | 1 << 5
20096                | 1 << 4
20097                | 1 << 3
20098                | 1 << 2
20099                | 1 << 1
20100                | 1 << 0,
20101        );
20102        assert_eq_m128i(r, e);
20103    }
20104
20105    #[simd_test(enable = "avx512bw")]
20106    unsafe fn test_mm512_movm_epi8() {
20107        let a: __mmask64 =
20108            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
20109        let r = _mm512_movm_epi8(a);
20110        let e =
20111            _mm512_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0);
20112        assert_eq_m512i(r, e);
20113    }
20114
20115    #[simd_test(enable = "avx512bw,avx512vl")]
20116    unsafe fn test_mm256_movm_epi8() {
20117        let a: __mmask32 = 0b11111111_11111111_11111111_11111111;
20118        let r = _mm256_movm_epi8(a);
20119        let e =
20120            _mm256_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0);
20121        assert_eq_m256i(r, e);
20122    }
20123
20124    #[simd_test(enable = "avx512bw,avx512vl")]
20125    unsafe fn test_mm_movm_epi8() {
20126        let a: __mmask16 = 0b11111111_11111111;
20127        let r = _mm_movm_epi8(a);
20128        let e =
20129            _mm_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0);
20130        assert_eq_m128i(r, e);
20131    }
20132
    // Mask <-> integer conversions are bit-for-bit identity transforms; kadd is
    // plain integer addition on the mask registers.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_cvtmask32_u32() {
        let a: __mmask32 = 0b11001100_00110011_01100110_10011001;
        let r = _cvtmask32_u32(a);
        let e: u32 = 0b11001100_00110011_01100110_10011001;
        assert_eq!(r, e);
    }

    // Reverse direction: u32 -> __mmask32 keeps the bit pattern unchanged.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_cvtu32_mask32() {
        let a: u32 = 0b11001100_00110011_01100110_10011001;
        let r = _cvtu32_mask32(a);
        let e: __mmask32 = 0b11001100_00110011_01100110_10011001;
        assert_eq!(r, e);
    }

    // 11 + 22 = 33 as a 32-bit mask addition.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kadd_mask32() {
        let a: __mmask32 = 11;
        let b: __mmask32 = 22;
        let r = _kadd_mask32(a, b);
        let e: __mmask32 = 33;
        assert_eq!(r, e);
    }

    // 64-bit mask addition.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kadd_mask64() {
        let a: __mmask64 = 11;
        let b: __mmask64 = 22;
        let r = _kadd_mask64(a, b);
        let e: __mmask64 = 33;
        assert_eq!(r, e);
    }
20166
    // Bitwise AND of a mask with itself is the identity.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kand_mask32() {
        let a: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _kand_mask32(a, b);
        let e: __mmask32 = 0b11001100_00110011_11001100_00110011;
        assert_eq!(r, e);
    }

    // 64-bit AND identity.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kand_mask64() {
        let a: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let b: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _kand_mask64(a, b);
        let e: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        assert_eq!(r, e);
    }

    // Bitwise NOT flips every bit of the mask.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_knot_mask32() {
        let a: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _knot_mask32(a);
        let e: __mmask32 = 0b00110011_11001100_00110011_11001100;
        assert_eq!(r, e);
    }

    // 64-bit NOT.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_knot_mask64() {
        let a: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _knot_mask64(a);
        let e: __mmask64 =
            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
        assert_eq!(r, e);
    }

    // ANDNOT: (!a) & b. With a == b this is always zero.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kandn_mask32() {
        let a: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _kandn_mask32(a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    // 64-bit ANDNOT with identical operands is zero.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kandn_mask64() {
        let a: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let b: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _kandn_mask64(a, b);
        let e: __mmask64 =
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    // OR of a pattern with its bitwise complement yields all ones.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kor_mask32() {
        let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _kor_mask32(a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    // 64-bit OR of complementary patterns is all ones.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kor_mask64() {
        let a: __mmask64 =
            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
        let b: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _kor_mask64(a, b);
        let e: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    // XOR of complementary patterns is also all ones.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kxor_mask32() {
        let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _kxor_mask32(a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    // 64-bit XOR of complementary patterns.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kxor_mask64() {
        let a: __mmask64 =
            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
        let b: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _kxor_mask64(a, b);
        let e: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    // XNOR (negated XOR) of complementary patterns is all zeros.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kxnor_mask32() {
        let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _kxnor_mask32(a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    // 64-bit XNOR of complementary patterns.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kxnor_mask64() {
        let a: __mmask64 =
            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
        let b: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _kxnor_mask64(a, b);
        let e: __mmask64 =
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }
20289
    // kortest computes a OR b: the return value reflects "OR == 0" (ZF) and
    // `all_ones` is set when the OR is all ones (CF). The two operands here are
    // bit-complements over 32 bits, so OR is all ones: ZF = 0, CF = 1.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kortest_mask32_u8() {
        let a: __mmask32 = 0b0110100101101001_0110100101101001;
        let b: __mmask32 = 0b1011011010110110_1011011010110110;
        let mut all_ones: u8 = 0;
        let r = _kortest_mask32_u8(a, b, &mut all_ones);
        assert_eq!(r, 0);
        assert_eq!(all_ones, 1);
    }

    // 64-bit version with the same 32-bit operands: the upper 32 bits of the OR
    // are zero, so it is neither zero nor all ones — both flags come out 0.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kortest_mask64_u8() {
        let a: __mmask64 = 0b0110100101101001_0110100101101001;
        let b: __mmask64 = 0b1011011010110110_1011011010110110;
        let mut all_ones: u8 = 0;
        let r = _kortest_mask64_u8(a, b, &mut all_ones);
        assert_eq!(r, 0);
        assert_eq!(all_ones, 0);
    }

    // kortestc returns only the carry flag: 1 because the 32-bit OR is all ones.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kortestc_mask32_u8() {
        let a: __mmask32 = 0b0110100101101001_0110100101101001;
        let b: __mmask32 = 0b1011011010110110_1011011010110110;
        let r = _kortestc_mask32_u8(a, b);
        assert_eq!(r, 1);
    }

    // 64-bit carry flag: 0, the OR does not cover the upper 32 bits.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kortestc_mask64_u8() {
        let a: __mmask64 = 0b0110100101101001_0110100101101001;
        let b: __mmask64 = 0b1011011010110110_1011011010110110;
        let r = _kortestc_mask64_u8(a, b);
        assert_eq!(r, 0);
    }

    // kortestz returns only the zero flag: 0 because the OR is nonzero.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kortestz_mask32_u8() {
        let a: __mmask32 = 0b0110100101101001_0110100101101001;
        let b: __mmask32 = 0b1011011010110110_1011011010110110;
        let r = _kortestz_mask32_u8(a, b);
        assert_eq!(r, 0);
    }

    // 64-bit zero flag: also 0, the OR is nonzero.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kortestz_mask64_u8() {
        let a: __mmask64 = 0b0110100101101001_0110100101101001;
        let b: __mmask64 = 0b1011011010110110_1011011010110110;
        let r = _kortestz_mask64_u8(a, b);
        assert_eq!(r, 0);
    }
20341
    // Left-shift of a 32-bit mask by an immediate count: bits shifted past bit 31
    // are discarded, and counts >= 32 clear the whole mask.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kshiftli_mask32() {
        let a: __mmask32 = 0b0110100101101001_0110100101101001;
        let r = _kshiftli_mask32::<3>(a);
        let e: __mmask32 = 0b0100101101001011_0100101101001000;
        assert_eq!(r, e);

        // Count 31: only the lowest set bit of `a` survives, at the MSB.
        let r = _kshiftli_mask32::<31>(a);
        let e: __mmask32 = 0b1000000000000000_0000000000000000;
        assert_eq!(r, e);

        // Count equal to the mask width clears everything.
        let r = _kshiftli_mask32::<32>(a);
        let e: __mmask32 = 0b0000000000000000_0000000000000000;
        assert_eq!(r, e);

        // Counts beyond the width also give zero (no wrap-around).
        let r = _kshiftli_mask32::<33>(a);
        let e: __mmask32 = 0b0000000000000000_0000000000000000;
        assert_eq!(r, e);
    }
20361
20362    #[simd_test(enable = "avx512bw")]
20363    unsafe fn test_kshiftli_mask64() {
20364        let a: __mmask64 = 0b0110100101101001_0110100101101001;
20365        let r = _kshiftli_mask64::<3>(a);
20366        let e: __mmask64 = 0b0110100101101001011_0100101101001000;
20367        assert_eq!(r, e);
20368
20369        let r = _kshiftli_mask64::<63>(a);
20370        let e: __mmask64 = 0b1000000000000000_0000000000000000_0000000000000000_0000000000000000;
20371        assert_eq!(r, e);
20372
20373        let r = _kshiftli_mask64::<64>(a);
20374        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000;
20375        assert_eq!(r, e);
20376
20377        let r = _kshiftli_mask64::<65>(a);
20378        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000;
20379        assert_eq!(r, e);
20380    }
20381
20382    #[simd_test(enable = "avx512bw")]
20383    unsafe fn test_kshiftri_mask32() {
20384        let a: __mmask32 = 0b1010100101101001_0110100101101001;
20385        let r = _kshiftri_mask32::<3>(a);
20386        let e: __mmask32 = 0b0001010100101101_0010110100101101;
20387        assert_eq!(r, e);
20388
20389        let r = _kshiftri_mask32::<31>(a);
20390        let e: __mmask32 = 0b0000000000000000_0000000000000001;
20391        assert_eq!(r, e);
20392
20393        let r = _kshiftri_mask32::<32>(a);
20394        let e: __mmask32 = 0b0000000000000000_0000000000000000;
20395        assert_eq!(r, e);
20396
20397        let r = _kshiftri_mask32::<33>(a);
20398        let e: __mmask32 = 0b0000000000000000_0000000000000000;
20399        assert_eq!(r, e);
20400    }
20401
20402    #[simd_test(enable = "avx512bw")]
20403    unsafe fn test_kshiftri_mask64() {
20404        let a: __mmask64 = 0b1010100101101001011_0100101101001000;
20405        let r = _kshiftri_mask64::<3>(a);
20406        let e: __mmask64 = 0b1010100101101001_0110100101101001;
20407        assert_eq!(r, e);
20408
20409        let r = _kshiftri_mask64::<34>(a);
20410        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000001;
20411        assert_eq!(r, e);
20412
20413        let r = _kshiftri_mask64::<35>(a);
20414        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000;
20415        assert_eq!(r, e);
20416
20417        let r = _kshiftri_mask64::<64>(a);
20418        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000;
20419        assert_eq!(r, e);
20420
20421        let r = _kshiftri_mask64::<65>(a);
20422        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000;
20423        assert_eq!(r, e);
20424    }
20425
    // KTEST: returns ZF (a AND b == 0) and reports CF (NOT(a) AND b == 0)
    // via the out-pointer / `c`/`z` variants. In this group `b` is the
    // bitwise complement of `a` within the low 32 bits, so `a & b == 0`
    // (ZF set) while `!a & b == b != 0` (CF clear).

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_ktest_mask32_u8() {
        let a: __mmask32 = 0b0110100100111100_0110100100111100;
        let b: __mmask32 = 0b1001011011000011_1001011011000011;
        let mut and_not: u8 = 0;
        let r = _ktest_mask32_u8(a, b, &mut and_not);
        assert_eq!(r, 1);
        assert_eq!(and_not, 0);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_ktestc_mask32_u8() {
        let a: __mmask32 = 0b0110100100111100_0110100100111100;
        let b: __mmask32 = 0b1001011011000011_1001011011000011;
        let r = _ktestc_mask32_u8(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_ktestz_mask32_u8() {
        let a: __mmask32 = 0b0110100100111100_0110100100111100;
        let b: __mmask32 = 0b1001011011000011_1001011011000011;
        let r = _ktestz_mask32_u8(a, b);
        assert_eq!(r, 1);
    }

    // Same patterns widened to 64 bits (upper halves zero): `a & b` is still
    // 0 and `!a & b` is still non-zero, so the expected flags are unchanged.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_ktest_mask64_u8() {
        let a: __mmask64 = 0b0110100100111100_0110100100111100;
        let b: __mmask64 = 0b1001011011000011_1001011011000011;
        let mut and_not: u8 = 0;
        let r = _ktest_mask64_u8(a, b, &mut and_not);
        assert_eq!(r, 1);
        assert_eq!(and_not, 0);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_ktestc_mask64_u8() {
        let a: __mmask64 = 0b0110100100111100_0110100100111100;
        let b: __mmask64 = 0b1001011011000011_1001011011000011;
        let r = _ktestc_mask64_u8(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_ktestz_mask64_u8() {
        let a: __mmask64 = 0b0110100100111100_0110100100111100;
        let b: __mmask64 = 0b1001011011000011_1001011011000011;
        let r = _ktestz_mask64_u8(a, b);
        assert_eq!(r, 1);
    }
20477
    // KUNPCKWD: result = (low 16 bits of `a`) << 16 | (low 16 bits of `b`);
    // the upper halves of both inputs are discarded (0x0011 and 0x1011 kept).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_kunpackw() {
        let a: u32 = 0x00110011;
        let b: u32 = 0x00001011;
        let r = _mm512_kunpackw(a, b);
        let e: u32 = 0x00111011;
        assert_eq!(r, e);
    }

    // KUNPCKDQ: result = (low 32 bits of `a`) << 32 | (low 32 bits of `b`);
    // the upper 32 bits of both inputs are discarded.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_kunpackd() {
        let a: u64 = 0x11001100_00110011;
        let b: u64 = 0x00101110_00001011;
        let r = _mm512_kunpackd(a, b);
        let e: u64 = 0x00110011_00001011;
        assert_eq!(r, e);
    }
20495
    // VPMOVWB: truncating 16-bit -> 8-bit conversion, plus the mask (blend
    // with `src`) and maskz (zero inactive lanes) variants. The value 2 fits
    // in i8, so each output byte simply equals the input word.

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cvtepi16_epi8() {
        let a = _mm512_set1_epi16(2);
        let r = _mm512_cvtepi16_epi8(a);
        let e = _mm256_set1_epi8(2);
        assert_eq_m256i(r, e);
    }

    // Mask 0 keeps `src` untouched; an all-ones mask converts every lane.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtepi16_epi8() {
        let src = _mm256_set1_epi8(1);
        let a = _mm512_set1_epi16(2);
        let r = _mm512_mask_cvtepi16_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(2);
        assert_eq_m256i(r, e);
    }

    // Mask 0 zeroes the whole result; all-ones converts every lane.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_cvtepi16_epi8() {
        let a = _mm512_set1_epi16(2);
        let r = _mm512_maskz_cvtepi16_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtepi16_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cvtepi16_epi8() {
        let a = _mm256_set1_epi16(2);
        let r = _mm256_cvtepi16_epi8(a);
        let e = _mm_set1_epi8(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtepi16_epi8() {
        let src = _mm_set1_epi8(1);
        let a = _mm256_set1_epi16(2);
        let r = _mm256_mask_cvtepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtepi16_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_cvtepi16_epi8() {
        let a = _mm256_set1_epi16(2);
        let r = _mm256_maskz_cvtepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtepi16_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(2);
        assert_eq_m128i(r, e);
    }

    // The 128-bit source only yields 8 bytes; the upper 8 bytes of the
    // 128-bit result are zero.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cvtepi16_epi8() {
        let a = _mm_set1_epi16(2);
        let r = _mm_cvtepi16_epi8(a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtepi16_epi8() {
        let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        let a = _mm_set1_epi16(2);
        let r = _mm_mask_cvtepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepi16_epi8(src, 0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_cvtepi16_epi8() {
        let a = _mm_set1_epi16(2);
        let r = _mm_maskz_cvtepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepi16_epi8(0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }
20582
    // VPMOVSWB: 16-bit -> 8-bit conversion with *signed saturation*:
    // i16::MAX saturates to i8::MAX rather than truncating.

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cvtsepi16_epi8() {
        let a = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_cvtsepi16_epi8(a);
        let e = _mm256_set1_epi8(i8::MAX);
        assert_eq_m256i(r, e);
    }

    // Mask 0 keeps `src`; all-ones converts every lane.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtsepi16_epi8() {
        let src = _mm256_set1_epi8(1);
        let a = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_mask_cvtsepi16_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtsepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(i8::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cvtsepi16_epi8() {
        let a = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_cvtsepi16_epi8(a);
        let e = _mm_set1_epi8(i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtsepi16_epi8() {
        let src = _mm_set1_epi8(1);
        let a = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_mask_cvtsepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtsepi16_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_cvtsepi16_epi8() {
        let a = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_maskz_cvtsepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtsepi16_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(i8::MAX);
        assert_eq_m128i(r, e);
    }

    // 128-bit variant only produces 8 bytes; upper half of the result is 0.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cvtsepi16_epi8() {
        let a = _mm_set1_epi16(i16::MAX);
        let r = _mm_cvtsepi16_epi8(a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtsepi16_epi8() {
        let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        let a = _mm_set1_epi16(i16::MAX);
        let r = _mm_mask_cvtsepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtsepi16_epi8(src, 0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_cvtsepi16_epi8() {
        let a = _mm_set1_epi16(i16::MAX);
        let r = _mm_maskz_cvtsepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtsepi16_epi8(0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

    // NOTE(review): this 512-bit maskz test sits after the 128-bit variants,
    // out of the usual 512/256/128 ordering of this file — cosmetic only.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_cvtsepi16_epi8() {
        let a = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_maskz_cvtsepi16_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtsepi16_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(i8::MAX);
        assert_eq_m256i(r, e);
    }
20672
    // VPMOVUSWB: 16-bit -> 8-bit conversion with *unsigned* saturation: the
    // input is treated as u16, so i16::MIN (0x8000 = 32768 unsigned) exceeds
    // 255 and saturates to 0xFF, which reads back as -1 through set1_epi8.

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cvtusepi16_epi8() {
        let a = _mm512_set1_epi16(i16::MIN);
        let r = _mm512_cvtusepi16_epi8(a);
        let e = _mm256_set1_epi8(-1);
        assert_eq_m256i(r, e);
    }

    // Mask 0 keeps `src`; all-ones converts every lane.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtusepi16_epi8() {
        let src = _mm256_set1_epi8(1);
        let a = _mm512_set1_epi16(i16::MIN);
        let r = _mm512_mask_cvtusepi16_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtusepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(-1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_cvtusepi16_epi8() {
        let a = _mm512_set1_epi16(i16::MIN);
        let r = _mm512_maskz_cvtusepi16_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtusepi16_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(-1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cvtusepi16_epi8() {
        let a = _mm256_set1_epi16(i16::MIN);
        let r = _mm256_cvtusepi16_epi8(a);
        let e = _mm_set1_epi8(-1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtusepi16_epi8() {
        let src = _mm_set1_epi8(1);
        let a = _mm256_set1_epi16(i16::MIN);
        let r = _mm256_mask_cvtusepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtusepi16_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(-1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_cvtusepi16_epi8() {
        let a = _mm256_set1_epi16(i16::MIN);
        let r = _mm256_maskz_cvtusepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtusepi16_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(-1);
        assert_eq_m128i(r, e);
    }

    // 128-bit variant produces only 8 bytes; upper half of the result is 0.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cvtusepi16_epi8() {
        let a = _mm_set1_epi16(i16::MIN);
        let r = _mm_cvtusepi16_epi8(a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtusepi16_epi8() {
        let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        let a = _mm_set1_epi16(i16::MIN);
        let r = _mm_mask_cvtusepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtusepi16_epi8(src, 0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_cvtusepi16_epi8() {
        let a = _mm_set1_epi16(i16::MIN);
        let r = _mm_maskz_cvtusepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtusepi16_epi8(0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }
20759
    // VPMOVSXBW: sign-extending 8-bit -> 16-bit conversion; 2 widens to 2.

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cvtepi8_epi16() {
        let a = _mm256_set1_epi8(2);
        let r = _mm512_cvtepi8_epi16(a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    // Mask 0 keeps `src`; all-ones converts every lane.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtepi8_epi16() {
        let src = _mm512_set1_epi16(1);
        let a = _mm256_set1_epi8(2);
        let r = _mm512_mask_cvtepi8_epi16(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtepi8_epi16(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_cvtepi8_epi16() {
        let a = _mm256_set1_epi8(2);
        let r = _mm512_maskz_cvtepi8_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtepi8_epi16(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtepi8_epi16() {
        let src = _mm256_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let r = _mm256_mask_cvtepi8_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_cvtepi8_epi16(src, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_cvtepi8_epi16() {
        let a = _mm_set1_epi8(2);
        let r = _mm256_maskz_cvtepi8_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_cvtepi8_epi16(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtepi8_epi16() {
        let src = _mm_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let r = _mm_mask_cvtepi8_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepi8_epi16(src, 0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_cvtepi8_epi16() {
        let a = _mm_set1_epi8(2);
        let r = _mm_maskz_cvtepi8_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepi8_epi16(0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }
20830
    // VPMOVZXBW: zero-extending 8-bit -> 16-bit conversion; 2 widens to 2.
    // (A negative-input case would distinguish this from the sign-extending
    // variant above; with 2 the two agree.)

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cvtepu8_epi16() {
        let a = _mm256_set1_epi8(2);
        let r = _mm512_cvtepu8_epi16(a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    // Mask 0 keeps `src`; all-ones converts every lane.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtepu8_epi16() {
        let src = _mm512_set1_epi16(1);
        let a = _mm256_set1_epi8(2);
        let r = _mm512_mask_cvtepu8_epi16(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtepu8_epi16(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_cvtepu8_epi16() {
        let a = _mm256_set1_epi8(2);
        let r = _mm512_maskz_cvtepu8_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtepu8_epi16(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtepu8_epi16() {
        let src = _mm256_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let r = _mm256_mask_cvtepu8_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_cvtepu8_epi16(src, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_cvtepu8_epi16() {
        let a = _mm_set1_epi8(2);
        let r = _mm256_maskz_cvtepu8_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_cvtepu8_epi16(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtepu8_epi16() {
        let src = _mm_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let r = _mm_mask_cvtepu8_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepu8_epi16(src, 0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_cvtepu8_epi16() {
        let a = _mm_set1_epi8(2);
        let r = _mm_maskz_cvtepu8_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepu8_epi16(0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }
20901
    // VPSLLDQ: byte-shift each 128-bit lane left by IMM bytes, zero-filling.
    // With marker bytes at positions 3/7/11/15 of each lane, a 9-byte shift
    // leaves only the one originally at position 3 (now at position 12).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_bslli_epi128() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let r = _mm512_bslli_epi128::<9>(a);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        );
        assert_eq_m512i(r, e);
    }

    // VPSRLDQ: byte-shift each 128-bit lane right by IMM bytes, zero-filling.
    // Distinct byte values 1..=64 make it visible that each lane shifts
    // independently: the top 3 bytes of every lane become zero.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_bsrli_epi128() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
            49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        );
        let r = _mm512_bsrli_epi128::<3>(a);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
            0, 0, 0, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
            0, 0, 0, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            0, 0, 0, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
        );
        assert_eq_m512i(r, e);
    }
20941
    // VPALIGNR: per 128-bit lane, concatenate (a:b) and shift right by IMM
    // bytes. With IMM = 14, the two low bytes of each result lane come from
    // the top of `b` (all ones) and the rest from the bottom of `a`.

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm512_set1_epi8(1);
        let r = _mm512_alignr_epi8::<14>(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m512i(r, e);
    }

    // Mask 0 keeps `src` (= a); an all-ones mask selects the aligned result.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm512_set1_epi8(1);
        let r = _mm512_mask_alignr_epi8::<14>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_alignr_epi8::<14>(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m512i(r, e);
    }

    // Mask 0 zeroes the result; all-ones selects the aligned result.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm512_set1_epi8(1);
        let r = _mm512_maskz_alignr_epi8::<14>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_alignr_epi8::<14>(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm256_set1_epi8(1);
        let r = _mm256_mask_alignr_epi8::<14>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_alignr_epi8::<14>(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm256_set1_epi8(1);
        let r = _mm256_maskz_alignr_epi8::<14>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_alignr_epi8::<14>(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_alignr_epi8() {
        let a = _mm_set_epi8(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0);
        let b = _mm_set1_epi8(1);
        let r = _mm_mask_alignr_epi8::<14>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_alignr_epi8::<14>(a, 0b11111111_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_alignr_epi8() {
        let a = _mm_set_epi8(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0);
        let b = _mm_set1_epi8(1);
        let r = _mm_maskz_alignr_epi8::<14>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_alignr_epi8::<14>(0b11111111_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1);
        assert_eq_m128i(r, e);
    }
21077
    // Masked store of the saturating 16->8-bit conversion through a raw
    // pointer. The 512/256-bit variants use an all-ones mask, which writes
    // every byte of the destination, so starting from an *undefined* vector
    // is sound.

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtsepi16_storeu_epi8() {
        let a = _mm512_set1_epi16(i16::MAX);
        let mut r = _mm256_undefined_si256();
        _mm512_mask_cvtsepi16_storeu_epi8(
            &mut r as *mut _ as *mut i8,
            0b11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm256_set1_epi8(i8::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtsepi16_storeu_epi8() {
        let a = _mm256_set1_epi16(i16::MAX);
        let mut r = _mm_undefined_si128();
        _mm256_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(i8::MAX);
        assert_eq_m128i(r, e);
    }

    // The 128-bit variant stores only 8 bytes (8-bit mask), so the
    // destination is pre-zeroed and the upper half must stay zero.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtsepi16_storeu_epi8() {
        let a = _mm_set1_epi16(i16::MAX);
        let mut r = _mm_set1_epi8(0);
        _mm_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0, 0, 0, 0, 0,
            i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
        );
        assert_eq_m128i(r, e);
    }
21112
21113    #[simd_test(enable = "avx512bw")]
21114    unsafe fn test_mm512_mask_cvtepi16_storeu_epi8() {
21115        let a = _mm512_set1_epi16(8);
21116        let mut r = _mm256_undefined_si256();
21117        _mm512_mask_cvtepi16_storeu_epi8(
21118            &mut r as *mut _ as *mut i8,
21119            0b11111111_11111111_11111111_11111111,
21120            a,
21121        );
21122        let e = _mm256_set1_epi8(8);
21123        assert_eq_m256i(r, e);
21124    }
21125
21126    #[simd_test(enable = "avx512bw,avx512vl")]
21127    unsafe fn test_mm256_mask_cvtepi16_storeu_epi8() {
21128        let a = _mm256_set1_epi16(8);
21129        let mut r = _mm_undefined_si128();
21130        _mm256_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
21131        let e = _mm_set1_epi8(8);
21132        assert_eq_m128i(r, e);
21133    }
21134
21135    #[simd_test(enable = "avx512bw,avx512vl")]
21136    unsafe fn test_mm_mask_cvtepi16_storeu_epi8() {
21137        let a = _mm_set1_epi16(8);
21138        let mut r = _mm_set1_epi8(0);
21139        _mm_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
21140        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8);
21141        assert_eq_m128i(r, e);
21142    }
21143
21144    #[simd_test(enable = "avx512bw")]
21145    unsafe fn test_mm512_mask_cvtusepi16_storeu_epi8() {
21146        let a = _mm512_set1_epi16(i16::MAX);
21147        let mut r = _mm256_undefined_si256();
21148        _mm512_mask_cvtusepi16_storeu_epi8(
21149            &mut r as *mut _ as *mut i8,
21150            0b11111111_11111111_11111111_11111111,
21151            a,
21152        );
21153        let e = _mm256_set1_epi8(u8::MAX as i8);
21154        assert_eq_m256i(r, e);
21155    }
21156
21157    #[simd_test(enable = "avx512bw,avx512vl")]
21158    unsafe fn test_mm256_mask_cvtusepi16_storeu_epi8() {
21159        let a = _mm256_set1_epi16(i16::MAX);
21160        let mut r = _mm_undefined_si128();
21161        _mm256_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
21162        let e = _mm_set1_epi8(u8::MAX as i8);
21163        assert_eq_m128i(r, e);
21164    }
21165
21166    #[simd_test(enable = "avx512bw,avx512vl")]
21167    unsafe fn test_mm_mask_cvtusepi16_storeu_epi8() {
21168        let a = _mm_set1_epi16(i16::MAX);
21169        let mut r = _mm_set1_epi8(0);
21170        _mm_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
21171        #[rustfmt::skip]
21172        let e = _mm_set_epi8(
21173            0, 0, 0, 0,
21174            0, 0, 0, 0,
21175            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, 
21176            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
21177        );
21178        assert_eq_m128i(r, e);
21179    }
21180}