1use crate::core_arch::{simd::*, x86::*};
22use crate::intrinsics::simd::*;
23
24#[cfg(test)]
25use stdarch_test::assert_instr;
26
/// Computes the absolute value of each packed 32-bit signed integer in `a`.
///
/// Generates `vpabsd`. Note: for a lane equal to `i32::MIN` the negation
/// wraps, so the result lane is `i32::MIN` — matching the hardware behavior.
27#[inline]
31#[target_feature(enable = "avx2")]
32#[cfg_attr(test, assert_instr(vpabsd))]
33#[stable(feature = "simd_x86", since = "1.27.0")]
34pub fn _mm256_abs_epi32(a: __m256i) -> __m256i {
35 unsafe {
36 let a = a.as_i32x8();
        // For each lane: pick the negated value where the lane is negative,
        // otherwise keep the original value.
37 let r = simd_select::<m32x8, _>(simd_lt(a, i32x8::ZERO), simd_neg(a), a);
38 transmute(r)
39 }
40}
41
/// Computes the absolute value of each packed 16-bit signed integer in `a`.
///
/// Generates `vpabsw`. A lane equal to `i16::MIN` wraps back to `i16::MIN`.
42#[inline]
46#[target_feature(enable = "avx2")]
47#[cfg_attr(test, assert_instr(vpabsw))]
48#[stable(feature = "simd_x86", since = "1.27.0")]
49pub fn _mm256_abs_epi16(a: __m256i) -> __m256i {
50 unsafe {
51 let a = a.as_i16x16();
        // Negate only the negative lanes; positive lanes pass through.
52 let r = simd_select::<m16x16, _>(simd_lt(a, i16x16::ZERO), simd_neg(a), a);
53 transmute(r)
54 }
55}
56
/// Computes the absolute value of each packed 8-bit signed integer in `a`.
///
/// Generates `vpabsb`. A lane equal to `i8::MIN` wraps back to `i8::MIN`.
57#[inline]
61#[target_feature(enable = "avx2")]
62#[cfg_attr(test, assert_instr(vpabsb))]
63#[stable(feature = "simd_x86", since = "1.27.0")]
64pub fn _mm256_abs_epi8(a: __m256i) -> __m256i {
65 unsafe {
66 let a = a.as_i8x32();
        // Negate only the negative lanes; positive lanes pass through.
67 let r = simd_select::<m8x32, _>(simd_lt(a, i8x32::ZERO), simd_neg(a), a);
68 transmute(r)
69 }
70}
71
/// Adds packed 64-bit integers in `a` and `b` (wrapping on overflow).
///
/// Generates `vpaddq`.
72#[inline]
76#[target_feature(enable = "avx2")]
77#[cfg_attr(test, assert_instr(vpaddq))]
78#[stable(feature = "simd_x86", since = "1.27.0")]
79pub fn _mm256_add_epi64(a: __m256i, b: __m256i) -> __m256i {
80 unsafe { transmute(simd_add(a.as_i64x4(), b.as_i64x4())) }
81}
82
/// Adds packed 32-bit integers in `a` and `b` (wrapping on overflow).
///
/// Generates `vpaddd`.
83#[inline]
87#[target_feature(enable = "avx2")]
88#[cfg_attr(test, assert_instr(vpaddd))]
89#[stable(feature = "simd_x86", since = "1.27.0")]
90pub fn _mm256_add_epi32(a: __m256i, b: __m256i) -> __m256i {
91 unsafe { transmute(simd_add(a.as_i32x8(), b.as_i32x8())) }
92}
93
/// Adds packed 16-bit integers in `a` and `b` (wrapping on overflow).
///
/// Generates `vpaddw`.
94#[inline]
98#[target_feature(enable = "avx2")]
99#[cfg_attr(test, assert_instr(vpaddw))]
100#[stable(feature = "simd_x86", since = "1.27.0")]
101pub fn _mm256_add_epi16(a: __m256i, b: __m256i) -> __m256i {
102 unsafe { transmute(simd_add(a.as_i16x16(), b.as_i16x16())) }
103}
104
/// Adds packed 8-bit integers in `a` and `b` (wrapping on overflow).
///
/// Generates `vpaddb`.
105#[inline]
109#[target_feature(enable = "avx2")]
110#[cfg_attr(test, assert_instr(vpaddb))]
111#[stable(feature = "simd_x86", since = "1.27.0")]
112pub fn _mm256_add_epi8(a: __m256i, b: __m256i) -> __m256i {
113 unsafe { transmute(simd_add(a.as_i8x32(), b.as_i8x32())) }
114}
115
/// Adds packed 8-bit signed integers in `a` and `b` using saturation
/// (results clamp to the `i8` range instead of wrapping).
///
/// Generates `vpaddsb`.
116#[inline]
120#[target_feature(enable = "avx2")]
121#[cfg_attr(test, assert_instr(vpaddsb))]
122#[stable(feature = "simd_x86", since = "1.27.0")]
123pub fn _mm256_adds_epi8(a: __m256i, b: __m256i) -> __m256i {
124 unsafe { transmute(simd_saturating_add(a.as_i8x32(), b.as_i8x32())) }
125}
126
/// Adds packed 16-bit signed integers in `a` and `b` using saturation
/// (results clamp to the `i16` range instead of wrapping).
///
/// Generates `vpaddsw`.
127#[inline]
131#[target_feature(enable = "avx2")]
132#[cfg_attr(test, assert_instr(vpaddsw))]
133#[stable(feature = "simd_x86", since = "1.27.0")]
134pub fn _mm256_adds_epi16(a: __m256i, b: __m256i) -> __m256i {
135 unsafe { transmute(simd_saturating_add(a.as_i16x16(), b.as_i16x16())) }
136}
137
/// Adds packed 8-bit unsigned integers in `a` and `b` using saturation
/// (results clamp to `u8::MAX` instead of wrapping).
///
/// Generates `vpaddusb`.
138#[inline]
142#[target_feature(enable = "avx2")]
143#[cfg_attr(test, assert_instr(vpaddusb))]
144#[stable(feature = "simd_x86", since = "1.27.0")]
145pub fn _mm256_adds_epu8(a: __m256i, b: __m256i) -> __m256i {
146 unsafe { transmute(simd_saturating_add(a.as_u8x32(), b.as_u8x32())) }
147}
148
/// Adds packed 16-bit unsigned integers in `a` and `b` using saturation
/// (results clamp to `u16::MAX` instead of wrapping).
///
/// Generates `vpaddusw`.
149#[inline]
153#[target_feature(enable = "avx2")]
154#[cfg_attr(test, assert_instr(vpaddusw))]
155#[stable(feature = "simd_x86", since = "1.27.0")]
156pub fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i {
157 unsafe { transmute(simd_saturating_add(a.as_u16x16(), b.as_u16x16())) }
158}
159
/// Concatenates pairs of 16-byte lanes from `a` and `b`, shifts each pair
/// right by `IMM8` bytes, and returns the low 16 bytes of each lane.
///
/// Generates `vpalignr`. The shift is performed independently within each
/// 128-bit lane, which is why all index arithmetic below is modulo 16.
160#[inline]
165#[target_feature(enable = "avx2")]
166#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 7))]
167#[rustc_legacy_const_generics(2)]
168#[stable(feature = "simd_x86", since = "1.27.0")]
169pub fn _mm256_alignr_epi8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
170 static_assert_uimm_bits!(IMM8, 8);
171
    // Shifting by 32 or more bytes pushes everything out: result is all zero.
172 if IMM8 >= 32 {
175 return _mm256_setzero_si256();
176 }
    // For shifts of 17..=31 bytes, only bytes of `a` can remain; model this
    // as a shift of (IMM8 - 16) over the pair (zero, a).
177 let (a, b) = if IMM8 > 16 {
180 (_mm256_setzero_si256(), a)
181 } else {
182 (a, b)
183 };
184 unsafe {
        // A shift of exactly 16 bytes selects `a` (or zero, after the swap
        // above for the > 16 case) unchanged.
185 if IMM8 == 16 {
186 return transmute(a);
187 }
188 }
    // Maps output byte `i` to a shuffle index into the concatenation of
    // `b` (indices 0..32) and `a` (indices 32..64), per 128-bit lane:
    // bytes that stay within the lane come from `b` shifted by `shift`;
    // the rest are filled from the matching lane of `a` (hence the +16
    // which lands in the second shuffle operand for in-lane positions).
189 const fn mask(shift: u32, i: u32) -> u32 {
190 let shift = shift % 16;
191 let mod_i = i % 16;
192 if mod_i < (16 - shift) {
193 i + shift
194 } else {
195 i + 16 + shift
196 }
197 }
198
199 unsafe {
200 let r: i8x32 = simd_shuffle!(
201 b.as_i8x32(),
202 a.as_i8x32(),
203 [
204 mask(IMM8 as u32, 0),
205 mask(IMM8 as u32, 1),
206 mask(IMM8 as u32, 2),
207 mask(IMM8 as u32, 3),
208 mask(IMM8 as u32, 4),
209 mask(IMM8 as u32, 5),
210 mask(IMM8 as u32, 6),
211 mask(IMM8 as u32, 7),
212 mask(IMM8 as u32, 8),
213 mask(IMM8 as u32, 9),
214 mask(IMM8 as u32, 10),
215 mask(IMM8 as u32, 11),
216 mask(IMM8 as u32, 12),
217 mask(IMM8 as u32, 13),
218 mask(IMM8 as u32, 14),
219 mask(IMM8 as u32, 15),
220 mask(IMM8 as u32, 16),
221 mask(IMM8 as u32, 17),
222 mask(IMM8 as u32, 18),
223 mask(IMM8 as u32, 19),
224 mask(IMM8 as u32, 20),
225 mask(IMM8 as u32, 21),
226 mask(IMM8 as u32, 22),
227 mask(IMM8 as u32, 23),
228 mask(IMM8 as u32, 24),
229 mask(IMM8 as u32, 25),
230 mask(IMM8 as u32, 26),
231 mask(IMM8 as u32, 27),
232 mask(IMM8 as u32, 28),
233 mask(IMM8 as u32, 29),
234 mask(IMM8 as u32, 30),
235 mask(IMM8 as u32, 31),
236 ],
237 );
238 transmute(r)
239 }
240}
241
/// Computes the bitwise AND of the 256 bits in `a` and `b`.
///
/// May generate `vandps` (the float form is equivalent for bitwise ops).
242#[inline]
247#[target_feature(enable = "avx2")]
248#[cfg_attr(test, assert_instr(vandps))]
249#[stable(feature = "simd_x86", since = "1.27.0")]
250pub fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i {
251 unsafe { transmute(simd_and(a.as_i64x4(), b.as_i64x4())) }
252}
253
/// Computes the bitwise AND-NOT of 256 bits: `(NOT a) AND b`.
///
/// May generate `vandnps` (the float form is equivalent for bitwise ops).
254#[inline]
259#[target_feature(enable = "avx2")]
260#[cfg_attr(test, assert_instr(vandnps))]
261#[stable(feature = "simd_x86", since = "1.27.0")]
262pub fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i {
263 unsafe {
        // NOT is expressed as XOR with all-ones, then ANDed with `b`.
264 let all_ones = _mm256_set1_epi8(-1);
265 transmute(simd_and(
266 simd_xor(a.as_i64x4(), all_ones.as_i64x4()),
267 b.as_i64x4(),
268 ))
269 }
270}
271
/// Averages packed unsigned 16-bit integers in `a` and `b`, rounding up:
/// each lane is `(a + b + 1) >> 1`.
///
/// Generates `vpavgw`.
272#[inline]
276#[target_feature(enable = "avx2")]
277#[cfg_attr(test, assert_instr(vpavgw))]
278#[stable(feature = "simd_x86", since = "1.27.0")]
279pub fn _mm256_avg_epu16(a: __m256i, b: __m256i) -> __m256i {
280 unsafe {
        // Widen to 32 bits so the `a + b + 1` intermediate cannot overflow.
281 let a = simd_cast::<_, u32x16>(a.as_u16x16());
282 let b = simd_cast::<_, u32x16>(b.as_u16x16());
283 let r = simd_shr(simd_add(simd_add(a, b), u32x16::splat(1)), u32x16::splat(1));
284 transmute(simd_cast::<_, u16x16>(r))
285 }
286}
287
/// Averages packed unsigned 8-bit integers in `a` and `b`, rounding up:
/// each lane is `(a + b + 1) >> 1`.
///
/// Generates `vpavgb`.
288#[inline]
292#[target_feature(enable = "avx2")]
293#[cfg_attr(test, assert_instr(vpavgb))]
294#[stable(feature = "simd_x86", since = "1.27.0")]
295pub fn _mm256_avg_epu8(a: __m256i, b: __m256i) -> __m256i {
296 unsafe {
        // Widen to 16 bits so the `a + b + 1` intermediate cannot overflow.
297 let a = simd_cast::<_, u16x32>(a.as_u8x32());
298 let b = simd_cast::<_, u16x32>(b.as_u8x32());
299 let r = simd_shr(simd_add(simd_add(a, b), u16x32::splat(1)), u16x32::splat(1));
300 transmute(simd_cast::<_, u8x32>(r))
301 }
302}
303
/// Blends packed 32-bit integers from `a` and `b` under control of the
/// 4-bit immediate `IMM4`: bit `i` set selects lane `i` from `b`,
/// clear selects it from `a`.
///
/// Generates `vblendps` (the float blend is equivalent for integer data).
304#[inline]
308#[target_feature(enable = "avx2")]
309#[cfg_attr(test, assert_instr(vblendps, IMM4 = 9))]
310#[rustc_legacy_const_generics(2)]
311#[stable(feature = "simd_x86", since = "1.27.0")]
312pub fn _mm_blend_epi32<const IMM4: i32>(a: __m128i, b: __m128i) -> __m128i {
313 static_assert_uimm_bits!(IMM4, 4);
314 unsafe {
315 let a = a.as_i32x4();
316 let b = b.as_i32x4();
        // Each output lane is looked up in a small table indexed by the two
        // control bits covering that lane pair; indices >= 4 refer to `b`.
317 let r: i32x4 = simd_shuffle!(
318 a,
319 b,
320 [
321 [0, 4, 0, 4][IMM4 as usize & 0b11],
322 [1, 1, 5, 5][IMM4 as usize & 0b11],
323 [2, 6, 2, 6][(IMM4 as usize >> 2) & 0b11],
324 [3, 3, 7, 7][(IMM4 as usize >> 2) & 0b11],
325 ],
326 );
327 transmute(r)
328 }
329}
330
/// Blends packed 32-bit integers from `a` and `b` under control of the
/// 8-bit immediate `IMM8`: bit `i` set selects lane `i` from `b`,
/// clear selects it from `a`.
///
/// Generates `vblendps` (the float blend is equivalent for integer data).
331#[inline]
335#[target_feature(enable = "avx2")]
336#[cfg_attr(test, assert_instr(vblendps, IMM8 = 9))]
337#[rustc_legacy_const_generics(2)]
338#[stable(feature = "simd_x86", since = "1.27.0")]
339pub fn _mm256_blend_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
340 static_assert_uimm_bits!(IMM8, 8);
341 unsafe {
342 let a = a.as_i32x8();
343 let b = b.as_i32x8();
        // Each output lane is looked up in a table indexed by the two control
        // bits covering that lane pair; indices >= 8 refer to `b`.
344 let r: i32x8 = simd_shuffle!(
345 a,
346 b,
347 [
348 [0, 8, 0, 8][IMM8 as usize & 0b11],
349 [1, 1, 9, 9][IMM8 as usize & 0b11],
350 [2, 10, 2, 10][(IMM8 as usize >> 2) & 0b11],
351 [3, 3, 11, 11][(IMM8 as usize >> 2) & 0b11],
352 [4, 12, 4, 12][(IMM8 as usize >> 4) & 0b11],
353 [5, 5, 13, 13][(IMM8 as usize >> 4) & 0b11],
354 [6, 14, 6, 14][(IMM8 as usize >> 6) & 0b11],
355 [7, 7, 15, 15][(IMM8 as usize >> 6) & 0b11],
356 ],
357 );
358 transmute(r)
359 }
360}
361
/// Blends packed 16-bit integers from `a` and `b` under control of `IMM8`:
/// bit `i` set selects word `i` from `b`, clear from `a`. The same 8-bit
/// mask is applied to both 128-bit halves (words 8..16 reuse bits 0..8).
///
/// Generates `vpblendw`.
362#[inline]
366#[target_feature(enable = "avx2")]
367#[cfg_attr(test, assert_instr(vpblendw, IMM8 = 9))]
368#[rustc_legacy_const_generics(2)]
369#[stable(feature = "simd_x86", since = "1.27.0")]
370pub fn _mm256_blend_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
371 static_assert_uimm_bits!(IMM8, 8);
372 unsafe {
373 let a = a.as_i16x16();
374 let b = b.as_i16x16();
375
        // Each output word is looked up in a table indexed by the two control
        // bits covering that word pair; indices >= 16 refer to `b`. Note the
        // second half (words 8..16) repeats the same control-bit pattern.
376 let r: i16x16 = simd_shuffle!(
377 a,
378 b,
379 [
380 [0, 16, 0, 16][IMM8 as usize & 0b11],
381 [1, 1, 17, 17][IMM8 as usize & 0b11],
382 [2, 18, 2, 18][(IMM8 as usize >> 2) & 0b11],
383 [3, 3, 19, 19][(IMM8 as usize >> 2) & 0b11],
384 [4, 20, 4, 20][(IMM8 as usize >> 4) & 0b11],
385 [5, 5, 21, 21][(IMM8 as usize >> 4) & 0b11],
386 [6, 22, 6, 22][(IMM8 as usize >> 6) & 0b11],
387 [7, 7, 23, 23][(IMM8 as usize >> 6) & 0b11],
388 [8, 24, 8, 24][IMM8 as usize & 0b11],
389 [9, 9, 25, 25][IMM8 as usize & 0b11],
390 [10, 26, 10, 26][(IMM8 as usize >> 2) & 0b11],
391 [11, 11, 27, 27][(IMM8 as usize >> 2) & 0b11],
392 [12, 28, 12, 28][(IMM8 as usize >> 4) & 0b11],
393 [13, 13, 29, 29][(IMM8 as usize >> 4) & 0b11],
394 [14, 30, 14, 30][(IMM8 as usize >> 6) & 0b11],
395 [15, 15, 31, 31][(IMM8 as usize >> 6) & 0b11],
396 ],
397 );
398 transmute(r)
399 }
400}
401
/// Blends packed 8-bit integers from `a` and `b` using a byte mask: for
/// each byte, if the high bit of `mask` is set the byte comes from `b`,
/// otherwise from `a`.
///
/// Generates `vpblendvb`.
402#[inline]
406#[target_feature(enable = "avx2")]
407#[cfg_attr(test, assert_instr(vpblendvb))]
408#[stable(feature = "simd_x86", since = "1.27.0")]
409pub fn _mm256_blendv_epi8(a: __m256i, b: __m256i, mask: __m256i) -> __m256i {
410 unsafe {
        // `< 0` tests the sign bit of each mask byte, matching the hardware's
        // "high bit selects" semantics.
411 let mask: i8x32 = simd_lt(mask.as_i8x32(), i8x32::ZERO);
412 transmute(simd_select(mask, b.as_i8x32(), a.as_i8x32()))
413 }
414}
415
/// Broadcasts the lowest 8-bit integer of `a` to all 16 bytes of the
/// 128-bit result.
///
/// Generates `vpbroadcastb`.
416#[inline]
421#[target_feature(enable = "avx2")]
422#[cfg_attr(test, assert_instr(vpbroadcastb))]
423#[stable(feature = "simd_x86", since = "1.27.0")]
424pub fn _mm_broadcastb_epi8(a: __m128i) -> __m128i {
425 unsafe {
        // All shuffle indices are 0, so every output byte is element 0 of `a`.
426 let ret = simd_shuffle!(a.as_i8x16(), i8x16::ZERO, [0_u32; 16]);
427 transmute::<i8x16, _>(ret)
428 }
429}
430
/// Broadcasts the lowest 8-bit integer of `a` to all 32 bytes of the
/// 256-bit result.
///
/// Generates `vpbroadcastb`.
431#[inline]
436#[target_feature(enable = "avx2")]
437#[cfg_attr(test, assert_instr(vpbroadcastb))]
438#[stable(feature = "simd_x86", since = "1.27.0")]
439pub fn _mm256_broadcastb_epi8(a: __m128i) -> __m256i {
440 unsafe {
441 let ret = simd_shuffle!(a.as_i8x16(), i8x16::ZERO, [0_u32; 32]);
442 transmute::<i8x32, _>(ret)
443 }
444}
445
/// Broadcasts the lowest 32-bit integer of `a` to all four lanes of the
/// 128-bit result.
///
/// May generate `vbroadcastss` (equivalent bit pattern for integer data).
446#[inline]
453#[target_feature(enable = "avx2")]
454#[cfg_attr(test, assert_instr(vbroadcastss))]
455#[stable(feature = "simd_x86", since = "1.27.0")]
456pub fn _mm_broadcastd_epi32(a: __m128i) -> __m128i {
457 unsafe {
458 let ret = simd_shuffle!(a.as_i32x4(), i32x4::ZERO, [0_u32; 4]);
459 transmute::<i32x4, _>(ret)
460 }
461}
462
/// Broadcasts the lowest 32-bit integer of `a` to all eight lanes of the
/// 256-bit result.
///
/// May generate `vbroadcastss` (equivalent bit pattern for integer data).
463#[inline]
470#[target_feature(enable = "avx2")]
471#[cfg_attr(test, assert_instr(vbroadcastss))]
472#[stable(feature = "simd_x86", since = "1.27.0")]
473pub fn _mm256_broadcastd_epi32(a: __m128i) -> __m256i {
474 unsafe {
475 let ret = simd_shuffle!(a.as_i32x4(), i32x4::ZERO, [0_u32; 8]);
476 transmute::<i32x8, _>(ret)
477 }
478}
479
/// Broadcasts the lowest 64-bit integer of `a` to both lanes of the
/// 128-bit result.
///
/// May generate `vmovddup` (duplicates the low 64 bits).
480#[inline]
485#[target_feature(enable = "avx2")]
486#[cfg_attr(test, assert_instr(vmovddup))]
489#[stable(feature = "simd_x86", since = "1.27.0")]
490pub fn _mm_broadcastq_epi64(a: __m128i) -> __m128i {
491 unsafe {
492 let ret = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [0_u32; 2]);
493 transmute::<i64x2, _>(ret)
494 }
495}
496
/// Broadcasts the lowest 64-bit integer of `a` to all four lanes of the
/// 256-bit result.
///
/// May generate `vbroadcastsd` (equivalent bit pattern for integer data).
497#[inline]
502#[target_feature(enable = "avx2")]
503#[cfg_attr(test, assert_instr(vbroadcastsd))]
504#[stable(feature = "simd_x86", since = "1.27.0")]
505pub fn _mm256_broadcastq_epi64(a: __m128i) -> __m256i {
506 unsafe {
507 let ret = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [0_u32; 4]);
508 transmute::<i64x4, _>(ret)
509 }
510}
511
/// Broadcasts the low double-precision element of `a` to both lanes of
/// the 128-bit result.
///
/// May generate `vmovddup`.
512#[inline]
517#[target_feature(enable = "avx2")]
518#[cfg_attr(test, assert_instr(vmovddup))]
519#[stable(feature = "simd_x86", since = "1.27.0")]
520pub fn _mm_broadcastsd_pd(a: __m128d) -> __m128d {
521 unsafe { simd_shuffle!(a, _mm_setzero_pd(), [0_u32; 2]) }
522}
523
/// Broadcasts the low double-precision element of `a` to all four lanes
/// of the 256-bit result.
///
/// Generates `vbroadcastsd`.
524#[inline]
529#[target_feature(enable = "avx2")]
530#[cfg_attr(test, assert_instr(vbroadcastsd))]
531#[stable(feature = "simd_x86", since = "1.27.0")]
532pub fn _mm256_broadcastsd_pd(a: __m128d) -> __m256d {
533 unsafe { simd_shuffle!(a, _mm_setzero_pd(), [0_u32; 4]) }
534}
535
/// Copies the 128 bits of `a` into both 128-bit halves of the 256-bit
/// result.
536#[inline]
541#[target_feature(enable = "avx2")]
542#[stable(feature = "simd_x86_updates", since = "1.82.0")]
543pub fn _mm_broadcastsi128_si256(a: __m128i) -> __m256i {
544 unsafe {
        // Indices [0, 1, 0, 1] duplicate the two 64-bit elements of `a`.
545 let ret = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 1, 0, 1]);
546 transmute::<i64x4, _>(ret)
547 }
548}
549
/// Copies the 128 bits of `a` into both 128-bit halves of the 256-bit
/// result. Same operation as [`_mm_broadcastsi128_si256`] under the
/// AVX2-standard name.
550#[inline]
557#[target_feature(enable = "avx2")]
558#[stable(feature = "simd_x86", since = "1.27.0")]
559pub fn _mm256_broadcastsi128_si256(a: __m128i) -> __m256i {
560 unsafe {
        // Indices [0, 1, 0, 1] duplicate the two 64-bit elements of `a`.
561 let ret = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 1, 0, 1]);
562 transmute::<i64x4, _>(ret)
563 }
564}
565
/// Broadcasts the low single-precision element of `a` to all four lanes
/// of the 128-bit result.
///
/// Generates `vbroadcastss`.
566#[inline]
571#[target_feature(enable = "avx2")]
572#[cfg_attr(test, assert_instr(vbroadcastss))]
573#[stable(feature = "simd_x86", since = "1.27.0")]
574pub fn _mm_broadcastss_ps(a: __m128) -> __m128 {
575 unsafe { simd_shuffle!(a, _mm_setzero_ps(), [0_u32; 4]) }
576}
577
/// Broadcasts the low single-precision element of `a` to all eight lanes
/// of the 256-bit result.
///
/// Generates `vbroadcastss`.
578#[inline]
583#[target_feature(enable = "avx2")]
584#[cfg_attr(test, assert_instr(vbroadcastss))]
585#[stable(feature = "simd_x86", since = "1.27.0")]
586pub fn _mm256_broadcastss_ps(a: __m128) -> __m256 {
587 unsafe { simd_shuffle!(a, _mm_setzero_ps(), [0_u32; 8]) }
588}
589
/// Broadcasts the lowest 16-bit integer of `a` to all eight lanes of the
/// 128-bit result.
///
/// Generates `vpbroadcastw`.
590#[inline]
595#[target_feature(enable = "avx2")]
596#[cfg_attr(test, assert_instr(vpbroadcastw))]
597#[stable(feature = "simd_x86", since = "1.27.0")]
598pub fn _mm_broadcastw_epi16(a: __m128i) -> __m128i {
599 unsafe {
600 let ret = simd_shuffle!(a.as_i16x8(), i16x8::ZERO, [0_u32; 8]);
601 transmute::<i16x8, _>(ret)
602 }
603}
604
/// Broadcasts the lowest 16-bit integer of `a` to all sixteen lanes of
/// the 256-bit result.
///
/// Generates `vpbroadcastw`.
605#[inline]
610#[target_feature(enable = "avx2")]
611#[cfg_attr(test, assert_instr(vpbroadcastw))]
612#[stable(feature = "simd_x86", since = "1.27.0")]
613pub fn _mm256_broadcastw_epi16(a: __m128i) -> __m256i {
614 unsafe {
615 let ret = simd_shuffle!(a.as_i16x8(), i16x8::ZERO, [0_u32; 16]);
616 transmute::<i16x16, _>(ret)
617 }
618}
619
/// Compares packed 64-bit integers in `a` and `b` for equality; each
/// result lane is all-ones on equality, zero otherwise.
///
/// Generates `vpcmpeqq`.
620#[inline]
624#[target_feature(enable = "avx2")]
625#[cfg_attr(test, assert_instr(vpcmpeqq))]
626#[stable(feature = "simd_x86", since = "1.27.0")]
627pub fn _mm256_cmpeq_epi64(a: __m256i, b: __m256i) -> __m256i {
628 unsafe { transmute::<i64x4, _>(simd_eq(a.as_i64x4(), b.as_i64x4())) }
629}
630
/// Compares packed 32-bit integers in `a` and `b` for equality; each
/// result lane is all-ones on equality, zero otherwise.
///
/// Generates `vpcmpeqd`.
631#[inline]
635#[target_feature(enable = "avx2")]
636#[cfg_attr(test, assert_instr(vpcmpeqd))]
637#[stable(feature = "simd_x86", since = "1.27.0")]
638pub fn _mm256_cmpeq_epi32(a: __m256i, b: __m256i) -> __m256i {
639 unsafe { transmute::<i32x8, _>(simd_eq(a.as_i32x8(), b.as_i32x8())) }
640}
641
/// Compares packed 16-bit integers in `a` and `b` for equality; each
/// result lane is all-ones on equality, zero otherwise.
///
/// Generates `vpcmpeqw`.
642#[inline]
646#[target_feature(enable = "avx2")]
647#[cfg_attr(test, assert_instr(vpcmpeqw))]
648#[stable(feature = "simd_x86", since = "1.27.0")]
649pub fn _mm256_cmpeq_epi16(a: __m256i, b: __m256i) -> __m256i {
650 unsafe { transmute::<i16x16, _>(simd_eq(a.as_i16x16(), b.as_i16x16())) }
651}
652
/// Compares packed 8-bit integers in `a` and `b` for equality; each
/// result lane is all-ones on equality, zero otherwise.
///
/// Generates `vpcmpeqb`.
653#[inline]
657#[target_feature(enable = "avx2")]
658#[cfg_attr(test, assert_instr(vpcmpeqb))]
659#[stable(feature = "simd_x86", since = "1.27.0")]
660pub fn _mm256_cmpeq_epi8(a: __m256i, b: __m256i) -> __m256i {
661 unsafe { transmute::<i8x32, _>(simd_eq(a.as_i8x32(), b.as_i8x32())) }
662}
663
/// Compares packed 64-bit signed integers for `a > b`; each result lane
/// is all-ones when true, zero otherwise.
///
/// Generates `vpcmpgtq`.
664#[inline]
668#[target_feature(enable = "avx2")]
669#[cfg_attr(test, assert_instr(vpcmpgtq))]
670#[stable(feature = "simd_x86", since = "1.27.0")]
671pub fn _mm256_cmpgt_epi64(a: __m256i, b: __m256i) -> __m256i {
672 unsafe { transmute::<i64x4, _>(simd_gt(a.as_i64x4(), b.as_i64x4())) }
673}
674
/// Compares packed 32-bit signed integers for `a > b`; each result lane
/// is all-ones when true, zero otherwise.
///
/// Generates `vpcmpgtd`.
675#[inline]
679#[target_feature(enable = "avx2")]
680#[cfg_attr(test, assert_instr(vpcmpgtd))]
681#[stable(feature = "simd_x86", since = "1.27.0")]
682pub fn _mm256_cmpgt_epi32(a: __m256i, b: __m256i) -> __m256i {
683 unsafe { transmute::<i32x8, _>(simd_gt(a.as_i32x8(), b.as_i32x8())) }
684}
685
/// Compares packed 16-bit signed integers for `a > b`; each result lane
/// is all-ones when true, zero otherwise.
///
/// Generates `vpcmpgtw`.
686#[inline]
690#[target_feature(enable = "avx2")]
691#[cfg_attr(test, assert_instr(vpcmpgtw))]
692#[stable(feature = "simd_x86", since = "1.27.0")]
693pub fn _mm256_cmpgt_epi16(a: __m256i, b: __m256i) -> __m256i {
694 unsafe { transmute::<i16x16, _>(simd_gt(a.as_i16x16(), b.as_i16x16())) }
695}
696
/// Compares packed 8-bit signed integers for `a > b`; each result lane
/// is all-ones when true, zero otherwise.
///
/// Generates `vpcmpgtb`.
697#[inline]
701#[target_feature(enable = "avx2")]
702#[cfg_attr(test, assert_instr(vpcmpgtb))]
703#[stable(feature = "simd_x86", since = "1.27.0")]
704pub fn _mm256_cmpgt_epi8(a: __m256i, b: __m256i) -> __m256i {
705 unsafe { transmute::<i8x32, _>(simd_gt(a.as_i8x32(), b.as_i8x32())) }
706}
707
/// Sign-extends eight packed 16-bit integers in `a` to 32-bit integers.
///
/// Generates `vpmovsxwd`.
708#[inline]
712#[target_feature(enable = "avx2")]
713#[cfg_attr(test, assert_instr(vpmovsxwd))]
714#[stable(feature = "simd_x86", since = "1.27.0")]
715pub fn _mm256_cvtepi16_epi32(a: __m128i) -> __m256i {
716 unsafe { transmute::<i32x8, _>(simd_cast(a.as_i16x8())) }
717}
718
/// Sign-extends the low four packed 16-bit integers in `a` to 64-bit
/// integers.
///
/// Generates `vpmovsxwq`.
719#[inline]
723#[target_feature(enable = "avx2")]
724#[cfg_attr(test, assert_instr(vpmovsxwq))]
725#[stable(feature = "simd_x86", since = "1.27.0")]
726pub fn _mm256_cvtepi16_epi64(a: __m128i) -> __m256i {
727 unsafe {
728 let a = a.as_i16x8();
        // Only the low four 16-bit elements fit in four 64-bit output lanes.
729 let v64: i16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
730 transmute::<i64x4, _>(simd_cast(v64))
731 }
732}
733
/// Sign-extends four packed 32-bit integers in `a` to 64-bit integers.
///
/// Generates `vpmovsxdq`.
734#[inline]
738#[target_feature(enable = "avx2")]
739#[cfg_attr(test, assert_instr(vpmovsxdq))]
740#[stable(feature = "simd_x86", since = "1.27.0")]
741pub fn _mm256_cvtepi32_epi64(a: __m128i) -> __m256i {
742 unsafe { transmute::<i64x4, _>(simd_cast(a.as_i32x4())) }
743}
744
/// Sign-extends sixteen packed 8-bit integers in `a` to 16-bit integers.
///
/// Generates `vpmovsxbw`.
745#[inline]
749#[target_feature(enable = "avx2")]
750#[cfg_attr(test, assert_instr(vpmovsxbw))]
751#[stable(feature = "simd_x86", since = "1.27.0")]
752pub fn _mm256_cvtepi8_epi16(a: __m128i) -> __m256i {
753 unsafe { transmute::<i16x16, _>(simd_cast(a.as_i8x16())) }
754}
755
/// Sign-extends the low eight packed 8-bit integers in `a` to 32-bit
/// integers.
///
/// Generates `vpmovsxbd`.
756#[inline]
760#[target_feature(enable = "avx2")]
761#[cfg_attr(test, assert_instr(vpmovsxbd))]
762#[stable(feature = "simd_x86", since = "1.27.0")]
763pub fn _mm256_cvtepi8_epi32(a: __m128i) -> __m256i {
764 unsafe {
765 let a = a.as_i8x16();
        // Only the low eight bytes fit in eight 32-bit output lanes.
766 let v64: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
767 transmute::<i32x8, _>(simd_cast(v64))
768 }
769}
770
/// Sign-extends the low four packed 8-bit integers in `a` to 64-bit
/// integers.
///
/// Generates `vpmovsxbq`.
771#[inline]
775#[target_feature(enable = "avx2")]
776#[cfg_attr(test, assert_instr(vpmovsxbq))]
777#[stable(feature = "simd_x86", since = "1.27.0")]
778pub fn _mm256_cvtepi8_epi64(a: __m128i) -> __m256i {
779 unsafe {
780 let a = a.as_i8x16();
        // Only the low four bytes fit in four 64-bit output lanes.
781 let v32: i8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
782 transmute::<i64x4, _>(simd_cast(v32))
783 }
784}
785
/// Zero-extends eight packed unsigned 16-bit integers in `a` to 32-bit
/// integers.
///
/// Generates `vpmovzxwd`.
786#[inline]
791#[target_feature(enable = "avx2")]
792#[cfg_attr(test, assert_instr(vpmovzxwd))]
793#[stable(feature = "simd_x86", since = "1.27.0")]
794pub fn _mm256_cvtepu16_epi32(a: __m128i) -> __m256i {
795 unsafe { transmute::<i32x8, _>(simd_cast(a.as_u16x8())) }
796}
797
/// Zero-extends the low four packed unsigned 16-bit integers in `a` to
/// 64-bit integers.
///
/// Generates `vpmovzxwq`.
798#[inline]
803#[target_feature(enable = "avx2")]
804#[cfg_attr(test, assert_instr(vpmovzxwq))]
805#[stable(feature = "simd_x86", since = "1.27.0")]
806pub fn _mm256_cvtepu16_epi64(a: __m128i) -> __m256i {
807 unsafe {
808 let a = a.as_u16x8();
        // Only the low four 16-bit elements fit in four 64-bit output lanes.
809 let v64: u16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
810 transmute::<i64x4, _>(simd_cast(v64))
811 }
812}
813
/// Zero-extends four packed unsigned 32-bit integers in `a` to 64-bit
/// integers.
///
/// Generates `vpmovzxdq`.
814#[inline]
818#[target_feature(enable = "avx2")]
819#[cfg_attr(test, assert_instr(vpmovzxdq))]
820#[stable(feature = "simd_x86", since = "1.27.0")]
821pub fn _mm256_cvtepu32_epi64(a: __m128i) -> __m256i {
822 unsafe { transmute::<i64x4, _>(simd_cast(a.as_u32x4())) }
823}
824
/// Zero-extends sixteen packed unsigned 8-bit integers in `a` to 16-bit
/// integers.
///
/// Generates `vpmovzxbw`.
825#[inline]
829#[target_feature(enable = "avx2")]
830#[cfg_attr(test, assert_instr(vpmovzxbw))]
831#[stable(feature = "simd_x86", since = "1.27.0")]
832pub fn _mm256_cvtepu8_epi16(a: __m128i) -> __m256i {
833 unsafe { transmute::<i16x16, _>(simd_cast(a.as_u8x16())) }
834}
835
/// Zero-extends the low eight packed unsigned 8-bit integers in `a` to
/// 32-bit integers.
///
/// Generates `vpmovzxbd`.
836#[inline]
841#[target_feature(enable = "avx2")]
842#[cfg_attr(test, assert_instr(vpmovzxbd))]
843#[stable(feature = "simd_x86", since = "1.27.0")]
844pub fn _mm256_cvtepu8_epi32(a: __m128i) -> __m256i {
845 unsafe {
846 let a = a.as_u8x16();
        // Only the low eight bytes fit in eight 32-bit output lanes.
847 let v64: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
848 transmute::<i32x8, _>(simd_cast(v64))
849 }
850}
851
/// Zero-extends the low four packed unsigned 8-bit integers in `a` to
/// 64-bit integers.
///
/// Generates `vpmovzxbq`.
852#[inline]
857#[target_feature(enable = "avx2")]
858#[cfg_attr(test, assert_instr(vpmovzxbq))]
859#[stable(feature = "simd_x86", since = "1.27.0")]
860pub fn _mm256_cvtepu8_epi64(a: __m128i) -> __m256i {
861 unsafe {
862 let a = a.as_u8x16();
        // Only the low four bytes fit in four 64-bit output lanes.
863 let v32: u8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
864 transmute::<i64x4, _>(simd_cast(v32))
865 }
866}
867
/// Extracts the 128-bit half of `a` selected by the 1-bit immediate
/// `IMM1` (0 = low half, 1 = high half).
///
/// May generate `vextractf128`.
868#[inline]
872#[target_feature(enable = "avx2")]
873#[cfg_attr(test, assert_instr(vextractf128, IMM1 = 1))]
874#[rustc_legacy_const_generics(1)]
875#[stable(feature = "simd_x86", since = "1.27.0")]
876pub fn _mm256_extracti128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
877 static_assert_uimm_bits!(IMM1, 1);
878 unsafe {
879 let a = a.as_i64x4();
880 let b = i64x4::ZERO;
        // Indices [0, 1] pick the low half; [2, 3] the high half. The zero
        // vector is only a placeholder second shuffle operand.
881 let dst: i64x2 = simd_shuffle!(a, b, [[0, 1], [2, 3]][IMM1 as usize]);
882 transmute(dst)
883 }
884}
885
/// Horizontally adds adjacent pairs of 16-bit integers in `a` and `b`
/// (wrapping on overflow), interleaving the pair-sums of `a` and `b`
/// per 128-bit lane.
///
/// Generates `vphaddw`.
886#[inline]
890#[target_feature(enable = "avx2")]
891#[cfg_attr(test, assert_instr(vphaddw))]
892#[stable(feature = "simd_x86", since = "1.27.0")]
893pub fn _mm256_hadd_epi16(a: __m256i, b: __m256i) -> __m256i {
894 let a = a.as_i16x16();
895 let b = b.as_i16x16();
896 unsafe {
        // Gather the even-indexed and odd-indexed members of each adjacent
        // pair (lane-wise across a then b), then add them element-wise.
897 let even: i16x16 = simd_shuffle!(
898 a,
899 b,
900 [0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30]
901 );
902 let odd: i16x16 = simd_shuffle!(
903 a,
904 b,
905 [1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31]
906 );
907 simd_add(even, odd).as_m256i()
908 }
909}
910
/// Horizontally adds adjacent pairs of 32-bit integers in `a` and `b`
/// (wrapping on overflow), interleaving the pair-sums of `a` and `b`
/// per 128-bit lane.
///
/// Generates `vphaddd`.
911#[inline]
915#[target_feature(enable = "avx2")]
916#[cfg_attr(test, assert_instr(vphaddd))]
917#[stable(feature = "simd_x86", since = "1.27.0")]
918pub fn _mm256_hadd_epi32(a: __m256i, b: __m256i) -> __m256i {
919 let a = a.as_i32x8();
920 let b = b.as_i32x8();
921 unsafe {
        // Even/odd members of each adjacent pair, then element-wise add.
922 let even: i32x8 = simd_shuffle!(a, b, [0, 2, 8, 10, 4, 6, 12, 14]);
923 let odd: i32x8 = simd_shuffle!(a, b, [1, 3, 9, 11, 5, 7, 13, 15]);
924 simd_add(even, odd).as_m256i()
925 }
926}
927
/// Horizontally adds adjacent pairs of 16-bit signed integers in `a` and
/// `b` with saturation (sums clamp to the `i16` range).
///
/// Generates `vphaddsw`. Delegates to the `phaddsw` LLVM intrinsic.
928#[inline]
933#[target_feature(enable = "avx2")]
934#[cfg_attr(test, assert_instr(vphaddsw))]
935#[stable(feature = "simd_x86", since = "1.27.0")]
936pub fn _mm256_hadds_epi16(a: __m256i, b: __m256i) -> __m256i {
937 unsafe { transmute(phaddsw(a.as_i16x16(), b.as_i16x16())) }
938}
939
/// Horizontally subtracts adjacent pairs of 16-bit integers in `a` and
/// `b` (wrapping): each result is `even - odd` of a pair, interleaved
/// per 128-bit lane.
///
/// Generates `vphsubw`.
940#[inline]
944#[target_feature(enable = "avx2")]
945#[cfg_attr(test, assert_instr(vphsubw))]
946#[stable(feature = "simd_x86", since = "1.27.0")]
947pub fn _mm256_hsub_epi16(a: __m256i, b: __m256i) -> __m256i {
948 let a = a.as_i16x16();
949 let b = b.as_i16x16();
950 unsafe {
        // Same even/odd gathering as hadd, but subtracting odd from even.
951 let even: i16x16 = simd_shuffle!(
952 a,
953 b,
954 [0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30]
955 );
956 let odd: i16x16 = simd_shuffle!(
957 a,
958 b,
959 [1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31]
960 );
961 simd_sub(even, odd).as_m256i()
962 }
963}
964
/// Horizontally subtracts adjacent pairs of 32-bit integers in `a` and
/// `b` (wrapping): each result is `even - odd` of a pair, interleaved
/// per 128-bit lane.
///
/// Generates `vphsubd`.
965#[inline]
969#[target_feature(enable = "avx2")]
970#[cfg_attr(test, assert_instr(vphsubd))]
971#[stable(feature = "simd_x86", since = "1.27.0")]
972pub fn _mm256_hsub_epi32(a: __m256i, b: __m256i) -> __m256i {
973 let a = a.as_i32x8();
974 let b = b.as_i32x8();
975 unsafe {
        // Same even/odd gathering as hadd, but subtracting odd from even.
976 let even: i32x8 = simd_shuffle!(a, b, [0, 2, 8, 10, 4, 6, 12, 14]);
977 let odd: i32x8 = simd_shuffle!(a, b, [1, 3, 9, 11, 5, 7, 13, 15]);
978 simd_sub(even, odd).as_m256i()
979 }
980}
981
/// Horizontally subtracts adjacent pairs of 16-bit signed integers in
/// `a` and `b` with saturation (differences clamp to the `i16` range).
///
/// Generates `vphsubsw`. Delegates to the `phsubsw` LLVM intrinsic.
982#[inline]
987#[target_feature(enable = "avx2")]
988#[cfg_attr(test, assert_instr(vphsubsw))]
989#[stable(feature = "simd_x86", since = "1.27.0")]
990pub fn _mm256_hsubs_epi16(a: __m256i, b: __m256i) -> __m256i {
991 unsafe { transmute(phsubsw(a.as_i16x16(), b.as_i16x16())) }
992}
993
/// Gathers four 32-bit integers from memory at `slice + offsets[i] * SCALE`.
///
/// Generates `vpgatherdd`.
///
/// # Safety
///
/// Each computed address must be valid to read as an `i32`; the caller is
/// responsible for bounds — `slice` is treated as a raw base pointer, not
/// a checked slice.
994#[inline]
1000#[target_feature(enable = "avx2")]
1001#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
1002#[rustc_legacy_const_generics(2)]
1003#[stable(feature = "simd_x86", since = "1.27.0")]
1004pub unsafe fn _mm_i32gather_epi32<const SCALE: i32>(
1005 slice: *const i32,
1006 offsets: __m128i,
1007) -> __m128i {
1008 static_assert_imm8_scale!(SCALE);
    // All-ones mask = gather every element; zero source is never merged in.
1009 let zero = i32x4::ZERO;
1010 let neg_one = _mm_set1_epi32(-1).as_i32x4();
1011 let offsets = offsets.as_i32x4();
    // Byte-granular base pointer: the hardware scales offsets by SCALE bytes.
1012 let slice = slice as *const i8;
1013 let r = pgatherdd(zero, slice, offsets, neg_one, SCALE as i8);
1014 transmute(r)
1015}
1016
/// Masked gather of four 32-bit integers: lanes whose `mask` high bit is
/// set are loaded from `slice + offsets[i] * SCALE`, the rest are copied
/// from `src`.
///
/// Generates `vpgatherdd`.
///
/// # Safety
///
/// Every address for an enabled mask lane must be valid to read as an
/// `i32`; the caller is responsible for bounds.
1017#[inline]
1024#[target_feature(enable = "avx2")]
1025#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
1026#[rustc_legacy_const_generics(4)]
1027#[stable(feature = "simd_x86", since = "1.27.0")]
1028pub unsafe fn _mm_mask_i32gather_epi32<const SCALE: i32>(
1029 src: __m128i,
1030 slice: *const i32,
1031 offsets: __m128i,
1032 mask: __m128i,
1033) -> __m128i {
1034 static_assert_imm8_scale!(SCALE);
1035 let src = src.as_i32x4();
1036 let mask = mask.as_i32x4();
1037 let offsets = offsets.as_i32x4();
1038 let slice = slice as *const i8;
1039 let r = pgatherdd(src, slice, offsets, mask, SCALE as i8);
1040 transmute(r)
1041}
1042
/// Gathers eight 32-bit integers from memory at `slice + offsets[i] * SCALE`.
///
/// Generates `vpgatherdd`.
///
/// # Safety
///
/// Each computed address must be valid to read as an `i32`; the caller is
/// responsible for bounds.
1043#[inline]
1049#[target_feature(enable = "avx2")]
1050#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
1051#[rustc_legacy_const_generics(2)]
1052#[stable(feature = "simd_x86", since = "1.27.0")]
1053pub unsafe fn _mm256_i32gather_epi32<const SCALE: i32>(
1054 slice: *const i32,
1055 offsets: __m256i,
1056) -> __m256i {
1057 static_assert_imm8_scale!(SCALE);
    // All-ones mask = gather every element.
1058 let zero = i32x8::ZERO;
1059 let neg_one = _mm256_set1_epi32(-1).as_i32x8();
1060 let offsets = offsets.as_i32x8();
1061 let slice = slice as *const i8;
1062 let r = vpgatherdd(zero, slice, offsets, neg_one, SCALE as i8);
1063 transmute(r)
1064}
1065
/// Masked gather of eight 32-bit integers: lanes whose `mask` high bit is
/// set are loaded from `slice + offsets[i] * SCALE`, the rest are copied
/// from `src`.
///
/// Generates `vpgatherdd`.
///
/// # Safety
///
/// Every address for an enabled mask lane must be valid to read as an
/// `i32`; the caller is responsible for bounds.
1066#[inline]
1073#[target_feature(enable = "avx2")]
1074#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
1075#[rustc_legacy_const_generics(4)]
1076#[stable(feature = "simd_x86", since = "1.27.0")]
1077pub unsafe fn _mm256_mask_i32gather_epi32<const SCALE: i32>(
1078 src: __m256i,
1079 slice: *const i32,
1080 offsets: __m256i,
1081 mask: __m256i,
1082) -> __m256i {
1083 static_assert_imm8_scale!(SCALE);
1084 let src = src.as_i32x8();
1085 let mask = mask.as_i32x8();
1086 let offsets = offsets.as_i32x8();
1087 let slice = slice as *const i8;
1088 let r = vpgatherdd(src, slice, offsets, mask, SCALE as i8);
1089 transmute(r)
1090}
1091
/// Gathers four single-precision floats from memory at
/// `slice + offsets[i] * SCALE`.
///
/// Generates `vgatherdps`.
///
/// # Safety
///
/// Each computed address must be valid to read as an `f32`; the caller is
/// responsible for bounds.
1092#[inline]
1098#[target_feature(enable = "avx2")]
1099#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
1100#[rustc_legacy_const_generics(2)]
1101#[stable(feature = "simd_x86", since = "1.27.0")]
1102pub unsafe fn _mm_i32gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m128i) -> __m128 {
1103 static_assert_imm8_scale!(SCALE);
    // -1.0 has its sign (high) bit set, so it acts as an all-enabled mask.
1104 let zero = _mm_setzero_ps();
1105 let neg_one = _mm_set1_ps(-1.0);
1106 let offsets = offsets.as_i32x4();
1107 let slice = slice as *const i8;
1108 pgatherdps(zero, slice, offsets, neg_one, SCALE as i8)
1109}
1110
/// Masked gather of four single-precision floats: lanes whose `mask` high
/// bit is set are loaded from `slice + offsets[i] * SCALE`, the rest come
/// from `src`.
///
/// Generates `vgatherdps`.
///
/// # Safety
///
/// Every address for an enabled mask lane must be valid to read as an
/// `f32`; the caller is responsible for bounds.
1111#[inline]
1118#[target_feature(enable = "avx2")]
1119#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
1120#[rustc_legacy_const_generics(4)]
1121#[stable(feature = "simd_x86", since = "1.27.0")]
1122pub unsafe fn _mm_mask_i32gather_ps<const SCALE: i32>(
1123 src: __m128,
1124 slice: *const f32,
1125 offsets: __m128i,
1126 mask: __m128,
1127) -> __m128 {
1128 static_assert_imm8_scale!(SCALE);
1129 let offsets = offsets.as_i32x4();
1130 let slice = slice as *const i8;
1131 pgatherdps(src, slice, offsets, mask, SCALE as i8)
1132}
1133
/// Gathers eight single-precision floats from memory at
/// `slice + offsets[i] * SCALE`.
///
/// Generates `vgatherdps`.
///
/// # Safety
///
/// Each computed address must be valid to read as an `f32`; the caller is
/// responsible for bounds.
1134#[inline]
1140#[target_feature(enable = "avx2")]
1141#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
1142#[rustc_legacy_const_generics(2)]
1143#[stable(feature = "simd_x86", since = "1.27.0")]
1144pub unsafe fn _mm256_i32gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m256i) -> __m256 {
1145 static_assert_imm8_scale!(SCALE);
    // -1.0 has its sign (high) bit set, so it acts as an all-enabled mask.
1146 let zero = _mm256_setzero_ps();
1147 let neg_one = _mm256_set1_ps(-1.0);
1148 let offsets = offsets.as_i32x8();
1149 let slice = slice as *const i8;
1150 vpgatherdps(zero, slice, offsets, neg_one, SCALE as i8)
1151}
1152
/// Masked gather of eight single-precision floats: lanes whose `mask`
/// high bit is set are loaded from `slice + offsets[i] * SCALE`, the rest
/// come from `src`.
///
/// Generates `vgatherdps`.
///
/// # Safety
///
/// Every address for an enabled mask lane must be valid to read as an
/// `f32`; the caller is responsible for bounds.
1153#[inline]
1160#[target_feature(enable = "avx2")]
1161#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
1162#[rustc_legacy_const_generics(4)]
1163#[stable(feature = "simd_x86", since = "1.27.0")]
1164pub unsafe fn _mm256_mask_i32gather_ps<const SCALE: i32>(
1165 src: __m256,
1166 slice: *const f32,
1167 offsets: __m256i,
1168 mask: __m256,
1169) -> __m256 {
1170 static_assert_imm8_scale!(SCALE);
1171 let offsets = offsets.as_i32x8();
1172 let slice = slice as *const i8;
1173 vpgatherdps(src, slice, offsets, mask, SCALE as i8)
1174}
1175
/// Gathers two 64-bit integers from memory at `slice + offsets[i] * SCALE`
/// (only the low two 32-bit offsets are used).
///
/// Generates `vpgatherdq`.
///
/// # Safety
///
/// Each computed address must be valid to read as an `i64`; the caller is
/// responsible for bounds.
1176#[inline]
1182#[target_feature(enable = "avx2")]
1183#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
1184#[rustc_legacy_const_generics(2)]
1185#[stable(feature = "simd_x86", since = "1.27.0")]
1186pub unsafe fn _mm_i32gather_epi64<const SCALE: i32>(
1187 slice: *const i64,
1188 offsets: __m128i,
1189) -> __m128i {
1190 static_assert_imm8_scale!(SCALE);
    // All-ones mask = gather every element.
1191 let zero = i64x2::ZERO;
1192 let neg_one = _mm_set1_epi64x(-1).as_i64x2();
1193 let offsets = offsets.as_i32x4();
1194 let slice = slice as *const i8;
1195 let r = pgatherdq(zero, slice, offsets, neg_one, SCALE as i8);
1196 transmute(r)
1197}
1198
/// Masked gather of two 64-bit integers: lanes whose `mask` high bit is
/// set are loaded from `slice + offsets[i] * SCALE`, the rest are copied
/// from `src`.
///
/// Generates `vpgatherdq`.
///
/// # Safety
///
/// Every address for an enabled mask lane must be valid to read as an
/// `i64`; the caller is responsible for bounds.
1199#[inline]
1206#[target_feature(enable = "avx2")]
1207#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
1208#[rustc_legacy_const_generics(4)]
1209#[stable(feature = "simd_x86", since = "1.27.0")]
1210pub unsafe fn _mm_mask_i32gather_epi64<const SCALE: i32>(
1211 src: __m128i,
1212 slice: *const i64,
1213 offsets: __m128i,
1214 mask: __m128i,
1215) -> __m128i {
1216 static_assert_imm8_scale!(SCALE);
1217 let src = src.as_i64x2();
1218 let mask = mask.as_i64x2();
1219 let offsets = offsets.as_i32x4();
1220 let slice = slice as *const i8;
1221 let r = pgatherdq(src, slice, offsets, mask, SCALE as i8);
1222 transmute(r)
1223}
1224
/// Gathers four 64-bit integers from memory at
/// `slice + offsets[i] * SCALE` (32-bit offsets, 64-bit elements).
///
/// Generates `vpgatherdq`.
///
/// # Safety
///
/// Each computed address must be valid to read as an `i64`; the caller is
/// responsible for bounds.
1225#[inline]
1231#[target_feature(enable = "avx2")]
1232#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
1233#[rustc_legacy_const_generics(2)]
1234#[stable(feature = "simd_x86", since = "1.27.0")]
1235pub unsafe fn _mm256_i32gather_epi64<const SCALE: i32>(
1236 slice: *const i64,
1237 offsets: __m128i,
1238) -> __m256i {
1239 static_assert_imm8_scale!(SCALE);
    // All-ones mask = gather every element.
1240 let zero = i64x4::ZERO;
1241 let neg_one = _mm256_set1_epi64x(-1).as_i64x4();
1242 let offsets = offsets.as_i32x4();
1243 let slice = slice as *const i8;
1244 let r = vpgatherdq(zero, slice, offsets, neg_one, SCALE as i8);
1245 transmute(r)
1246}
1247
/// Masked gather of four 64-bit integers: lanes whose `mask` high bit is
/// set are loaded from `slice + offsets[i] * SCALE`, the rest are copied
/// from `src`.
///
/// Generates `vpgatherdq`.
///
/// # Safety
///
/// Every address for an enabled mask lane must be valid to read as an
/// `i64`; the caller is responsible for bounds.
1248#[inline]
1255#[target_feature(enable = "avx2")]
1256#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
1257#[rustc_legacy_const_generics(4)]
1258#[stable(feature = "simd_x86", since = "1.27.0")]
1259pub unsafe fn _mm256_mask_i32gather_epi64<const SCALE: i32>(
1260 src: __m256i,
1261 slice: *const i64,
1262 offsets: __m128i,
1263 mask: __m256i,
1264) -> __m256i {
1265 static_assert_imm8_scale!(SCALE);
1266 let src = src.as_i64x4();
1267 let mask = mask.as_i64x4();
1268 let offsets = offsets.as_i32x4();
1269 let slice = slice as *const i8;
1270 let r = vpgatherdq(src, slice, offsets, mask, SCALE as i8);
1271 transmute(r)
1272}
1273
/// Gathers two double-precision floats from memory at
/// `slice + offsets[i] * SCALE` (only the low two 32-bit offsets used).
///
/// Generates `vgatherdpd`.
///
/// # Safety
///
/// Each computed address must be valid to read as an `f64`; the caller is
/// responsible for bounds.
1274#[inline]
1280#[target_feature(enable = "avx2")]
1281#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
1282#[rustc_legacy_const_generics(2)]
1283#[stable(feature = "simd_x86", since = "1.27.0")]
1284pub unsafe fn _mm_i32gather_pd<const SCALE: i32>(slice: *const f64, offsets: __m128i) -> __m128d {
1285 static_assert_imm8_scale!(SCALE);
    // -1.0 has its sign (high) bit set, so it acts as an all-enabled mask.
1286 let zero = _mm_setzero_pd();
1287 let neg_one = _mm_set1_pd(-1.0);
1288 let offsets = offsets.as_i32x4();
1289 let slice = slice as *const i8;
1290 pgatherdpd(zero, slice, offsets, neg_one, SCALE as i8)
1291}
1292
/// Masked gather of two double-precision floats: lanes whose `mask` high
/// bit is set are loaded from `slice + offsets[i] * SCALE`, the rest come
/// from `src`.
///
/// Generates `vgatherdpd`.
///
/// # Safety
///
/// Every address for an enabled mask lane must be valid to read as an
/// `f64`; the caller is responsible for bounds.
1293#[inline]
1300#[target_feature(enable = "avx2")]
1301#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
1302#[rustc_legacy_const_generics(4)]
1303#[stable(feature = "simd_x86", since = "1.27.0")]
1304pub unsafe fn _mm_mask_i32gather_pd<const SCALE: i32>(
1305 src: __m128d,
1306 slice: *const f64,
1307 offsets: __m128i,
1308 mask: __m128d,
1309) -> __m128d {
1310 static_assert_imm8_scale!(SCALE);
1311 let offsets = offsets.as_i32x4();
1312 let slice = slice as *const i8;
1313 pgatherdpd(src, slice, offsets, mask, SCALE as i8)
1314}
1315
/// Gathers four `f64` values from `slice` at byte distances
/// `offsets[i] * SCALE` (32-bit offsets); every lane is loaded.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_pd<const SCALE: i32>(
    slice: *const f64,
    offsets: __m128i,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm256_setzero_pd();
    // -1.0 sets every lane's sign bit, enabling all loads.
    let neg_one = _mm256_set1_pd(-1.0);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    vpgatherdpd(zero, slice, offsets, neg_one, SCALE as i8)
}

/// Masked form of `_mm256_i32gather_pd`: lanes whose `mask` sign bit is clear
/// come from `src` and are not loaded.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_pd<const SCALE: i32>(
    src: __m256d,
    slice: *const f64,
    offsets: __m128i,
    mask: __m256d,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    vpgatherdpd(src, slice, offsets, mask, SCALE as i8)
}
1360
/// Gathers 32-bit integers from `slice` at byte distances `offsets[i] * SCALE`,
/// using two 64-bit offsets. Only the lanes addressed by the two offsets are
/// gathered; the remaining result lanes come from the zero source.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i32x4::ZERO;
    // All bits set => every lane's sign bit set => all addressed lanes load.
    let neg_one = _mm_set1_epi64x(-1).as_i32x4();
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    let r = pgatherqd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

/// Masked form of `_mm_i64gather_epi32`: lanes whose `mask` element has its
/// highest bit clear come from `src` and are not loaded.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_epi32<const SCALE: i32>(
    src: __m128i,
    slice: *const i32,
    offsets: __m128i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x4();
    let mask = mask.as_i32x4();
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    let r = pgatherqd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}
1409
/// Gathers four 32-bit integers from `slice` at byte distances
/// `offsets[i] * SCALE`, using four 64-bit offsets; every lane is loaded.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m256i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i32x4::ZERO;
    // All bits set => all lanes enabled.
    let neg_one = _mm_set1_epi64x(-1).as_i32x4();
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    let r = vpgatherqd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

/// Masked form of `_mm256_i64gather_epi32`: lanes whose `mask` element has its
/// highest bit clear come from `src` and are not loaded.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_epi32<const SCALE: i32>(
    src: __m128i,
    slice: *const i32,
    offsets: __m256i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x4();
    let mask = mask.as_i32x4();
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    let r = vpgatherqd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}
1458
/// Gathers `f32` values from `slice` at byte distances `offsets[i] * SCALE`,
/// using two 64-bit offsets; all addressed lanes are loaded.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m128i) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_ps();
    // -1.0 sets the sign bit in every lane, enabling all loads.
    let neg_one = _mm_set1_ps(-1.0);
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    pgatherqps(zero, slice, offsets, neg_one, SCALE as i8)
}

/// Masked form of `_mm_i64gather_ps`: lanes whose `mask` sign bit is clear
/// come from `src` and are not loaded.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_ps<const SCALE: i32>(
    src: __m128,
    slice: *const f32,
    offsets: __m128i,
    mask: __m128,
) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    pgatherqps(src, slice, offsets, mask, SCALE as i8)
}
1500
/// Gathers four `f32` values from `slice` at byte distances
/// `offsets[i] * SCALE`, using four 64-bit offsets; every lane is loaded.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m256i) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_ps();
    // -1.0 sets the sign bit in every lane, enabling all loads.
    let neg_one = _mm_set1_ps(-1.0);
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    vpgatherqps(zero, slice, offsets, neg_one, SCALE as i8)
}

/// Masked form of `_mm256_i64gather_ps`: lanes whose `mask` sign bit is clear
/// come from `src` and are not loaded.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_ps<const SCALE: i32>(
    src: __m128,
    slice: *const f32,
    offsets: __m256i,
    mask: __m128,
) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    vpgatherqps(src, slice, offsets, mask, SCALE as i8)
}
1542
/// Gathers two 64-bit integers from `slice` at byte distances
/// `offsets[i] * SCALE` (64-bit offsets); every lane is loaded.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_epi64<const SCALE: i32>(
    slice: *const i64,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i64x2::ZERO;
    // All bits set => every lane enabled.
    let neg_one = _mm_set1_epi64x(-1).as_i64x2();
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x2();
    let r = pgatherqq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

/// Masked form of `_mm_i64gather_epi64`: lanes whose `mask` element has its
/// highest bit clear come from `src` and are not loaded.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_epi64<const SCALE: i32>(
    src: __m128i,
    slice: *const i64,
    offsets: __m128i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x2();
    let mask = mask.as_i64x2();
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    let r = pgatherqq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}
1591
/// Gathers four 64-bit integers from `slice` at byte distances
/// `offsets[i] * SCALE` (64-bit offsets); every lane is loaded.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_epi64<const SCALE: i32>(
    slice: *const i64,
    offsets: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let zero = i64x4::ZERO;
    // All bits set => every lane enabled.
    let neg_one = _mm256_set1_epi64x(-1).as_i64x4();
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x4();
    let r = vpgatherqq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

/// Masked form of `_mm256_i64gather_epi64`: lanes whose `mask` element has its
/// highest bit clear come from `src` and are not loaded.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_epi64<const SCALE: i32>(
    src: __m256i,
    slice: *const i64,
    offsets: __m256i,
    mask: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x4();
    let mask = mask.as_i64x4();
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    let r = vpgatherqq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}
1640
/// Gathers two `f64` values from `slice` at byte distances
/// `offsets[i] * SCALE` (64-bit offsets); every lane is loaded.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_pd<const SCALE: i32>(slice: *const f64, offsets: __m128i) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_pd();
    // -1.0 sets the sign bit in every lane, enabling all loads.
    let neg_one = _mm_set1_pd(-1.0);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x2();
    pgatherqpd(zero, slice, offsets, neg_one, SCALE as i8)
}

/// Masked form of `_mm_i64gather_pd`: lanes whose `mask` sign bit is clear
/// come from `src` and are not loaded.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_pd<const SCALE: i32>(
    src: __m128d,
    slice: *const f64,
    offsets: __m128i,
    mask: __m128d,
) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x2();
    pgatherqpd(src, slice, offsets, mask, SCALE as i8)
}
1682
/// Gathers four `f64` values from `slice` at byte distances
/// `offsets[i] * SCALE` (64-bit offsets); every lane is loaded.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_pd<const SCALE: i32>(
    slice: *const f64,
    offsets: __m256i,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm256_setzero_pd();
    // -1.0 sets the sign bit in every lane, enabling all loads.
    let neg_one = _mm256_set1_pd(-1.0);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x4();
    vpgatherqpd(zero, slice, offsets, neg_one, SCALE as i8)
}

/// Masked form of `_mm256_i64gather_pd`: lanes whose `mask` sign bit is clear
/// come from `src` and are not loaded.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_pd<const SCALE: i32>(
    src: __m256d,
    slice: *const f64,
    offsets: __m256i,
    mask: __m256d,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x4();
    vpgatherqpd(src, slice, offsets, mask, SCALE as i8)
}
1727
/// Copies `a`, then replaces either the low (`IMM1 == 0`) or high
/// (`IMM1 == 1`) 128-bit half with `b`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vinsertf128, IMM1 = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_inserti128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m256i {
    static_assert_uimm_bits!(IMM1, 1);
    unsafe {
        let a = a.as_i64x4();
        // Widen `b` so both shuffle inputs have the same lane count; indices
        // 4..=5 select b's low half, 0..=3 select halves of `a`.
        let b = _mm256_castsi128_si256(b).as_i64x4();
        let dst: i64x4 = simd_shuffle!(a, b, [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize]);
        transmute(dst)
    }
}
1746
/// Multiplies packed signed 16-bit integers producing 32-bit products, then
/// horizontally adds each adjacent pair of products.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_madd_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Widen to i32 before multiplying so the full product is kept.
        let r: i32x16 = simd_mul(simd_cast(a.as_i16x16()), simd_cast(b.as_i16x16()));
        let even: i32x8 = simd_shuffle!(r, r, [0, 2, 4, 6, 8, 10, 12, 14]);
        let odd: i32x8 = simd_shuffle!(r, r, [1, 3, 5, 7, 9, 11, 13, 15]);
        simd_add(even, odd).as_m256i()
    }
}

/// Vertically multiplies unsigned 8-bit integers from `a` with signed 8-bit
/// integers from `b`, horizontally adding adjacent pairs with signed
/// saturation (VPMADDUBSW).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(pmaddubsw(a.as_u8x32(), b.as_i8x32())) }
}
1778
/// Loads 32-bit elements from `mem_addr` into lanes whose `mask` element has
/// its highest bit set; other lanes are zeroed and not read.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_epi32(mem_addr: *const i32, mask: __m128i) -> __m128i {
    // Arithmetic shift by 31 broadcasts each lane's sign bit (all-ones/all-zeros).
    let mask = simd_shr(mask.as_i32x4(), i32x4::splat(31));
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i32x4::ZERO).as_m128i()
}

/// 256-bit variant of `_mm_maskload_epi32`: eight 32-bit lanes.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_epi32(mem_addr: *const i32, mask: __m256i) -> __m256i {
    let mask = simd_shr(mask.as_i32x8(), i32x8::splat(31));
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i32x8::ZERO).as_m256i()
}

/// Loads 64-bit elements from `mem_addr` into lanes whose `mask` element has
/// its highest bit set; other lanes are zeroed and not read.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_epi64(mem_addr: *const i64, mask: __m128i) -> __m128i {
    // Arithmetic shift by 63 broadcasts each lane's sign bit.
    let mask = simd_shr(mask.as_i64x2(), i64x2::splat(63));
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i64x2::ZERO).as_m128i()
}

/// 256-bit variant of `_mm_maskload_epi64`: four 64-bit lanes.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_epi64(mem_addr: *const i64, mask: __m256i) -> __m256i {
    let mask = simd_shr(mask.as_i64x4(), i64x4::splat(63));
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i64x4::ZERO).as_m256i()
}
1834
/// Stores 32-bit elements of `a` to `mem_addr` for lanes whose `mask` element
/// has its highest bit set; other memory locations are left untouched.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_epi32(mem_addr: *mut i32, mask: __m128i, a: __m128i) {
    // Arithmetic shift by 31 broadcasts each lane's sign bit.
    let mask = simd_shr(mask.as_i32x4(), i32x4::splat(31));
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x4())
}

/// 256-bit variant of `_mm_maskstore_epi32`: eight 32-bit lanes.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_epi32(mem_addr: *mut i32, mask: __m256i, a: __m256i) {
    let mask = simd_shr(mask.as_i32x8(), i32x8::splat(31));
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x8())
}

/// Stores 64-bit elements of `a` to `mem_addr` for lanes whose `mask` element
/// has its highest bit set; other memory locations are left untouched.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_epi64(mem_addr: *mut i64, mask: __m128i, a: __m128i) {
    // Arithmetic shift by 63 broadcasts each lane's sign bit.
    let mask = simd_shr(mask.as_i64x2(), i64x2::splat(63));
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x2())
}

/// 256-bit variant of `_mm_maskstore_epi64`: four 64-bit lanes.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_epi64(mem_addr: *mut i64, mask: __m256i, a: __m256i) {
    let mask = simd_shr(mask.as_i64x4(), i64x4::splat(63));
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x4())
}
1890
/// Compares packed signed 16-bit integers and returns the lane-wise maximum.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i16x16();
        let b = b.as_i16x16();
        // select(a > b, a, b) is recognized and lowered to a single max.
        transmute(simd_select::<i16x16, _>(simd_gt(a, b), a, b))
    }
}

/// Compares packed signed 32-bit integers and returns the lane-wise maximum.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i32x8();
        let b = b.as_i32x8();
        transmute(simd_select::<i32x8, _>(simd_gt(a, b), a, b))
    }
}

/// Compares packed signed 8-bit integers and returns the lane-wise maximum.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i8x32();
        let b = b.as_i8x32();
        transmute(simd_select::<i8x32, _>(simd_gt(a, b), a, b))
    }
}

/// Compares packed unsigned 16-bit integers and returns the lane-wise maximum.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Unsigned views make simd_gt an unsigned comparison.
        let a = a.as_u16x16();
        let b = b.as_u16x16();
        transmute(simd_select::<i16x16, _>(simd_gt(a, b), a, b))
    }
}

/// Compares packed unsigned 32-bit integers and returns the lane-wise maximum.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u32x8();
        let b = b.as_u32x8();
        transmute(simd_select::<i32x8, _>(simd_gt(a, b), a, b))
    }
}

/// Compares packed unsigned 8-bit integers and returns the lane-wise maximum.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u8x32();
        let b = b.as_u8x32();
        transmute(simd_select::<i8x32, _>(simd_gt(a, b), a, b))
    }
}
1986
/// Compares packed signed 16-bit integers and returns the lane-wise minimum.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i16x16();
        let b = b.as_i16x16();
        // select(a < b, a, b) is recognized and lowered to a single min.
        transmute(simd_select::<i16x16, _>(simd_lt(a, b), a, b))
    }
}

/// Compares packed signed 32-bit integers and returns the lane-wise minimum.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i32x8();
        let b = b.as_i32x8();
        transmute(simd_select::<i32x8, _>(simd_lt(a, b), a, b))
    }
}

/// Compares packed signed 8-bit integers and returns the lane-wise minimum.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i8x32();
        let b = b.as_i8x32();
        transmute(simd_select::<i8x32, _>(simd_lt(a, b), a, b))
    }
}

/// Compares packed unsigned 16-bit integers and returns the lane-wise minimum.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Unsigned views make simd_lt an unsigned comparison.
        let a = a.as_u16x16();
        let b = b.as_u16x16();
        transmute(simd_select::<i16x16, _>(simd_lt(a, b), a, b))
    }
}

/// Compares packed unsigned 32-bit integers and returns the lane-wise minimum.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u32x8();
        let b = b.as_u32x8();
        transmute(simd_select::<i32x8, _>(simd_lt(a, b), a, b))
    }
}

/// Compares packed unsigned 8-bit integers and returns the lane-wise minimum.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u8x32();
        let b = b.as_u8x32();
        transmute(simd_select::<i8x32, _>(simd_lt(a, b), a, b))
    }
}
2082
/// Builds a 32-bit mask from the most significant bit of each byte of `a`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovmskb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_movemask_epi8(a: __m256i) -> i32 {
    unsafe {
        let z = i8x32::ZERO;
        // A signed `< 0` comparison isolates each byte's sign bit.
        let m: i8x32 = simd_lt(a.as_i8x32(), z);
        simd_bitmask::<_, u32>(m) as i32
    }
}

/// Computes sums of absolute differences of unsigned 8-bit integers in 4-byte
/// groups selected by `IMM8` (VMPSADBW), producing 16-bit results.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmpsadbw, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mpsadbw_epu8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { transmute(mpsadbw(a.as_u8x32(), b.as_u8x32(), IMM8 as i8)) }
}
2117
/// Multiplies the low signed 32-bit integer of each 64-bit lane of `a` and
/// `b`, producing full 64-bit signed products.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmuldq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mul_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // i64x4 -> i32x4 keeps the low 32 bits of each lane; the cast back
        // to i64x4 sign-extends before the widening multiply.
        let a = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(a.as_i64x4()));
        let b = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(b.as_i64x4()));
        transmute(simd_mul(a, b))
    }
}

/// Multiplies the low unsigned 32-bit integer of each 64-bit lane of `a` and
/// `b`, producing full 64-bit unsigned products.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmuludq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mul_epu32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u64x4();
        let b = b.as_u64x4();
        // Masking with u32::MAX zero-extends the low 32 bits of each lane.
        let mask = u64x4::splat(u32::MAX.into());
        transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
    }
}
2154
/// Multiplies packed signed 16-bit integers and keeps the high 16 bits of
/// each 32-bit intermediate product.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmulhw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mulhi_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Widen to 32 bits so the full product survives, then shift down.
        let a = simd_cast::<_, i32x16>(a.as_i16x16());
        let b = simd_cast::<_, i32x16>(b.as_i16x16());
        let r = simd_shr(simd_mul(a, b), i32x16::splat(16));
        transmute(simd_cast::<i32x16, i16x16>(r))
    }
}

/// Multiplies packed unsigned 16-bit integers and keeps the high 16 bits of
/// each 32-bit intermediate product.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mulhi_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = simd_cast::<_, u32x16>(a.as_u16x16());
        let b = simd_cast::<_, u32x16>(b.as_u16x16());
        let r = simd_shr(simd_mul(a, b), u32x16::splat(16));
        transmute(simd_cast::<u32x16, u16x16>(r))
    }
}
2190
/// Multiplies packed 16-bit integers, keeping the low 16 bits of each product.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmullw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mullo_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_mul(a.as_i16x16(), b.as_i16x16())) }
}

/// Multiplies packed 32-bit integers, keeping the low 32 bits of each product.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmulld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mullo_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_mul(a.as_i32x8(), b.as_i32x8())) }
}

/// Multiplies packed signed 16-bit integers, truncating each 32-bit product
/// to its 18 most significant bits and rounding to 16 bits (VPMULHRSW).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mulhrs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(pmulhrsw(a.as_i16x16(), b.as_i16x16())) }
}
2230
/// Computes the bitwise OR of 256 bits in `a` and `b`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_or(a.as_i32x8(), b.as_i32x8())) }
}

/// Packs 16-bit integers from `a` and `b` into 8-bit integers using signed
/// saturation (VPACKSSWB).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpacksswb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_packs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(packsswb(a.as_i16x16(), b.as_i16x16())) }
}

/// Packs 32-bit integers from `a` and `b` into 16-bit integers using signed
/// saturation (VPACKSSDW).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpackssdw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_packs_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(packssdw(a.as_i32x8(), b.as_i32x8())) }
}

/// Packs 16-bit integers from `a` and `b` into 8-bit integers using unsigned
/// saturation (VPACKUSWB).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpackuswb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_packus_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(packuswb(a.as_i16x16(), b.as_i16x16())) }
}

/// Packs 32-bit integers from `a` and `b` into 16-bit integers using unsigned
/// saturation (VPACKUSDW).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpackusdw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_packus_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(packusdw(a.as_i32x8(), b.as_i32x8())) }
}
2290
/// Permutes 32-bit integers of `a` across all eight lanes using indices
/// taken from `b`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpermps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permutevar8x32_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(permd(a.as_u32x8(), b.as_u32x8())) }
}

/// Permutes the four 64-bit lanes of `a` according to two-bit fields of
/// `IMM8` (lane i comes from `a[(IMM8 >> (2*i)) & 3]`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpermpd, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permute4x64_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        // Second shuffle operand is never selected (all indices are 0..=3).
        let zero = i64x4::ZERO;
        let r: i64x4 = simd_shuffle!(
            a.as_i64x4(),
            zero,
            [
                IMM8 as u32 & 0b11,
                (IMM8 as u32 >> 2) & 0b11,
                (IMM8 as u32 >> 4) & 0b11,
                (IMM8 as u32 >> 6) & 0b11,
            ],
        );
        transmute(r)
    }
}
2330
/// Shuffles 128-bit halves selected from `a` and `b` by `IMM8`; delegates to
/// the AVX `_mm256_permute2f128_si256` implementation.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permute2x128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_permute2f128_si256::<IMM8>(a, b)
}

/// Permutes the four `f64` lanes of `a` according to two-bit fields of
/// `IMM8` (lane i comes from `a[(IMM8 >> (2*i)) & 3]`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpermpd, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permute4x64_pd<const IMM8: i32>(a: __m256d) -> __m256d {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        // Second operand is never selected (all indices are 0..=3).
        simd_shuffle!(
            a,
            _mm256_undefined_pd(),
            [
                IMM8 as u32 & 0b11,
                (IMM8 as u32 >> 2) & 0b11,
                (IMM8 as u32 >> 4) & 0b11,
                (IMM8 as u32 >> 6) & 0b11,
            ],
        )
    }
}
2368
/// Permutes `f32` values of `a` across all eight lanes using indices from
/// `idx`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpermps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permutevar8x32_ps(a: __m256, idx: __m256i) -> __m256 {
    unsafe { permps(a, idx.as_i32x8()) }
}

/// Computes sums of absolute differences of unsigned 8-bit integers in `a`
/// and `b` over each 8-byte group (VPSADBW).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsadbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sad_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(psadbw(a.as_u8x32(), b.as_u8x32())) }
}

/// Shuffles bytes of `a` within each 128-bit lane using control bytes from
/// `b` (VPSHUFB).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpshufb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(pshufb(a.as_u8x32(), b.as_u8x32())) }
}
2432
/// Shuffles 32-bit integers within each 128-bit lane of `a` using two-bit
/// fields of `MASK`; the same pattern is applied to both lanes.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_shuffle_epi32<const MASK: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(MASK, 8);
    unsafe {
        let r: i32x8 = simd_shuffle!(
            a.as_i32x8(),
            a.as_i32x8(),
            [
                // Low 128-bit lane: indices 0..=3.
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                (MASK as u32 >> 4) & 0b11,
                (MASK as u32 >> 6) & 0b11,
                // High 128-bit lane: same pattern offset by 4.
                (MASK as u32 & 0b11) + 4,
                ((MASK as u32 >> 2) & 0b11) + 4,
                ((MASK as u32 >> 4) & 0b11) + 4,
                ((MASK as u32 >> 6) & 0b11) + 4,
            ],
        );
        transmute(r)
    }
}
2488
/// Shuffles the upper four 16-bit integers of each 128-bit lane of `a` using
/// two-bit fields of `IMM8`; the lower four of each lane are passed through.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_shufflehi_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let a = a.as_i16x16();
        let r: i16x16 = simd_shuffle!(
            a,
            a,
            [
                // Low lane: words 0..=3 unchanged, 4..=7 shuffled.
                0,
                1,
                2,
                3,
                4 + (IMM8 as u32 & 0b11),
                4 + ((IMM8 as u32 >> 2) & 0b11),
                4 + ((IMM8 as u32 >> 4) & 0b11),
                4 + ((IMM8 as u32 >> 6) & 0b11),
                // High lane: words 8..=11 unchanged, 12..=15 shuffled.
                8,
                9,
                10,
                11,
                12 + (IMM8 as u32 & 0b11),
                12 + ((IMM8 as u32 >> 2) & 0b11),
                12 + ((IMM8 as u32 >> 4) & 0b11),
                12 + ((IMM8 as u32 >> 6) & 0b11),
            ],
        );
        transmute(r)
    }
}
2528
2529#[inline]
2535#[target_feature(enable = "avx2")]
2536#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 9))]
2537#[rustc_legacy_const_generics(1)]
2538#[stable(feature = "simd_x86", since = "1.27.0")]
2539pub fn _mm256_shufflelo_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2540 static_assert_uimm_bits!(IMM8, 8);
2541 unsafe {
2542 let a = a.as_i16x16();
2543 let r: i16x16 = simd_shuffle!(
2544 a,
2545 a,
2546 [
2547 0 + (IMM8 as u32 & 0b11),
2548 0 + ((IMM8 as u32 >> 2) & 0b11),
2549 0 + ((IMM8 as u32 >> 4) & 0b11),
2550 0 + ((IMM8 as u32 >> 6) & 0b11),
2551 4,
2552 5,
2553 6,
2554 7,
2555 8 + (IMM8 as u32 & 0b11),
2556 8 + ((IMM8 as u32 >> 2) & 0b11),
2557 8 + ((IMM8 as u32 >> 4) & 0b11),
2558 8 + ((IMM8 as u32 >> 6) & 0b11),
2559 12,
2560 13,
2561 14,
2562 15,
2563 ],
2564 );
2565 transmute(r)
2566 }
2567}
2568
/// Negates, zeroes, or passes through 16-bit integers of `a` according to the
/// sign of the corresponding element of `b` (VPSIGNW).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsignw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sign_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(psignw(a.as_i16x16(), b.as_i16x16())) }
}

/// Negates, zeroes, or passes through 32-bit integers of `a` according to the
/// sign of the corresponding element of `b` (VPSIGND).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsignd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sign_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(psignd(a.as_i32x8(), b.as_i32x8())) }
}

/// Negates, zeroes, or passes through 8-bit integers of `a` according to the
/// sign of the corresponding element of `b` (VPSIGNB).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsignb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sign_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(psignb(a.as_i8x32(), b.as_i8x32())) }
}

/// Shifts packed 16-bit integers left by the count in the low 64 bits of
/// `count`, shifting in zeros.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sll_epi16(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psllw(a.as_i16x16(), count.as_i16x8())) }
}

/// Shifts packed 32-bit integers left by the count in the low 64 bits of
/// `count`, shifting in zeros.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpslld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sll_epi32(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(pslld(a.as_i32x8(), count.as_i32x4())) }
}

/// Shifts packed 64-bit integers left by the count in the low 64 bits of
/// `count`, shifting in zeros.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sll_epi64(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psllq(a.as_i64x4(), count.as_i64x2())) }
}
2643
/// Shifts packed 16-bit integers in `a` left by `IMM8`, shifting in zeros;
/// shift counts of 16 or more yield an all-zero vector.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_slli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 16 {
            // A shift >= lane width would be UB for `simd_shl`; the hardware
            // produces zero in that case, so special-case it.
            _mm256_setzero_si256()
        } else {
            transmute(simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16)))
        }
    }
}
2663
/// Shifts each packed 32-bit integer in `a` left by the immediate `IMM8`,
/// shifting in zeros (`vpslld`). A shift count of 32 or more produces
/// all-zero lanes, matching the instruction's behavior.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpslld, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_slli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 32 {
            // Out-of-range counts are not UB for the hardware: result is 0.
            _mm256_setzero_si256()
        } else {
            // Unsigned element type so `simd_shl` shifts in zero bits.
            transmute(simd_shl(a.as_u32x8(), u32x8::splat(IMM8 as u32)))
        }
    }
}
2683
/// Shifts each packed 64-bit integer in `a` left by the immediate `IMM8`,
/// shifting in zeros (`vpsllq`). A shift count of 64 or more produces
/// all-zero lanes, matching the instruction's behavior.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_slli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 64 {
            // Out-of-range counts are not UB for the hardware: result is 0.
            _mm256_setzero_si256()
        } else {
            // Unsigned element type so `simd_shl` shifts in zero bits.
            transmute(simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64)))
        }
    }
}
2703
/// Shifts each 128-bit lane of `a` left by `IMM8` bytes, shifting in zeros
/// (`vpslldq`). Alias for [`_mm256_bslli_epi128`], which implements the
/// byte shuffle.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_slli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_bslli_epi128::<IMM8>(a)
}
2716
/// Shifts each 128-bit lane of `a` left by `IMM8` bytes, shifting in zeros
/// (`vpslldq`). The two 128-bit halves are shifted independently; shifts of
/// 16 bytes or more zero the whole lane.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_bslli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    // Shuffle index for output byte `i` over the concatenation
    // (zero vector = indices 0..=31, `a` = indices 32..=63).
    const fn mask(shift: i32, i: u32) -> u32 {
        let shift = shift as u32 & 0xff;
        if shift > 15 || i % 16 < shift {
            // Byte is shifted in from below the lane boundary: select a zero.
            0
        } else {
            // Select byte `i - shift` of `a` within the same 128-bit lane.
            32 + (i - shift)
        }
    }
    unsafe {
        let a = a.as_i8x32();
        let r: i8x32 = simd_shuffle!(
            i8x32::ZERO,
            a,
            [
                mask(IMM8, 0),
                mask(IMM8, 1),
                mask(IMM8, 2),
                mask(IMM8, 3),
                mask(IMM8, 4),
                mask(IMM8, 5),
                mask(IMM8, 6),
                mask(IMM8, 7),
                mask(IMM8, 8),
                mask(IMM8, 9),
                mask(IMM8, 10),
                mask(IMM8, 11),
                mask(IMM8, 12),
                mask(IMM8, 13),
                mask(IMM8, 14),
                mask(IMM8, 15),
                mask(IMM8, 16),
                mask(IMM8, 17),
                mask(IMM8, 18),
                mask(IMM8, 19),
                mask(IMM8, 20),
                mask(IMM8, 21),
                mask(IMM8, 22),
                mask(IMM8, 23),
                mask(IMM8, 24),
                mask(IMM8, 25),
                mask(IMM8, 26),
                mask(IMM8, 27),
                mask(IMM8, 28),
                mask(IMM8, 29),
                mask(IMM8, 30),
                mask(IMM8, 31),
            ],
        );
        transmute(r)
    }
}
2778
/// Shifts each packed 32-bit integer in `a` left by the per-element count
/// in `count`, shifting in zeros (`vpsllvd`). Elements whose count is 32 or
/// greater become zero, matching the instruction.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllvd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u32x4();
        // Lanes with an in-range shift count (< 32).
        let no_overflow: u32x4 = simd_lt(count, u32x4::splat(u32::BITS));
        // Clamp out-of-range counts to 0 so `simd_shl` is never UB, then
        // zero those lanes in the result.
        let count = simd_select(no_overflow, count, u32x4::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u32x4(), count), u32x4::ZERO).as_m128i()
    }
}
2796
/// Shifts each packed 32-bit integer in `a` left by the per-element count
/// in `count`, shifting in zeros (`vpsllvd`). Elements whose count is 32 or
/// greater become zero, matching the instruction.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllvd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u32x8();
        // Lanes with an in-range shift count (< 32).
        let no_overflow: u32x8 = simd_lt(count, u32x8::splat(u32::BITS));
        // Clamp out-of-range counts to 0 so `simd_shl` is never UB, then
        // zero those lanes in the result.
        let count = simd_select(no_overflow, count, u32x8::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u32x8(), count), u32x8::ZERO).as_m256i()
    }
}
2814
/// Shifts each packed 64-bit integer in `a` left by the per-element count
/// in `count`, shifting in zeros (`vpsllvq`). Elements whose count is 64 or
/// greater become zero, matching the instruction.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllvq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u64x2();
        // Lanes with an in-range shift count (< 64).
        let no_overflow: u64x2 = simd_lt(count, u64x2::splat(u64::BITS as u64));
        // Clamp out-of-range counts to 0 so `simd_shl` is never UB, then
        // zero those lanes in the result.
        let count = simd_select(no_overflow, count, u64x2::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u64x2(), count), u64x2::ZERO).as_m128i()
    }
}
2832
/// Shifts each packed 64-bit integer in `a` left by the per-element count
/// in `count`, shifting in zeros (`vpsllvq`). Elements whose count is 64 or
/// greater become zero, matching the instruction.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllvq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sllv_epi64(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u64x4();
        // Lanes with an in-range shift count (< 64).
        let no_overflow: u64x4 = simd_lt(count, u64x4::splat(u64::BITS as u64));
        // Clamp out-of-range counts to 0 so `simd_shl` is never UB, then
        // zero those lanes in the result.
        let count = simd_select(no_overflow, count, u64x4::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u64x4(), count), u64x4::ZERO).as_m256i()
    }
}
2850
/// Shifts packed 16-bit integers in `a` right arithmetically (sign-filling)
/// by the shift amount held in `count` (`vpsraw`); forwarded to the LLVM
/// `llvm.x86.avx2.psra.w` intrinsic.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsraw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sra_epi16(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psraw(a.as_i16x16(), count.as_i16x8())) }
}
2862
/// Shifts packed 32-bit integers in `a` right arithmetically (sign-filling)
/// by the shift amount held in `count` (`vpsrad`); forwarded to the LLVM
/// `llvm.x86.avx2.psra.d` intrinsic.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrad))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sra_epi32(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psrad(a.as_i32x8(), count.as_i32x4())) }
}
2874
/// Shifts each packed 16-bit integer in `a` right arithmetically by the
/// immediate `IMM8`, shifting in sign bits (`vpsraw`). Counts above 15 are
/// clamped to 15, which fills every lane with its sign bit.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srai_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    // `min(15)` keeps `simd_shr` in range while matching the hardware's
    // saturating behavior for large immediates.
    unsafe { transmute(simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16))) }
}
2888
/// Shifts each packed 32-bit integer in `a` right arithmetically by the
/// immediate `IMM8`, shifting in sign bits (`vpsrad`). Counts above 31 are
/// clamped to 31, which fills every lane with its sign bit.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srai_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    // `min(31)` keeps `simd_shr` in range while matching the hardware's
    // saturating behavior for large immediates.
    unsafe { transmute(simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31)))) }
}
2902
/// Shifts each packed 32-bit integer in `a` right arithmetically by the
/// per-element count in `count` (`vpsravd`). Counts of 32 or more behave
/// like a shift by 31, filling the lane with its sign bit.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsravd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u32x4();
        // Lanes with an in-range shift count (< 32).
        let no_overflow: u32x4 = simd_lt(count, u32x4::splat(u32::BITS));
        // Out-of-range counts are replaced by 31: an arithmetic shift by 31
        // yields the same all-sign-bits result the hardware produces.
        let count = simd_select(no_overflow, transmute(count), i32x4::splat(31));
        simd_shr(a.as_i32x4(), count).as_m128i()
    }
}
2919
/// Shifts each packed 32-bit integer in `a` right arithmetically by the
/// per-element count in `count` (`vpsravd`). Counts of 32 or more behave
/// like a shift by 31, filling the lane with its sign bit.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsravd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u32x8();
        // Lanes with an in-range shift count (< 32).
        let no_overflow: u32x8 = simd_lt(count, u32x8::splat(u32::BITS));
        // Out-of-range counts are replaced by 31: an arithmetic shift by 31
        // yields the same all-sign-bits result the hardware produces.
        let count = simd_select(no_overflow, transmute(count), i32x8::splat(31));
        simd_shr(a.as_i32x8(), count).as_m256i()
    }
}
2936
/// Shifts each 128-bit lane of `a` right by `IMM8` bytes, shifting in zeros
/// (`vpsrldq`). Alias for [`_mm256_bsrli_epi128`], which implements the
/// byte shuffle.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_bsrli_epi128::<IMM8>(a)
}
2949
/// Shifts each 128-bit lane of `a` right by `IMM8` bytes, shifting in zeros
/// (`vpsrldq`). The two 128-bit halves are shifted independently; shifts of
/// 16 bytes or more zero the whole lane.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    // Shuffle index for output byte `i` over the concatenation
    // (zero vector = indices 0..=31, `a` = indices 32..=63).
    const fn mask(shift: i32, i: u32) -> u32 {
        let shift = shift as u32 & 0xff;
        if shift > 15 || (15 - (i % 16)) < shift {
            // Byte is shifted in from above the lane boundary: select a zero.
            0
        } else {
            // Select byte `i + shift` of `a` within the same 128-bit lane.
            32 + (i + shift)
        }
    }
    unsafe {
        let a = a.as_i8x32();
        let r: i8x32 = simd_shuffle!(
            i8x32::ZERO,
            a,
            [
                mask(IMM8, 0),
                mask(IMM8, 1),
                mask(IMM8, 2),
                mask(IMM8, 3),
                mask(IMM8, 4),
                mask(IMM8, 5),
                mask(IMM8, 6),
                mask(IMM8, 7),
                mask(IMM8, 8),
                mask(IMM8, 9),
                mask(IMM8, 10),
                mask(IMM8, 11),
                mask(IMM8, 12),
                mask(IMM8, 13),
                mask(IMM8, 14),
                mask(IMM8, 15),
                mask(IMM8, 16),
                mask(IMM8, 17),
                mask(IMM8, 18),
                mask(IMM8, 19),
                mask(IMM8, 20),
                mask(IMM8, 21),
                mask(IMM8, 22),
                mask(IMM8, 23),
                mask(IMM8, 24),
                mask(IMM8, 25),
                mask(IMM8, 26),
                mask(IMM8, 27),
                mask(IMM8, 28),
                mask(IMM8, 29),
                mask(IMM8, 30),
                mask(IMM8, 31),
            ],
        );
        transmute(r)
    }
}
3011
/// Shifts packed 16-bit integers in `a` right logically (zero-filling) by
/// the shift amount held in `count` (`vpsrlw`); forwarded to the LLVM
/// `llvm.x86.avx2.psrl.w` intrinsic.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srl_epi16(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psrlw(a.as_i16x16(), count.as_i16x8())) }
}
3023
/// Shifts packed 32-bit integers in `a` right logically (zero-filling) by
/// the shift amount held in `count` (`vpsrld`); forwarded to the LLVM
/// `llvm.x86.avx2.psrl.d` intrinsic.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srl_epi32(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psrld(a.as_i32x8(), count.as_i32x4())) }
}
3035
/// Shifts packed 64-bit integers in `a` right logically (zero-filling) by
/// the shift amount held in `count` (`vpsrlq`); forwarded to the LLVM
/// `llvm.x86.avx2.psrl.q` intrinsic.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srl_epi64(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psrlq(a.as_i64x4(), count.as_i64x2())) }
}
3047
/// Shifts each packed 16-bit integer in `a` right logically by the
/// immediate `IMM8`, shifting in zeros (`vpsrlw`). A shift count of 16 or
/// more produces all-zero lanes, matching the instruction's behavior.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 16 {
            // Out-of-range counts are not UB for the hardware: result is 0.
            _mm256_setzero_si256()
        } else {
            // Unsigned element type so `simd_shr` is a logical shift.
            transmute(simd_shr(a.as_u16x16(), u16x16::splat(IMM8 as u16)))
        }
    }
}
3067
/// Shifts each packed 32-bit integer in `a` right logically by the
/// immediate `IMM8`, shifting in zeros (`vpsrld`). A shift count of 32 or
/// more produces all-zero lanes, matching the instruction's behavior.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 32 {
            // Out-of-range counts are not UB for the hardware: result is 0.
            _mm256_setzero_si256()
        } else {
            // Unsigned element type so `simd_shr` is a logical shift.
            transmute(simd_shr(a.as_u32x8(), u32x8::splat(IMM8 as u32)))
        }
    }
}
3087
/// Shifts each packed 64-bit integer in `a` right logically by the
/// immediate `IMM8`, shifting in zeros (`vpsrlq`). A shift count of 64 or
/// more produces all-zero lanes, matching the instruction's behavior.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 64 {
            // Out-of-range counts are not UB for the hardware: result is 0.
            _mm256_setzero_si256()
        } else {
            // Unsigned element type so `simd_shr` is a logical shift.
            transmute(simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64)))
        }
    }
}
3107
/// Shifts each packed 32-bit integer in `a` right logically by the
/// per-element count in `count` (`vpsrlvd`). Elements whose count is 32 or
/// greater become zero, matching the instruction.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u32x4();
        // Lanes with an in-range shift count (< 32).
        let no_overflow: u32x4 = simd_lt(count, u32x4::splat(u32::BITS));
        // Clamp out-of-range counts to 0 so `simd_shr` is never UB, then
        // zero those lanes in the result.
        let count = simd_select(no_overflow, count, u32x4::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u32x4(), count), u32x4::ZERO).as_m128i()
    }
}
3124
/// Shifts each packed 32-bit integer in `a` right logically by the
/// per-element count in `count` (`vpsrlvd`). Elements whose count is 32 or
/// greater become zero, matching the instruction.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u32x8();
        // Lanes with an in-range shift count (< 32).
        let no_overflow: u32x8 = simd_lt(count, u32x8::splat(u32::BITS));
        // Clamp out-of-range counts to 0 so `simd_shr` is never UB, then
        // zero those lanes in the result.
        let count = simd_select(no_overflow, count, u32x8::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u32x8(), count), u32x8::ZERO).as_m256i()
    }
}
3141
/// Shifts each packed 64-bit integer in `a` right logically by the
/// per-element count in `count` (`vpsrlvq`). Elements whose count is 64 or
/// greater become zero, matching the instruction.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u64x2();
        // Lanes with an in-range shift count (< 64).
        let no_overflow: u64x2 = simd_lt(count, u64x2::splat(u64::BITS as u64));
        // Clamp out-of-range counts to 0 so `simd_shr` is never UB, then
        // zero those lanes in the result.
        let count = simd_select(no_overflow, count, u64x2::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u64x2(), count), u64x2::ZERO).as_m128i()
    }
}
3158
/// Shifts each packed 64-bit integer in `a` right logically by the
/// per-element count in `count` (`vpsrlvq`). Elements whose count is 64 or
/// greater become zero, matching the instruction.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srlv_epi64(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u64x4();
        // Lanes with an in-range shift count (< 64).
        let no_overflow: u64x4 = simd_lt(count, u64x4::splat(u64::BITS as u64));
        // Clamp out-of-range counts to 0 so `simd_shr` is never UB, then
        // zero those lanes in the result.
        let count = simd_select(no_overflow, count, u64x4::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u64x4(), count), u64x4::ZERO).as_m256i()
    }
}
3175
/// Loads 256 bits of integer data from memory with a non-temporal hint
/// (`vmovntdqa`), asking the CPU to minimize cache pollution.
///
/// # Safety
///
/// `mem_addr` must be valid for a 32-byte read and, per the `vmovntdqa`
/// instruction's requirements, 32-byte aligned.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmovntdqa))]
#[stable(feature = "simd_x86_updates", since = "1.82.0")]
pub unsafe fn _mm256_stream_load_si256(mem_addr: *const __m256i) -> __m256i {
    let dst: __m256i;
    // Inline asm rather than a plain load so the non-temporal hint cannot be
    // optimized away. NOTE(review): `vpl!` presumably appends the `{p}`
    // memory-operand syntax — defined elsewhere in this crate.
    crate::arch::asm!(
        vpl!("vmovntdqa {a}"),
        a = out(ymm_reg) dst,
        p = in(reg) mem_addr,
        options(pure, readonly, nostack, preserves_flags),
    );
    dst
}
3195
/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in
/// `a` with wrapping on overflow (`vpsubw`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sub_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_sub(a.as_i16x16(), b.as_i16x16())) }
}
3206
/// Subtracts packed 32-bit integers in `b` from packed 32-bit integers in
/// `a` with wrapping on overflow (`vpsubd`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sub_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_sub(a.as_i32x8(), b.as_i32x8())) }
}
3217
/// Subtracts packed 64-bit integers in `b` from packed 64-bit integers in
/// `a` with wrapping on overflow (`vpsubq`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sub_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_sub(a.as_i64x4(), b.as_i64x4())) }
}
3228
/// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in
/// `a` with wrapping on overflow (`vpsubb`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sub_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_sub(a.as_i8x32(), b.as_i8x32())) }
}
3239
/// Subtracts packed signed 16-bit integers in `b` from those in `a`,
/// saturating at the i16 bounds instead of wrapping (`vpsubsw`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_subs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_sub(a.as_i16x16(), b.as_i16x16())) }
}
3251
/// Subtracts packed signed 8-bit integers in `b` from those in `a`,
/// saturating at the i8 bounds instead of wrapping (`vpsubsb`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_subs_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_sub(a.as_i8x32(), b.as_i8x32())) }
}
3263
/// Subtracts packed unsigned 16-bit integers in `b` from those in `a`,
/// saturating at zero instead of wrapping (`vpsubusw`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_subs_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_sub(a.as_u16x16(), b.as_u16x16())) }
}
3275
/// Subtracts packed unsigned 8-bit integers in `b` from those in `a`,
/// saturating at zero instead of wrapping (`vpsubusb`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_sub(a.as_u8x32(), b.as_u8x32())) }
}
3287
/// Interleaves the high eight 8-bit integers of each 128-bit lane of `a`
/// with those of `b` (`vpunpckhbw`); the two lanes are processed
/// independently.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Indices 0..=31 select from `a`, 32..=63 from `b`; rows 1-2 cover
        // the low 128-bit lane, rows 3-4 the high lane.
        #[rustfmt::skip]
        let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [
            8, 40, 9, 41, 10, 42, 11, 43,
            12, 44, 13, 45, 14, 46, 15, 47,
            24, 56, 25, 57, 26, 58, 27, 59,
            28, 60, 29, 61, 30, 62, 31, 63,
        ]);
        transmute(r)
    }
}
3343
/// Interleaves the low eight 8-bit integers of each 128-bit lane of `a`
/// with those of `b` (`vpunpcklbw`); the two lanes are processed
/// independently.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Indices 0..=31 select from `a`, 32..=63 from `b`; rows 1-2 cover
        // the low 128-bit lane, rows 3-4 the high lane.
        #[rustfmt::skip]
        let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [
            0, 32, 1, 33, 2, 34, 3, 35,
            4, 36, 5, 37, 6, 38, 7, 39,
            16, 48, 17, 49, 18, 50, 19, 51,
            20, 52, 21, 53, 22, 54, 23, 55,
        ]);
        transmute(r)
    }
}
3398
/// Interleaves the high four 16-bit integers of each 128-bit lane of `a`
/// with those of `b` (`vpunpckhwd`); the two lanes are processed
/// independently.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Indices 0..=15 select from `a`, 16..=31 from `b`.
        let r: i16x16 = simd_shuffle!(
            a.as_i16x16(),
            b.as_i16x16(),
            [4, 20, 5, 21, 6, 22, 7, 23, 12, 28, 13, 29, 14, 30, 15, 31],
        );
        transmute(r)
    }
}
3447
/// Interleaves the low four 16-bit integers of each 128-bit lane of `a`
/// with those of `b` (`vpunpcklwd`); the two lanes are processed
/// independently.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Indices 0..=15 select from `a`, 16..=31 from `b`.
        let r: i16x16 = simd_shuffle!(
            a.as_i16x16(),
            b.as_i16x16(),
            [0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27],
        );
        transmute(r)
    }
}
3497
/// Interleaves the high two 32-bit integers of each 128-bit lane of `a`
/// with those of `b`; may lower to the float-domain `vunpckhps`, which is
/// bit-identical for integer data.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vunpckhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Indices 0..=7 select from `a`, 8..=15 from `b`.
        let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [2, 10, 3, 11, 6, 14, 7, 15]);
        transmute(r)
    }
}
3536
/// Interleaves the low two 32-bit integers of each 128-bit lane of `a`
/// with those of `b`; may lower to the float-domain `vunpcklps`, which is
/// bit-identical for integer data.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vunpcklps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Indices 0..=7 select from `a`, 8..=15 from `b`.
        let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [0, 8, 1, 9, 4, 12, 5, 13]);
        transmute(r)
    }
}
3575
/// Interleaves the high 64-bit integer of each 128-bit lane of `a` with
/// that of `b`; may lower to the float-domain `vunpckhpd`, which is
/// bit-identical for integer data.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Indices 0..=3 select from `a`, 4..=7 from `b`.
        let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [1, 5, 3, 7]);
        transmute(r)
    }
}
3614
/// Interleaves the low 64-bit integer of each 128-bit lane of `a` with
/// that of `b`; may lower to the float-domain `vunpcklpd`, which is
/// bit-identical for integer data.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_unpacklo_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Indices 0..=3 select from `a`, 4..=7 from `b`.
        let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [0, 4, 2, 6]);
        transmute(r)
    }
}
3653
/// Computes the bitwise XOR of the 256 bits in `a` and `b`; may lower to
/// the float-domain `vxorps`, which is bit-identical for integer data.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_xor(a.as_i64x4(), b.as_i64x4())) }
}
3665
/// Extracts the 8-bit lane of `a` selected by `INDEX` (0..=31) and
/// zero-extends it to an `i32`.
#[inline]
#[target_feature(enable = "avx2")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_extract_epi8<const INDEX: i32>(a: __m256i) -> i32 {
    static_assert_uimm_bits!(INDEX, 5);
    // Extract as u8 first so the widening to i32 is a zero-extension.
    unsafe { simd_extract!(a.as_u8x32(), INDEX as u32, u8) as i32 }
}
3681
/// Extracts the 16-bit lane of `a` selected by `INDEX` (0..=15) and
/// zero-extends it to an `i32`.
#[inline]
#[target_feature(enable = "avx2")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_extract_epi16<const INDEX: i32>(a: __m256i) -> i32 {
    static_assert_uimm_bits!(INDEX, 4);
    // Extract as u16 first so the widening to i32 is a zero-extension.
    unsafe { simd_extract!(a.as_u16x16(), INDEX as u32, u16) as i32 }
}
3697
// LLVM intrinsic declarations backing the AVX2 wrappers in this module.
// Each `link_name` must match LLVM's intrinsic name and signature exactly;
// `improper_ctypes` is allowed because SIMD vector types trip the FFI lint
// even though these are compiler-recognized intrinsics, not real C calls.
#[allow(improper_ctypes)]
unsafe extern "C" {
    // Horizontal saturating add/sub and multiply-add.
    #[link_name = "llvm.x86.avx2.phadd.sw"]
    fn phaddsw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.phsub.sw"]
    fn phsubsw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.pmadd.ub.sw"]
    fn pmaddubsw(a: u8x32, b: i8x32) -> i16x16;
    #[link_name = "llvm.x86.avx2.mpsadbw"]
    fn mpsadbw(a: u8x32, b: u8x32, imm8: i8) -> u16x16;
    #[link_name = "llvm.x86.avx2.pmul.hr.sw"]
    fn pmulhrsw(a: i16x16, b: i16x16) -> i16x16;
    // Saturating narrowing packs and sum-of-absolute-differences.
    #[link_name = "llvm.x86.avx2.packsswb"]
    fn packsswb(a: i16x16, b: i16x16) -> i8x32;
    #[link_name = "llvm.x86.avx2.packssdw"]
    fn packssdw(a: i32x8, b: i32x8) -> i16x16;
    #[link_name = "llvm.x86.avx2.packuswb"]
    fn packuswb(a: i16x16, b: i16x16) -> u8x32;
    #[link_name = "llvm.x86.avx2.packusdw"]
    fn packusdw(a: i32x8, b: i32x8) -> u16x16;
    #[link_name = "llvm.x86.avx2.psad.bw"]
    fn psadbw(a: u8x32, b: u8x32) -> u64x4;
    // Sign application (used by the `_mm256_sign_*` wrappers).
    #[link_name = "llvm.x86.avx2.psign.b"]
    fn psignb(a: i8x32, b: i8x32) -> i8x32;
    #[link_name = "llvm.x86.avx2.psign.w"]
    fn psignw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.psign.d"]
    fn psignd(a: i32x8, b: i32x8) -> i32x8;
    // Shifts by a count held in the low bits of a 128-bit vector.
    #[link_name = "llvm.x86.avx2.psll.w"]
    fn psllw(a: i16x16, count: i16x8) -> i16x16;
    #[link_name = "llvm.x86.avx2.psll.d"]
    fn pslld(a: i32x8, count: i32x4) -> i32x8;
    #[link_name = "llvm.x86.avx2.psll.q"]
    fn psllq(a: i64x4, count: i64x2) -> i64x4;
    #[link_name = "llvm.x86.avx2.psra.w"]
    fn psraw(a: i16x16, count: i16x8) -> i16x16;
    #[link_name = "llvm.x86.avx2.psra.d"]
    fn psrad(a: i32x8, count: i32x4) -> i32x8;
    #[link_name = "llvm.x86.avx2.psrl.w"]
    fn psrlw(a: i16x16, count: i16x8) -> i16x16;
    #[link_name = "llvm.x86.avx2.psrl.d"]
    fn psrld(a: i32x8, count: i32x4) -> i32x8;
    #[link_name = "llvm.x86.avx2.psrl.q"]
    fn psrlq(a: i64x4, count: i64x2) -> i64x4;
    // Byte shuffle and cross-lane permutes.
    #[link_name = "llvm.x86.avx2.pshuf.b"]
    fn pshufb(a: u8x32, b: u8x32) -> u8x32;
    #[link_name = "llvm.x86.avx2.permd"]
    fn permd(a: u32x8, b: u32x8) -> u32x8;
    #[link_name = "llvm.x86.avx2.permps"]
    fn permps(a: __m256, b: i32x8) -> __m256;
    // Masked gathers: `d`/`q` = 32-/64-bit offsets, second letter = element
    // type, `.256` = 256-bit destination variants.
    #[link_name = "llvm.x86.avx2.gather.d.d"]
    fn pgatherdd(src: i32x4, slice: *const i8, offsets: i32x4, mask: i32x4, scale: i8) -> i32x4;
    #[link_name = "llvm.x86.avx2.gather.d.d.256"]
    fn vpgatherdd(src: i32x8, slice: *const i8, offsets: i32x8, mask: i32x8, scale: i8) -> i32x8;
    #[link_name = "llvm.x86.avx2.gather.d.q"]
    fn pgatherdq(src: i64x2, slice: *const i8, offsets: i32x4, mask: i64x2, scale: i8) -> i64x2;
    #[link_name = "llvm.x86.avx2.gather.d.q.256"]
    fn vpgatherdq(src: i64x4, slice: *const i8, offsets: i32x4, mask: i64x4, scale: i8) -> i64x4;
    #[link_name = "llvm.x86.avx2.gather.q.d"]
    fn pgatherqd(src: i32x4, slice: *const i8, offsets: i64x2, mask: i32x4, scale: i8) -> i32x4;
    #[link_name = "llvm.x86.avx2.gather.q.d.256"]
    fn vpgatherqd(src: i32x4, slice: *const i8, offsets: i64x4, mask: i32x4, scale: i8) -> i32x4;
    #[link_name = "llvm.x86.avx2.gather.q.q"]
    fn pgatherqq(src: i64x2, slice: *const i8, offsets: i64x2, mask: i64x2, scale: i8) -> i64x2;
    #[link_name = "llvm.x86.avx2.gather.q.q.256"]
    fn vpgatherqq(src: i64x4, slice: *const i8, offsets: i64x4, mask: i64x4, scale: i8) -> i64x4;
    #[link_name = "llvm.x86.avx2.gather.d.pd"]
    fn pgatherdpd(
        src: __m128d,
        slice: *const i8,
        offsets: i32x4,
        mask: __m128d,
        scale: i8,
    ) -> __m128d;
    #[link_name = "llvm.x86.avx2.gather.d.pd.256"]
    fn vpgatherdpd(
        src: __m256d,
        slice: *const i8,
        offsets: i32x4,
        mask: __m256d,
        scale: i8,
    ) -> __m256d;
    #[link_name = "llvm.x86.avx2.gather.q.pd"]
    fn pgatherqpd(
        src: __m128d,
        slice: *const i8,
        offsets: i64x2,
        mask: __m128d,
        scale: i8,
    ) -> __m128d;
    #[link_name = "llvm.x86.avx2.gather.q.pd.256"]
    fn vpgatherqpd(
        src: __m256d,
        slice: *const i8,
        offsets: i64x4,
        mask: __m256d,
        scale: i8,
    ) -> __m256d;
    #[link_name = "llvm.x86.avx2.gather.d.ps"]
    fn pgatherdps(src: __m128, slice: *const i8, offsets: i32x4, mask: __m128, scale: i8)
        -> __m128;
    #[link_name = "llvm.x86.avx2.gather.d.ps.256"]
    fn vpgatherdps(
        src: __m256,
        slice: *const i8,
        offsets: i32x8,
        mask: __m256,
        scale: i8,
    ) -> __m256;
    #[link_name = "llvm.x86.avx2.gather.q.ps"]
    fn pgatherqps(src: __m128, slice: *const i8, offsets: i64x2, mask: __m128, scale: i8)
        -> __m128;
    #[link_name = "llvm.x86.avx2.gather.q.ps.256"]
    fn vpgatherqps(
        src: __m128,
        slice: *const i8,
        offsets: i64x4,
        mask: __m128,
        scale: i8,
    ) -> __m128;
}
3819
3820#[cfg(test)]
3821mod tests {
3822
3823 use stdarch_test::simd_test;
3824
3825 use crate::core_arch::x86::*;
3826
    // Exercises zero, ±1, and the extremes; note abs(i32::MIN) wraps back
    // to i32::MIN, spelled here as i32::MAX.wrapping_add(1).
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_abs_epi32() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let r = _mm256_abs_epi32(a);
        #[rustfmt::skip]
        let e = _mm256_setr_epi32(
            0, 1, 1, i32::MAX,
            i32::MAX.wrapping_add(1), 100, 100, 32,
        );
        assert_eq_m256i(r, e);
    }
3842
    // Exercises small values and the extremes; abs(i16::MIN) wraps back to
    // i16::MIN, spelled as i16::MAX.wrapping_add(1).
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_abs_epi16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            0, 1, -1, 2, -2, 3, -3, 4,
            -4, 5, -5, i16::MAX, i16::MIN, 100, -100, -32,
        );
        let r = _mm256_abs_epi16(a);
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            0, 1, 1, 2, 2, 3, 3, 4,
            4, 5, 5, i16::MAX, i16::MAX.wrapping_add(1), 100, 100, 32,
        );
        assert_eq_m256i(r, e);
    }
3858
    // Exercises small values and the extremes in both 128-bit halves;
    // abs(i8::MIN) wraps back to i8::MIN, spelled as i8::MAX.wrapping_add(1).
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_abs_epi8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            0, 1, -1, 2, -2, 3, -3, 4,
            -4, 5, -5, i8::MAX, i8::MIN, 100, -100, -32,
            0, 1, -1, 2, -2, 3, -3, 4,
            -4, 5, -5, i8::MAX, i8::MIN, 100, -100, -32,
        );
        let r = _mm256_abs_epi8(a);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            0, 1, 1, 2, 2, 3, 3, 4,
            4, 5, 5, i8::MAX, i8::MAX.wrapping_add(1), 100, 100, 32,
            0, 1, 1, 2, 2, 3, 3, 4,
            4, 5, 5, i8::MAX, i8::MAX.wrapping_add(1), 100, 100, 32,
        );
        assert_eq_m256i(r, e);
    }
3878
    // Lane-wise 64-bit addition with mixed signs; no overflow involved.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_add_epi64() {
        let a = _mm256_setr_epi64x(-10, 0, 100, 1_000_000_000);
        let b = _mm256_setr_epi64x(-1, 0, 1, 2);
        let r = _mm256_add_epi64(a, b);
        let e = _mm256_setr_epi64x(-11, 0, 101, 1_000_000_002);
        assert_eq_m256i(r, e);
    }
3887
    // Lane-wise 32-bit addition across all eight lanes.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_add_epi32() {
        let a = _mm256_setr_epi32(-1, 0, 1, 2, 3, 4, 5, 6);
        let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_add_epi32(a, b);
        let e = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
        assert_eq_m256i(r, e);
    }
3896
    // Lane-wise 16-bit addition: a + a doubles every lane.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_add_epi16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let r = _mm256_add_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            0, 2, 4, 6, 8, 10, 12, 14,
            16, 18, 20, 22, 24, 26, 28, 30,
        );
        assert_eq_m256i(r, e);
    }
3917
    // Lane-wise 8-bit addition: a + a doubles every lane.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_add_epi8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm256_add_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            0, 2, 4, 6, 8, 10, 12, 14,
            16, 18, 20, 22, 24, 26, 28, 30,
            32, 34, 36, 38, 40, 42, 44, 46,
            48, 50, 52, 54, 56, 58, 60, 62,
        );
        assert_eq_m256i(r, e);
    }
3944
    // Saturating signed 8-bit add on values that stay within range, so the
    // result matches plain addition.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epi8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi8(
            32, 33, 34, 35, 36, 37, 38, 39,
            40, 41, 42, 43, 44, 45, 46, 47,
            48, 49, 50, 51, 52, 53, 54, 55,
            56, 57, 58, 59, 60, 61, 62, 63,
        );
        let r = _mm256_adds_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            32, 34, 36, 38, 40, 42, 44, 46,
            48, 50, 52, 54, 56, 58, 60, 62,
            64, 66, 68, 70, 72, 74, 76, 78,
            80, 82, 84, 86, 88, 90, 92, 94,
        );
        assert_eq_m256i(r, e);
    }
3971
    // i8::MAX + 1 saturates to i8::MAX (i.e. the result equals `a`).
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epi8_saturate_positive() {
        let a = _mm256_set1_epi8(0x7F);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_adds_epi8(a, b);
        assert_eq_m256i(r, a);
    }
3979
    // i8::MIN + (-1) saturates to i8::MIN (i.e. the result equals `a`).
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epi8_saturate_negative() {
        let a = _mm256_set1_epi8(-0x80);
        let b = _mm256_set1_epi8(-1);
        let r = _mm256_adds_epi8(a, b);
        assert_eq_m256i(r, a);
    }
3987
    // Saturating signed 16-bit add on values that stay within range, so the
    // result matches plain addition.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epi16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi16(
            32, 33, 34, 35, 36, 37, 38, 39,
            40, 41, 42, 43, 44, 45, 46, 47,
        );
        let r = _mm256_adds_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            32, 34, 36, 38, 40, 42, 44, 46,
            48, 50, 52, 54, 56, 58, 60, 62,
        );

        assert_eq_m256i(r, e);
    }
4009
    // i16::MAX + 1 saturates to i16::MAX (i.e. the result equals `a`).
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epi16_saturate_positive() {
        let a = _mm256_set1_epi16(0x7FFF);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_adds_epi16(a, b);
        assert_eq_m256i(r, a);
    }
4017
    // i16::MIN + (-1) saturates to i16::MIN (i.e. the result equals `a`).
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epi16_saturate_negative() {
        let a = _mm256_set1_epi16(-0x8000);
        let b = _mm256_set1_epi16(-1);
        let r = _mm256_adds_epi16(a, b);
        assert_eq_m256i(r, a);
    }
4025
    // Saturating unsigned 8-bit add on values that stay within range, so
    // the result matches plain addition.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epu8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi8(
            32, 33, 34, 35, 36, 37, 38, 39,
            40, 41, 42, 43, 44, 45, 46, 47,
            48, 49, 50, 51, 52, 53, 54, 55,
            56, 57, 58, 59, 60, 61, 62, 63,
        );
        let r = _mm256_adds_epu8(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            32, 34, 36, 38, 40, 42, 44, 46,
            48, 50, 52, 54, 56, 58, 60, 62,
            64, 66, 68, 70, 72, 74, 76, 78,
            80, 82, 84, 86, 88, 90, 92, 94,
        );
        assert_eq_m256i(r, e);
    }
4052
    // u8::MAX (!0) + 1 saturates to u8::MAX (i.e. the result equals `a`).
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epu8_saturate() {
        let a = _mm256_set1_epi8(!0);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_adds_epu8(a, b);
        assert_eq_m256i(r, a);
    }
4060
    // Saturating unsigned 16-bit add on values that stay within range, so
    // the result matches plain addition.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epu16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi16(
            32, 33, 34, 35, 36, 37, 38, 39,
            40, 41, 42, 43, 44, 45, 46, 47,
        );
        let r = _mm256_adds_epu16(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            32, 34, 36, 38, 40, 42, 44, 46,
            48, 50, 52, 54, 56, 58, 60, 62,
        );

        assert_eq_m256i(r, e);
    }
4082
4083 #[simd_test(enable = "avx2")]
4084 unsafe fn test_mm256_adds_epu16_saturate() {
4085 let a = _mm256_set1_epi16(!0);
4086 let b = _mm256_set1_epi16(1);
4087 let r = _mm256_adds_epu16(a, b);
4088 assert_eq_m256i(r, a);
4089 }
4090
4091 #[simd_test(enable = "avx2")]
4092 unsafe fn test_mm256_and_si256() {
4093 let a = _mm256_set1_epi8(5);
4094 let b = _mm256_set1_epi8(3);
4095 let got = _mm256_and_si256(a, b);
4096 assert_eq_m256i(got, _mm256_set1_epi8(1));
4097 }
4098
4099 #[simd_test(enable = "avx2")]
4100 unsafe fn test_mm256_andnot_si256() {
4101 let a = _mm256_set1_epi8(5);
4102 let b = _mm256_set1_epi8(3);
4103 let got = _mm256_andnot_si256(a, b);
4104 assert_eq_m256i(got, _mm256_set1_epi8(2));
4105 }
4106
4107 #[simd_test(enable = "avx2")]
4108 unsafe fn test_mm256_avg_epu8() {
4109 let (a, b) = (_mm256_set1_epi8(3), _mm256_set1_epi8(9));
4110 let r = _mm256_avg_epu8(a, b);
4111 assert_eq_m256i(r, _mm256_set1_epi8(6));
4112 }
4113
4114 #[simd_test(enable = "avx2")]
4115 unsafe fn test_mm256_avg_epu16() {
4116 let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9));
4117 let r = _mm256_avg_epu16(a, b);
4118 assert_eq_m256i(r, _mm256_set1_epi16(6));
4119 }
4120
    // Bit i of the immediate selects 32-bit lane i from `b` (1) or `a` (0).
    // 0x01 takes lane 0 from b; 0x0E with swapped operands picks the same lanes,
    // so both calls produce the same vector.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_blend_epi32() {
        let (a, b) = (_mm_set1_epi32(3), _mm_set1_epi32(9));
        let e = _mm_setr_epi32(9, 3, 3, 3);
        let r = _mm_blend_epi32::<0x01>(a, b);
        assert_eq_m128i(r, e);

        let r = _mm_blend_epi32::<0x0E>(b, a);
        assert_eq_m128i(r, e);
    }

    // 256-bit variant: the 8-bit immediate covers all eight 32-bit lanes.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_blend_epi32() {
        let (a, b) = (_mm256_set1_epi32(3), _mm256_set1_epi32(9));
        let e = _mm256_setr_epi32(9, 3, 3, 3, 3, 3, 3, 3);
        let r = _mm256_blend_epi32::<0x01>(a, b);
        assert_eq_m256i(r, e);

        let e = _mm256_setr_epi32(3, 9, 3, 3, 3, 3, 3, 9);
        let r = _mm256_blend_epi32::<0x82>(a, b);
        assert_eq_m256i(r, e);

        let e = _mm256_setr_epi32(3, 3, 9, 9, 9, 9, 9, 3);
        let r = _mm256_blend_epi32::<0x7C>(a, b);
        assert_eq_m256i(r, e);
    }

    // For 16-bit blends the 8-bit immediate is applied to each 128-bit half
    // independently, hence lane 8 mirrors lane 0 in the expected vector.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_blend_epi16() {
        let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9));
        let e = _mm256_setr_epi16(9, 3, 3, 3, 3, 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, 3);
        let r = _mm256_blend_epi16::<0x01>(a, b);
        assert_eq_m256i(r, e);

        let r = _mm256_blend_epi16::<0xFE>(b, a);
        assert_eq_m256i(r, e);
    }

    // Variable blend: only the byte whose mask lane has its MSB set (here
    // index 2, via -1) is taken from `b`; everything else comes from `a`.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_blendv_epi8() {
        let (a, b) = (_mm256_set1_epi8(4), _mm256_set1_epi8(2));
        let mask = _mm256_insert_epi8::<2>(_mm256_set1_epi8(0), -1);
        let e = _mm256_insert_epi8::<2>(_mm256_set1_epi8(4), 2);
        let r = _mm256_blendv_epi8(a, b, mask);
        assert_eq_m256i(r, e);
    }
4167
    // Broadcast replicates element 0 of the source into every destination lane;
    // the non-zero data placed in higher lanes must be ignored.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_broadcastb_epi8() {
        let a = _mm_insert_epi8::<0>(_mm_set1_epi8(0x00), 0x2a);
        let res = _mm_broadcastb_epi8(a);
        assert_eq_m128i(res, _mm_set1_epi8(0x2a));
    }

    // 256-bit byte broadcast from a 128-bit source.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_broadcastb_epi8() {
        let a = _mm_insert_epi8::<0>(_mm_set1_epi8(0x00), 0x2a);
        let res = _mm256_broadcastb_epi8(a);
        assert_eq_m256i(res, _mm256_set1_epi8(0x2a));
    }

    // 32-bit broadcast; 0x8000000 in lane 1 verifies only lane 0 is used.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_broadcastd_epi32() {
        let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0);
        let res = _mm_broadcastd_epi32(a);
        assert_eq_m128i(res, _mm_set1_epi32(0x2a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_broadcastd_epi32() {
        let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0);
        let res = _mm256_broadcastd_epi32(a);
        assert_eq_m256i(res, _mm256_set1_epi32(0x2a));
    }

    // 64-bit broadcast; 0x1ffffffff exceeds 32 bits, checking the full qword moves.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_broadcastq_epi64() {
        let a = _mm_setr_epi64x(0x1ffffffff, 0);
        let res = _mm_broadcastq_epi64(a);
        assert_eq_m128i(res, _mm_set1_epi64x(0x1ffffffff));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_broadcastq_epi64() {
        let a = _mm_setr_epi64x(0x1ffffffff, 0);
        let res = _mm256_broadcastq_epi64(a);
        assert_eq_m256i(res, _mm256_set1_epi64x(0x1ffffffff));
    }

    // f64 broadcast of the low element.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_broadcastsd_pd() {
        let a = _mm_setr_pd(6.88, 3.44);
        let res = _mm_broadcastsd_pd(a);
        assert_eq_m128d(res, _mm_set1_pd(6.88));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_broadcastsd_pd() {
        let a = _mm_setr_pd(6.88, 3.44);
        let res = _mm256_broadcastsd_pd(a);
        assert_eq_m256d(res, _mm256_set1_pd(6.88f64));
    }

    // Whole-128-bit broadcast: the source vector is duplicated into both halves.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_broadcastsi128_si256() {
        let a = _mm_setr_epi64x(0x0987654321012334, 0x5678909876543210);
        let res = _mm_broadcastsi128_si256(a);
        let retval = _mm256_setr_epi64x(
            0x0987654321012334,
            0x5678909876543210,
            0x0987654321012334,
            0x5678909876543210,
        );
        assert_eq_m256i(res, retval);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_broadcastsi128_si256() {
        let a = _mm_setr_epi64x(0x0987654321012334, 0x5678909876543210);
        let res = _mm256_broadcastsi128_si256(a);
        let retval = _mm256_setr_epi64x(
            0x0987654321012334,
            0x5678909876543210,
            0x0987654321012334,
            0x5678909876543210,
        );
        assert_eq_m256i(res, retval);
    }

    // f32 broadcast of the low element.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_broadcastss_ps() {
        let a = _mm_setr_ps(6.88, 3.44, 0.0, 0.0);
        let res = _mm_broadcastss_ps(a);
        assert_eq_m128(res, _mm_set1_ps(6.88));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_broadcastss_ps() {
        let a = _mm_setr_ps(6.88, 3.44, 0.0, 0.0);
        let res = _mm256_broadcastss_ps(a);
        assert_eq_m256(res, _mm256_set1_ps(6.88));
    }

    // 16-bit broadcast: only word 0 (overwritten to 0x22b) should be replicated.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_broadcastw_epi16() {
        let a = _mm_insert_epi16::<0>(_mm_set1_epi16(0x2a), 0x22b);
        let res = _mm_broadcastw_epi16(a);
        assert_eq_m128i(res, _mm_set1_epi16(0x22b));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_broadcastw_epi16() {
        let a = _mm_insert_epi16::<0>(_mm_set1_epi16(0x2a), 0x22b);
        let res = _mm256_broadcastw_epi16(a);
        assert_eq_m256i(res, _mm256_set1_epi16(0x22b));
    }
4277
    // `a` counts up while `b` counts down, with `b` patched so only index 2
    // matches; the comparison must yield all-ones (!0) in exactly that lane.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cmpeq_epi8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi8(
            31, 30, 2, 28, 27, 26, 25, 24,
            23, 22, 21, 20, 19, 18, 17, 16,
            15, 14, 13, 12, 11, 10, 9, 8,
            7, 6, 5, 4, 3, 2, 1, 0,
        );
        let r = _mm256_cmpeq_epi8(a, b);
        assert_eq_m256i(r, _mm256_insert_epi8::<2>(_mm256_set1_epi8(0), !0));
    }

    // Same pattern with 16-bit lanes: only lane 2 is equal.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cmpeq_epi16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi16(
            15, 14, 2, 12, 11, 10, 9, 8,
            7, 6, 5, 4, 3, 2, 1, 0,
        );
        let r = _mm256_cmpeq_epi16(a, b);
        assert_eq_m256i(r, _mm256_insert_epi16::<2>(_mm256_set1_epi16(0), !0));
    }

    // 32-bit lanes: only lane 2 is equal.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cmpeq_epi32() {
        let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm256_setr_epi32(7, 6, 2, 4, 3, 2, 1, 0);
        let r = _mm256_cmpeq_epi32(a, b);
        let e = _mm256_set1_epi32(0);
        let e = _mm256_insert_epi32::<2>(e, !0);
        assert_eq_m256i(r, e);
    }

    // 64-bit lanes: only lane 2 is equal.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cmpeq_epi64() {
        let a = _mm256_setr_epi64x(0, 1, 2, 3);
        let b = _mm256_setr_epi64x(3, 2, 2, 0);
        let r = _mm256_cmpeq_epi64(a, b);
        assert_eq_m256i(r, _mm256_insert_epi64::<2>(_mm256_set1_epi64x(0), !0));
    }

    // Signed greater-than: only lane 0 (5 > 0) should produce all-ones.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cmpgt_epi8() {
        let a = _mm256_insert_epi8::<0>(_mm256_set1_epi8(0), 5);
        let b = _mm256_set1_epi8(0);
        let r = _mm256_cmpgt_epi8(a, b);
        assert_eq_m256i(r, _mm256_insert_epi8::<0>(_mm256_set1_epi8(0), !0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cmpgt_epi16() {
        let a = _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), 5);
        let b = _mm256_set1_epi16(0);
        let r = _mm256_cmpgt_epi16(a, b);
        assert_eq_m256i(r, _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), !0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cmpgt_epi32() {
        let a = _mm256_insert_epi32::<0>(_mm256_set1_epi32(0), 5);
        let b = _mm256_set1_epi32(0);
        let r = _mm256_cmpgt_epi32(a, b);
        assert_eq_m256i(r, _mm256_insert_epi32::<0>(_mm256_set1_epi32(0), !0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cmpgt_epi64() {
        let a = _mm256_insert_epi64::<0>(_mm256_set1_epi64x(0), 5);
        let b = _mm256_set1_epi64x(0);
        let r = _mm256_cmpgt_epi64(a, b);
        assert_eq_m256i(r, _mm256_insert_epi64::<0>(_mm256_set1_epi64x(0), !0));
    }
4363
    // Sign extension i8 -> i16: all 16 input bytes are widened; the mixed
    // negative/positive pattern verifies the sign bit is propagated.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepi8_epi16() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 0, -1, 1, -2, 2, -3, 3,
            -4, 4, -5, 5, -6, 6, -7, 7,
        );
        #[rustfmt::skip]
        let r = _mm256_setr_epi16(
            0, 0, -1, 1, -2, 2, -3, 3,
            -4, 4, -5, 5, -6, 6, -7, 7,
        );
        assert_eq_m256i(r, _mm256_cvtepi8_epi16(a));
    }

    // Sign extension i8 -> i32: only the low 8 bytes of the source are used.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepi8_epi32() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 0, -1, 1, -2, 2, -3, 3,
            -4, 4, -5, 5, -6, 6, -7, 7,
        );
        let r = _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3);
        assert_eq_m256i(r, _mm256_cvtepi8_epi32(a));
    }

    // Sign extension i8 -> i64: only the low 4 bytes of the source are used.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepi8_epi64() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 0, -1, 1, -2, 2, -3, 3,
            -4, 4, -5, 5, -6, 6, -7, 7,
        );
        let r = _mm256_setr_epi64x(0, 0, -1, 1);
        assert_eq_m256i(r, _mm256_cvtepi8_epi64(a));
    }

    // Sign extension i16 -> i32 of all 8 words.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepi16_epi32() {
        let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3);
        let r = _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3);
        assert_eq_m256i(r, _mm256_cvtepi16_epi32(a));
    }

    // Sign extension i16 -> i64 of the low 4 words.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepi16_epi64() {
        let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3);
        let r = _mm256_setr_epi64x(0, 0, -1, 1);
        assert_eq_m256i(r, _mm256_cvtepi16_epi64(a));
    }

    // Sign extension i32 -> i64 of all 4 dwords.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepi32_epi64() {
        let a = _mm_setr_epi32(0, 0, -1, 1);
        let r = _mm256_setr_epi64x(0, 0, -1, 1);
        assert_eq_m256i(r, _mm256_cvtepi32_epi64(a));
    }

    // Zero extension u16 -> i32 (values here are small, so zero-extend == identity).
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepu16_epi32() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m256i(r, _mm256_cvtepu16_epi32(a));
    }

    // Zero extension u16 -> i64 of the low 4 words.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepu16_epi64() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_setr_epi64x(0, 1, 2, 3);
        assert_eq_m256i(r, _mm256_cvtepu16_epi64(a));
    }

    // Zero extension u32 -> i64 of all 4 dwords.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepu32_epi64() {
        let a = _mm_setr_epi32(0, 1, 2, 3);
        let r = _mm256_setr_epi64x(0, 1, 2, 3);
        assert_eq_m256i(r, _mm256_cvtepu32_epi64(a));
    }
4442
    // Zero extension u8 -> i16 of all 16 bytes.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepu8_epi16() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let r = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        assert_eq_m256i(r, _mm256_cvtepu8_epi16(a));
    }

    // Zero extension u8 -> i32 of the low 8 bytes.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepu8_epi32() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let r = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m256i(r, _mm256_cvtepu8_epi32(a));
    }

    // Zero extension u8 -> i64 of the low 4 bytes.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepu8_epi64() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let r = _mm256_setr_epi64x(0, 1, 2, 3);
        assert_eq_m256i(r, _mm256_cvtepu8_epi64(a));
    }

    // Immediate 1 selects the upper 128-bit half (elements 3 and 4).
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_extracti128_si256() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let r = _mm256_extracti128_si256::<1>(a);
        let e = _mm_setr_epi64x(3, 4);
        assert_eq_m128i(r, e);
    }
4487
    // Horizontal add interleaves pair-sums of `a` and `b` per 128-bit half:
    // four 2+2 sums from `a`, then four 4+4 sums from `b`, repeated.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_hadd_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_hadd_epi16(a, b);
        let e = _mm256_setr_epi16(4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8);
        assert_eq_m256i(r, e);
    }

    // 32-bit horizontal add, same interleaving rule.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_hadd_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_hadd_epi32(a, b);
        let e = _mm256_setr_epi32(4, 4, 8, 8, 4, 4, 8, 8);
        assert_eq_m256i(r, e);
    }

    // Saturating horizontal add: lanes 0/1 of `a` are 0x7FFF and 1, whose
    // pair-sum saturates to 0x7FFF in output lane 0.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_hadds_epi16() {
        let a = _mm256_set1_epi16(2);
        let a = _mm256_insert_epi16::<0>(a, 0x7fff);
        let a = _mm256_insert_epi16::<1>(a, 1);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_hadds_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            0x7FFF, 4, 4, 4, 8, 8, 8, 8,
            4, 4, 4, 4, 8, 8, 8, 8,
        );
        assert_eq_m256i(r, e);
    }

    // Horizontal subtract of equal pairs is zero everywhere.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_hsub_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_hsub_epi16(a, b);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_hsub_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_hsub_epi32(a, b);
        let e = _mm256_set1_epi32(0);
        assert_eq_m256i(r, e);
    }

    // Saturating horizontal subtract: lane 0 computes 0x7fff - (-1), which
    // saturates to 0x7FFF; every other pair subtracts to 0.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_hsubs_epi16() {
        let a = _mm256_set1_epi16(2);
        let a = _mm256_insert_epi16::<0>(a, 0x7fff);
        let a = _mm256_insert_epi16::<1>(a, -1);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_hsubs_epi16(a, b);
        let e = _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), 0x7FFF);
        assert_eq_m256i(r, e);
    }

    // madd multiplies adjacent 16-bit pairs and adds: (2*4) + (2*4) == 16.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_madd_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_madd_epi16(a, b);
        let e = _mm256_set1_epi32(16);
        assert_eq_m256i(r, e);
    }

    // Immediate 1 replaces the upper 128-bit half with `b`.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_inserti128_si256() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let b = _mm_setr_epi64x(7, 8);
        let r = _mm256_inserti128_si256::<1>(a, b);
        let e = _mm256_setr_epi64x(1, 2, 7, 8);
        assert_eq_m256i(r, e);
    }
4567
    // maddubs: unsigned bytes of `a` times signed bytes of `b`, adjacent pairs
    // summed into i16: (2*4) + (2*4) == 16.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_maddubs_epi16() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_maddubs_epi16(a, b);
        let e = _mm256_set1_epi16(16);
        assert_eq_m256i(r, e);
    }

    // Masked load: lanes whose mask MSB is set load from memory; the rest are zeroed.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_maskload_epi32() {
        let nums = [1, 2, 3, 4];
        let a = &nums as *const i32;
        let mask = _mm_setr_epi32(-1, 0, 0, -1);
        let r = _mm_maskload_epi32(a, mask);
        let e = _mm_setr_epi32(1, 0, 0, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_maskload_epi32() {
        let nums = [1, 2, 3, 4, 5, 6, 7, 8];
        let a = &nums as *const i32;
        let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
        let r = _mm256_maskload_epi32(a, mask);
        let e = _mm256_setr_epi32(1, 0, 0, 4, 0, 6, 7, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_maskload_epi64() {
        let nums = [1_i64, 2_i64];
        let a = &nums as *const i64;
        let mask = _mm_setr_epi64x(0, -1);
        let r = _mm_maskload_epi64(a, mask);
        let e = _mm_setr_epi64x(0, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_maskload_epi64() {
        let nums = [1_i64, 2_i64, 3_i64, 4_i64];
        let a = &nums as *const i64;
        let mask = _mm256_setr_epi64x(0, -1, -1, 0);
        let r = _mm256_maskload_epi64(a, mask);
        let e = _mm256_setr_epi64x(0, 2, 3, 0);
        assert_eq_m256i(r, e);
    }

    // Masked store: only mask-selected lanes are written; other array
    // elements must keep their previous values.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_maskstore_epi32() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let mut arr = [-1, -1, -1, -1];
        let mask = _mm_setr_epi32(-1, 0, 0, -1);
        _mm_maskstore_epi32(arr.as_mut_ptr(), mask, a);
        let e = [1, -1, -1, 4];
        assert_eq!(arr, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_maskstore_epi32() {
        let a = _mm256_setr_epi32(1, 0x6d726f, 3, 42, 0x777161, 6, 7, 8);
        let mut arr = [-1, -1, -1, 0x776173, -1, 0x68657265, -1, -1];
        let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
        _mm256_maskstore_epi32(arr.as_mut_ptr(), mask, a);
        let e = [1, -1, -1, 42, -1, 6, 7, -1];
        assert_eq!(arr, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_maskstore_epi64() {
        let a = _mm_setr_epi64x(1_i64, 2_i64);
        let mut arr = [-1_i64, -1_i64];
        let mask = _mm_setr_epi64x(0, -1);
        _mm_maskstore_epi64(arr.as_mut_ptr(), mask, a);
        let e = [-1, 2];
        assert_eq!(arr, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_maskstore_epi64() {
        let a = _mm256_setr_epi64x(1_i64, 2_i64, 3_i64, 4_i64);
        let mut arr = [-1_i64, -1_i64, -1_i64, -1_i64];
        let mask = _mm256_setr_epi64x(0, -1, -1, 0);
        _mm256_maskstore_epi64(arr.as_mut_ptr(), mask, a);
        let e = [-1, 2, 3, -1];
        assert_eq!(arr, e);
    }
4656
4657 #[simd_test(enable = "avx2")]
4658 unsafe fn test_mm256_max_epi16() {
4659 let a = _mm256_set1_epi16(2);
4660 let b = _mm256_set1_epi16(4);
4661 let r = _mm256_max_epi16(a, b);
4662 assert_eq_m256i(r, b);
4663 }
4664
4665 #[simd_test(enable = "avx2")]
4666 unsafe fn test_mm256_max_epi32() {
4667 let a = _mm256_set1_epi32(2);
4668 let b = _mm256_set1_epi32(4);
4669 let r = _mm256_max_epi32(a, b);
4670 assert_eq_m256i(r, b);
4671 }
4672
4673 #[simd_test(enable = "avx2")]
4674 unsafe fn test_mm256_max_epi8() {
4675 let a = _mm256_set1_epi8(2);
4676 let b = _mm256_set1_epi8(4);
4677 let r = _mm256_max_epi8(a, b);
4678 assert_eq_m256i(r, b);
4679 }
4680
4681 #[simd_test(enable = "avx2")]
4682 unsafe fn test_mm256_max_epu16() {
4683 let a = _mm256_set1_epi16(2);
4684 let b = _mm256_set1_epi16(4);
4685 let r = _mm256_max_epu16(a, b);
4686 assert_eq_m256i(r, b);
4687 }
4688
4689 #[simd_test(enable = "avx2")]
4690 unsafe fn test_mm256_max_epu32() {
4691 let a = _mm256_set1_epi32(2);
4692 let b = _mm256_set1_epi32(4);
4693 let r = _mm256_max_epu32(a, b);
4694 assert_eq_m256i(r, b);
4695 }
4696
4697 #[simd_test(enable = "avx2")]
4698 unsafe fn test_mm256_max_epu8() {
4699 let a = _mm256_set1_epi8(2);
4700 let b = _mm256_set1_epi8(4);
4701 let r = _mm256_max_epu8(a, b);
4702 assert_eq_m256i(r, b);
4703 }
4704
4705 #[simd_test(enable = "avx2")]
4706 unsafe fn test_mm256_min_epi16() {
4707 let a = _mm256_set1_epi16(2);
4708 let b = _mm256_set1_epi16(4);
4709 let r = _mm256_min_epi16(a, b);
4710 assert_eq_m256i(r, a);
4711 }
4712
4713 #[simd_test(enable = "avx2")]
4714 unsafe fn test_mm256_min_epi32() {
4715 let a = _mm256_set1_epi32(2);
4716 let b = _mm256_set1_epi32(4);
4717 let r = _mm256_min_epi32(a, b);
4718 assert_eq_m256i(r, a);
4719 }
4720
4721 #[simd_test(enable = "avx2")]
4722 unsafe fn test_mm256_min_epi8() {
4723 let a = _mm256_set1_epi8(2);
4724 let b = _mm256_set1_epi8(4);
4725 let r = _mm256_min_epi8(a, b);
4726 assert_eq_m256i(r, a);
4727 }
4728
4729 #[simd_test(enable = "avx2")]
4730 unsafe fn test_mm256_min_epu16() {
4731 let a = _mm256_set1_epi16(2);
4732 let b = _mm256_set1_epi16(4);
4733 let r = _mm256_min_epu16(a, b);
4734 assert_eq_m256i(r, a);
4735 }
4736
4737 #[simd_test(enable = "avx2")]
4738 unsafe fn test_mm256_min_epu32() {
4739 let a = _mm256_set1_epi32(2);
4740 let b = _mm256_set1_epi32(4);
4741 let r = _mm256_min_epu32(a, b);
4742 assert_eq_m256i(r, a);
4743 }
4744
4745 #[simd_test(enable = "avx2")]
4746 unsafe fn test_mm256_min_epu8() {
4747 let a = _mm256_set1_epi8(2);
4748 let b = _mm256_set1_epi8(4);
4749 let r = _mm256_min_epu8(a, b);
4750 assert_eq_m256i(r, a);
4751 }
4752
    // All bytes have their MSB set, so the 32-bit mask is all-ones (-1).
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_movemask_epi8() {
        let a = _mm256_set1_epi8(-1);
        let r = _mm256_movemask_epi8(a);
        let e = -1;
        assert_eq!(r, e);
    }

    // Sum of absolute differences over 4-byte windows: |2-4| * 4 == 8 per lane.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mpsadbw_epu8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_mpsadbw_epu8::<0>(a, b);
        let e = _mm256_set1_epi16(8);
        assert_eq_m256i(r, e);
    }

    // Signed widening multiply uses only the even-indexed 32-bit lanes
    // (0, 2, 4, 6): 0*1, 0*3, 2*5, 2*7 -> 0, 0, 10, 14.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mul_epi32() {
        let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2);
        let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_mul_epi32(a, b);
        let e = _mm256_setr_epi64x(0, 0, 10, 14);
        assert_eq_m256i(r, e);
    }

    // Unsigned widening multiply, same even-lane selection.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mul_epu32() {
        let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2);
        let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_mul_epu32(a, b);
        let e = _mm256_setr_epi64x(0, 0, 10, 14);
        assert_eq_m256i(r, e);
    }

    // High half of the 16x16 product: (6535 * 6535) >> 16 == 651.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mulhi_epi16() {
        let a = _mm256_set1_epi16(6535);
        let b = _mm256_set1_epi16(6535);
        let r = _mm256_mulhi_epi16(a, b);
        let e = _mm256_set1_epi16(651);
        assert_eq_m256i(r, e);
    }

    // Unsigned variant; 6535 is positive so the result matches the signed case.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mulhi_epu16() {
        let a = _mm256_set1_epi16(6535);
        let b = _mm256_set1_epi16(6535);
        let r = _mm256_mulhi_epu16(a, b);
        let e = _mm256_set1_epi16(651);
        assert_eq_m256i(r, e);
    }
4805
    // Low half of the 16-bit product: 2 * 4 == 8 fits entirely in the low word.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mullo_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_mullo_epi16(a, b);
        let e = _mm256_set1_epi16(8);
        assert_eq_m256i(r, e);
    }

    // Low half of the 32-bit product: 2 * 4 == 8.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mullo_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_mullo_epi32(a, b);
        let e = _mm256_set1_epi32(8);
        assert_eq_m256i(r, e);
    }
4823
4824 #[simd_test(enable = "avx2")]
4825 unsafe fn test_mm256_mulhrs_epi16() {
4826 let a = _mm256_set1_epi16(2);
4827 let b = _mm256_set1_epi16(4);
4828 let r = _mm256_mullo_epi16(a, b);
4829 let e = _mm256_set1_epi16(8);
4830 assert_eq_m256i(r, e);
4831 }
4832
4833 #[simd_test(enable = "avx2")]
4834 unsafe fn test_mm256_or_si256() {
4835 let a = _mm256_set1_epi8(-1);
4836 let b = _mm256_set1_epi8(0);
4837 let r = _mm256_or_si256(a, b);
4838 assert_eq_m256i(r, a);
4839 }
4840
    // Pack narrows and interleaves per 128-bit half: 8 elements from `a`,
    // then 8 from `b`, repeated; 2 and 4 fit without saturating.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_packs_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_packs_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            2, 2, 2, 2, 2, 2, 2, 2,
            4, 4, 4, 4, 4, 4, 4, 4,
            2, 2, 2, 2, 2, 2, 2, 2,
            4, 4, 4, 4, 4, 4, 4, 4,
        );

        assert_eq_m256i(r, e);
    }

    // i32 -> i16 signed pack, 4 elements per source per half.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_packs_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_packs_epi32(a, b);
        let e = _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4);

        assert_eq_m256i(r, e);
    }

    // Unsigned-saturating i16 -> u8 pack; small positive inputs pass through.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_packus_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_packus_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            2, 2, 2, 2, 2, 2, 2, 2,
            4, 4, 4, 4, 4, 4, 4, 4,
            2, 2, 2, 2, 2, 2, 2, 2,
            4, 4, 4, 4, 4, 4, 4, 4,
        );

        assert_eq_m256i(r, e);
    }

    // Unsigned-saturating i32 -> u16 pack.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_packus_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_packus_epi32(a, b);
        let e = _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4);

        assert_eq_m256i(r, e);
    }
4892
    // SAD sums |2 - 4| over each group of 8 bytes into a 64-bit lane: 2*8 == 16.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sad_epu8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_sad_epu8(a, b);
        let e = _mm256_set1_epi64x(16);
        assert_eq_m256i(r, e);
    }

    // Shuffles only the high 4 words of each 128-bit half; the low 4 words
    // (0..3 and 4..7 here) pass through unchanged. Immediate 0b00_01_01_11
    // selects source words 3, 1, 1, 0 for destination positions 4..7.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_shufflehi_epi16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            0, 1, 2, 3, 11, 22, 33, 44,
            4, 5, 6, 7, 55, 66, 77, 88,
        );
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            0, 1, 2, 3, 44, 22, 22, 11,
            4, 5, 6, 7, 88, 66, 66, 55,
        );
        let r = _mm256_shufflehi_epi16::<0b00_01_01_11>(a);
        assert_eq_m256i(r, e);
    }

    // Mirror of the above: shuffles only the low 4 words of each half.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_shufflelo_epi16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            11, 22, 33, 44, 0, 1, 2, 3,
            55, 66, 77, 88, 4, 5, 6, 7,
        );
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            44, 22, 22, 11, 0, 1, 2, 3,
            88, 66, 66, 55, 4, 5, 6, 7,
        );
        let r = _mm256_shufflelo_epi16::<0b00_01_01_11>(a);
        assert_eq_m256i(r, e);
    }
4933
    // `sign` negates each lane of `a` where the corresponding lane of `b` is
    // negative: 2 with sign -1 becomes -2.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sign_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(-1);
        let r = _mm256_sign_epi16(a, b);
        let e = _mm256_set1_epi16(-2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sign_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(-1);
        let r = _mm256_sign_epi32(a, b);
        let e = _mm256_set1_epi32(-2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sign_epi8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(-1);
        let r = _mm256_sign_epi8(a, b);
        let e = _mm256_set1_epi8(-2);
        assert_eq_m256i(r, e);
    }
4960
    // Vector-count left shift: the count (4) lives in the low 64 bits of `b`.
    // 0xFF << 4 == 0xFF0 in every 16-bit lane.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sll_epi16() {
        let a = _mm256_set1_epi16(0xFF);
        let b = _mm_insert_epi16::<0>(_mm_set1_epi16(0), 4);
        let r = _mm256_sll_epi16(a, b);
        assert_eq_m256i(r, _mm256_set1_epi16(0xFF0))
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sll_epi32() {
        let a = _mm256_set1_epi32(0xFFFF);
        let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4);
        let r = _mm256_sll_epi32(a, b);
        assert_eq_m256i(r, _mm256_set1_epi32(0xFFFF0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sll_epi64() {
        let a = _mm256_set1_epi64x(0xFFFFFFFF);
        let b = _mm_insert_epi64::<0>(_mm_set1_epi64x(0), 4);
        let r = _mm256_sll_epi64(a, b);
        assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF0));
    }

    // Immediate-count variants of the same shifts.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_slli_epi16() {
        assert_eq_m256i(
            _mm256_slli_epi16::<4>(_mm256_set1_epi16(0xFF)),
            _mm256_set1_epi16(0xFF0),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_slli_epi32() {
        assert_eq_m256i(
            _mm256_slli_epi32::<4>(_mm256_set1_epi32(0xFFFF)),
            _mm256_set1_epi32(0xFFFF0),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_slli_epi64() {
        assert_eq_m256i(
            _mm256_slli_epi64::<4>(_mm256_set1_epi64x(0xFFFFFFFF)),
            _mm256_set1_epi64x(0xFFFFFFFF0),
        );
    }

    // Byte-wise shift of each 128-bit half: 3 bytes == 24 bits left.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_slli_si256() {
        let a = _mm256_set1_epi64x(0xFFFFFFFF);
        let r = _mm256_slli_si256::<3>(a);
        assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF000000));
    }
5015
    // Variable per-lane left shift: 2 << 1 == 4 in every lane.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_sllv_epi32() {
        let a = _mm_set1_epi32(2);
        let b = _mm_set1_epi32(1);
        let r = _mm_sllv_epi32(a, b);
        let e = _mm_set1_epi32(4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sllv_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_sllv_epi32(a, b);
        let e = _mm256_set1_epi32(4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_sllv_epi64() {
        let a = _mm_set1_epi64x(2);
        let b = _mm_set1_epi64x(1);
        let r = _mm_sllv_epi64(a, b);
        let e = _mm_set1_epi64x(4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sllv_epi64() {
        let a = _mm256_set1_epi64x(2);
        let b = _mm256_set1_epi64x(1);
        let r = _mm256_sllv_epi64(a, b);
        let e = _mm256_set1_epi64x(4);
        assert_eq_m256i(r, e);
    }
5051
    // Arithmetic right shift preserves the sign bit: -1 >> 1 stays -1.
    // The count is the low 64 bits of `b`.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sra_epi16() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm256_sra_epi16(a, b);
        assert_eq_m256i(r, _mm256_set1_epi16(-1));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sra_epi32() {
        let a = _mm256_set1_epi32(-1);
        let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 1);
        let r = _mm256_sra_epi32(a, b);
        assert_eq_m256i(r, _mm256_set1_epi32(-1));
    }

    // Immediate-count arithmetic shifts; -1 is again a fixed point.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srai_epi16() {
        assert_eq_m256i(
            _mm256_srai_epi16::<1>(_mm256_set1_epi16(-1)),
            _mm256_set1_epi16(-1),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srai_epi32() {
        assert_eq_m256i(
            _mm256_srai_epi32::<1>(_mm256_set1_epi32(-1)),
            _mm256_set1_epi32(-1),
        );
    }

    // Variable per-lane arithmetic shift: 4 >> 1 == 2.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_srav_epi32() {
        let a = _mm_set1_epi32(4);
        let count = _mm_set1_epi32(1);
        let r = _mm_srav_epi32(a, count);
        let e = _mm_set1_epi32(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srav_epi32() {
        let a = _mm256_set1_epi32(4);
        let count = _mm256_set1_epi32(1);
        let r = _mm256_srav_epi32(a, count);
        let e = _mm256_set1_epi32(2);
        assert_eq_m256i(r, e);
    }
5101
    // Byte-wise right shift operates on each 128-bit half independently,
    // shifting zeros in at each half's top (hence 0s at indices 13..15 and 29..31).
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srli_si256() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm256_srli_si256::<3>(a);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            4, 5, 6, 7, 8, 9, 10, 11,
            12, 13, 14, 15, 16, 0, 0, 0,
            20, 21, 22, 23, 24, 25, 26, 27,
            28, 29, 30, 31, 32, 0, 0, 0,
        );
        assert_eq_m256i(r, e);
    }

    // Logical right shift by a vector count (low 64 bits of `b`): 0xFF >> 4 == 0xF.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srl_epi16() {
        let a = _mm256_set1_epi16(0xFF);
        let b = _mm_insert_epi16::<0>(_mm_set1_epi16(0), 4);
        let r = _mm256_srl_epi16(a, b);
        assert_eq_m256i(r, _mm256_set1_epi16(0xF));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srl_epi32() {
        let a = _mm256_set1_epi32(0xFFFF);
        let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4);
        let r = _mm256_srl_epi32(a, b);
        assert_eq_m256i(r, _mm256_set1_epi32(0xFFF));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srl_epi64() {
        let a = _mm256_set1_epi64x(0xFFFFFFFF);
        let b = _mm_setr_epi64x(4, 0);
        let r = _mm256_srl_epi64(a, b);
        assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFF));
    }
5145
5146 #[simd_test(enable = "avx2")]
5147 unsafe fn test_mm256_srli_epi16() {
5148 assert_eq_m256i(
5149 _mm256_srli_epi16::<4>(_mm256_set1_epi16(0xFF)),
5150 _mm256_set1_epi16(0xF),
5151 );
5152 }
5153
5154 #[simd_test(enable = "avx2")]
5155 unsafe fn test_mm256_srli_epi32() {
5156 assert_eq_m256i(
5157 _mm256_srli_epi32::<4>(_mm256_set1_epi32(0xFFFF)),
5158 _mm256_set1_epi32(0xFFF),
5159 );
5160 }
5161
5162 #[simd_test(enable = "avx2")]
5163 unsafe fn test_mm256_srli_epi64() {
5164 assert_eq_m256i(
5165 _mm256_srli_epi64::<4>(_mm256_set1_epi64x(0xFFFFFFFF)),
5166 _mm256_set1_epi64x(0xFFFFFFF),
5167 );
5168 }
5169
5170 #[simd_test(enable = "avx2")]
5171 unsafe fn test_mm_srlv_epi32() {
5172 let a = _mm_set1_epi32(2);
5173 let count = _mm_set1_epi32(1);
5174 let r = _mm_srlv_epi32(a, count);
5175 let e = _mm_set1_epi32(1);
5176 assert_eq_m128i(r, e);
5177 }
5178
5179 #[simd_test(enable = "avx2")]
5180 unsafe fn test_mm256_srlv_epi32() {
5181 let a = _mm256_set1_epi32(2);
5182 let count = _mm256_set1_epi32(1);
5183 let r = _mm256_srlv_epi32(a, count);
5184 let e = _mm256_set1_epi32(1);
5185 assert_eq_m256i(r, e);
5186 }
5187
5188 #[simd_test(enable = "avx2")]
5189 unsafe fn test_mm_srlv_epi64() {
5190 let a = _mm_set1_epi64x(2);
5191 let count = _mm_set1_epi64x(1);
5192 let r = _mm_srlv_epi64(a, count);
5193 let e = _mm_set1_epi64x(1);
5194 assert_eq_m128i(r, e);
5195 }
5196
5197 #[simd_test(enable = "avx2")]
5198 unsafe fn test_mm256_srlv_epi64() {
5199 let a = _mm256_set1_epi64x(2);
5200 let count = _mm256_set1_epi64x(1);
5201 let r = _mm256_srlv_epi64(a, count);
5202 let e = _mm256_set1_epi64x(1);
5203 assert_eq_m256i(r, e);
5204 }
5205
5206 #[simd_test(enable = "avx2")]
5207 unsafe fn test_mm256_stream_load_si256() {
5208 let a = _mm256_set_epi64x(5, 6, 7, 8);
5209 let r = _mm256_stream_load_si256(core::ptr::addr_of!(a) as *const _);
5210 assert_eq_m256i(a, r);
5211 }
5212
5213 #[simd_test(enable = "avx2")]
5214 unsafe fn test_mm256_sub_epi16() {
5215 let a = _mm256_set1_epi16(4);
5216 let b = _mm256_set1_epi16(2);
5217 let r = _mm256_sub_epi16(a, b);
5218 assert_eq_m256i(r, b);
5219 }
5220
5221 #[simd_test(enable = "avx2")]
5222 unsafe fn test_mm256_sub_epi32() {
5223 let a = _mm256_set1_epi32(4);
5224 let b = _mm256_set1_epi32(2);
5225 let r = _mm256_sub_epi32(a, b);
5226 assert_eq_m256i(r, b);
5227 }
5228
5229 #[simd_test(enable = "avx2")]
5230 unsafe fn test_mm256_sub_epi64() {
5231 let a = _mm256_set1_epi64x(4);
5232 let b = _mm256_set1_epi64x(2);
5233 let r = _mm256_sub_epi64(a, b);
5234 assert_eq_m256i(r, b);
5235 }
5236
5237 #[simd_test(enable = "avx2")]
5238 unsafe fn test_mm256_sub_epi8() {
5239 let a = _mm256_set1_epi8(4);
5240 let b = _mm256_set1_epi8(2);
5241 let r = _mm256_sub_epi8(a, b);
5242 assert_eq_m256i(r, b);
5243 }
5244
5245 #[simd_test(enable = "avx2")]
5246 unsafe fn test_mm256_subs_epi16() {
5247 let a = _mm256_set1_epi16(4);
5248 let b = _mm256_set1_epi16(2);
5249 let r = _mm256_subs_epi16(a, b);
5250 assert_eq_m256i(r, b);
5251 }
5252
5253 #[simd_test(enable = "avx2")]
5254 unsafe fn test_mm256_subs_epi8() {
5255 let a = _mm256_set1_epi8(4);
5256 let b = _mm256_set1_epi8(2);
5257 let r = _mm256_subs_epi8(a, b);
5258 assert_eq_m256i(r, b);
5259 }
5260
5261 #[simd_test(enable = "avx2")]
5262 unsafe fn test_mm256_subs_epu16() {
5263 let a = _mm256_set1_epi16(4);
5264 let b = _mm256_set1_epi16(2);
5265 let r = _mm256_subs_epu16(a, b);
5266 assert_eq_m256i(r, b);
5267 }
5268
5269 #[simd_test(enable = "avx2")]
5270 unsafe fn test_mm256_subs_epu8() {
5271 let a = _mm256_set1_epi8(4);
5272 let b = _mm256_set1_epi8(2);
5273 let r = _mm256_subs_epu8(a, b);
5274 assert_eq_m256i(r, b);
5275 }
5276
5277 #[simd_test(enable = "avx2")]
5278 unsafe fn test_mm256_xor_si256() {
5279 let a = _mm256_set1_epi8(5);
5280 let b = _mm256_set1_epi8(3);
5281 let r = _mm256_xor_si256(a, b);
5282 assert_eq_m256i(r, _mm256_set1_epi8(6));
5283 }
5284
5285 #[simd_test(enable = "avx2")]
5286 unsafe fn test_mm256_alignr_epi8() {
5287 #[rustfmt::skip]
5288 let a = _mm256_setr_epi8(
5289 1, 2, 3, 4, 5, 6, 7, 8,
5290 9, 10, 11, 12, 13, 14, 15, 16,
5291 17, 18, 19, 20, 21, 22, 23, 24,
5292 25, 26, 27, 28, 29, 30, 31, 32,
5293 );
5294 #[rustfmt::skip]
5295 let b = _mm256_setr_epi8(
5296 -1, -2, -3, -4, -5, -6, -7, -8,
5297 -9, -10, -11, -12, -13, -14, -15, -16,
5298 -17, -18, -19, -20, -21, -22, -23, -24,
5299 -25, -26, -27, -28, -29, -30, -31, -32,
5300 );
5301 let r = _mm256_alignr_epi8::<33>(a, b);
5302 assert_eq_m256i(r, _mm256_set1_epi8(0));
5303
5304 let r = _mm256_alignr_epi8::<17>(a, b);
5305 #[rustfmt::skip]
5306 let expected = _mm256_setr_epi8(
5307 2, 3, 4, 5, 6, 7, 8, 9,
5308 10, 11, 12, 13, 14, 15, 16, 0,
5309 18, 19, 20, 21, 22, 23, 24, 25,
5310 26, 27, 28, 29, 30, 31, 32, 0,
5311 );
5312 assert_eq_m256i(r, expected);
5313
5314 let r = _mm256_alignr_epi8::<4>(a, b);
5315 #[rustfmt::skip]
5316 let expected = _mm256_setr_epi8(
5317 -5, -6, -7, -8, -9, -10, -11, -12,
5318 -13, -14, -15, -16, 1, 2, 3, 4,
5319 -21, -22, -23, -24, -25, -26, -27, -28,
5320 -29, -30, -31, -32, 17, 18, 19, 20,
5321 );
5322 assert_eq_m256i(r, expected);
5323
5324 let r = _mm256_alignr_epi8::<15>(a, b);
5325 #[rustfmt::skip]
5326 let expected = _mm256_setr_epi8(
5327 -16, 1, 2, 3, 4, 5, 6, 7,
5328 8, 9, 10, 11, 12, 13, 14, 15,
5329 -32, 17, 18, 19, 20, 21, 22, 23,
5330 24, 25, 26, 27, 28, 29, 30, 31,
5331 );
5332 assert_eq_m256i(r, expected);
5333
5334 let r = _mm256_alignr_epi8::<0>(a, b);
5335 assert_eq_m256i(r, b);
5336
5337 let r = _mm256_alignr_epi8::<16>(a, b);
5338 assert_eq_m256i(r, a);
5339 }
5340
5341 #[simd_test(enable = "avx2")]
5342 unsafe fn test_mm256_shuffle_epi8() {
5343 #[rustfmt::skip]
5344 let a = _mm256_setr_epi8(
5345 1, 2, 3, 4, 5, 6, 7, 8,
5346 9, 10, 11, 12, 13, 14, 15, 16,
5347 17, 18, 19, 20, 21, 22, 23, 24,
5348 25, 26, 27, 28, 29, 30, 31, 32,
5349 );
5350 #[rustfmt::skip]
5351 let b = _mm256_setr_epi8(
5352 4, 128u8 as i8, 4, 3, 24, 12, 6, 19,
5353 12, 5, 5, 10, 4, 1, 8, 0,
5354 4, 128u8 as i8, 4, 3, 24, 12, 6, 19,
5355 12, 5, 5, 10, 4, 1, 8, 0,
5356 );
5357 #[rustfmt::skip]
5358 let expected = _mm256_setr_epi8(
5359 5, 0, 5, 4, 9, 13, 7, 4,
5360 13, 6, 6, 11, 5, 2, 9, 1,
5361 21, 0, 21, 20, 25, 29, 23, 20,
5362 29, 22, 22, 27, 21, 18, 25, 17,
5363 );
5364 let r = _mm256_shuffle_epi8(a, b);
5365 assert_eq_m256i(r, expected);
5366 }
5367
5368 #[simd_test(enable = "avx2")]
5369 unsafe fn test_mm256_permutevar8x32_epi32() {
5370 let a = _mm256_setr_epi32(100, 200, 300, 400, 500, 600, 700, 800);
5371 let b = _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4);
5372 let expected = _mm256_setr_epi32(600, 100, 600, 200, 800, 700, 400, 500);
5373 let r = _mm256_permutevar8x32_epi32(a, b);
5374 assert_eq_m256i(r, expected);
5375 }
5376
5377 #[simd_test(enable = "avx2")]
5378 unsafe fn test_mm256_permute4x64_epi64() {
5379 let a = _mm256_setr_epi64x(100, 200, 300, 400);
5380 let expected = _mm256_setr_epi64x(400, 100, 200, 100);
5381 let r = _mm256_permute4x64_epi64::<0b00010011>(a);
5382 assert_eq_m256i(r, expected);
5383 }
5384
5385 #[simd_test(enable = "avx2")]
5386 unsafe fn test_mm256_permute2x128_si256() {
5387 let a = _mm256_setr_epi64x(100, 200, 500, 600);
5388 let b = _mm256_setr_epi64x(300, 400, 700, 800);
5389 let r = _mm256_permute2x128_si256::<0b00_01_00_11>(a, b);
5390 let e = _mm256_setr_epi64x(700, 800, 500, 600);
5391 assert_eq_m256i(r, e);
5392 }
5393
5394 #[simd_test(enable = "avx2")]
5395 unsafe fn test_mm256_permute4x64_pd() {
5396 let a = _mm256_setr_pd(1., 2., 3., 4.);
5397 let r = _mm256_permute4x64_pd::<0b00_01_00_11>(a);
5398 let e = _mm256_setr_pd(4., 1., 2., 1.);
5399 assert_eq_m256d(r, e);
5400 }
5401
5402 #[simd_test(enable = "avx2")]
5403 unsafe fn test_mm256_permutevar8x32_ps() {
5404 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
5405 let b = _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4);
5406 let r = _mm256_permutevar8x32_ps(a, b);
5407 let e = _mm256_setr_ps(6., 1., 6., 2., 8., 7., 4., 5.);
5408 assert_eq_m256(r, e);
5409 }
5410
5411 #[simd_test(enable = "avx2")]
5412 unsafe fn test_mm_i32gather_epi32() {
5413 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5414 let r = _mm_i32gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
5416 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48));
5417 }
5418
5419 #[simd_test(enable = "avx2")]
5420 unsafe fn test_mm_mask_i32gather_epi32() {
5421 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5422 let r = _mm_mask_i32gather_epi32::<4>(
5424 _mm_set1_epi32(256),
5425 arr.as_ptr(),
5426 _mm_setr_epi32(0, 16, 64, 96),
5427 _mm_setr_epi32(-1, -1, -1, 0),
5428 );
5429 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256));
5430 }
5431
5432 #[simd_test(enable = "avx2")]
5433 unsafe fn test_mm256_i32gather_epi32() {
5434 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5435 let r =
5437 _mm256_i32gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
5438 assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
5439 }
5440
5441 #[simd_test(enable = "avx2")]
5442 unsafe fn test_mm256_mask_i32gather_epi32() {
5443 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5444 let r = _mm256_mask_i32gather_epi32::<4>(
5446 _mm256_set1_epi32(256),
5447 arr.as_ptr(),
5448 _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
5449 _mm256_setr_epi32(-1, -1, -1, 0, 0, 0, 0, 0),
5450 );
5451 assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 64, 256, 256, 256, 256, 256));
5452 }
5453
5454 #[simd_test(enable = "avx2")]
5455 unsafe fn test_mm_i32gather_ps() {
5456 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5457 let r = _mm_i32gather_ps::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
5459 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
5460 }
5461
5462 #[simd_test(enable = "avx2")]
5463 unsafe fn test_mm_mask_i32gather_ps() {
5464 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5465 let r = _mm_mask_i32gather_ps::<4>(
5467 _mm_set1_ps(256.0),
5468 arr.as_ptr(),
5469 _mm_setr_epi32(0, 16, 64, 96),
5470 _mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
5471 );
5472 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0));
5473 }
5474
5475 #[simd_test(enable = "avx2")]
5476 unsafe fn test_mm256_i32gather_ps() {
5477 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5478 let r =
5480 _mm256_i32gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
5481 assert_eq_m256(r, _mm256_setr_ps(0.0, 16.0, 32.0, 48.0, 1.0, 2.0, 3.0, 4.0));
5482 }
5483
5484 #[simd_test(enable = "avx2")]
5485 unsafe fn test_mm256_mask_i32gather_ps() {
5486 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5487 let r = _mm256_mask_i32gather_ps::<4>(
5489 _mm256_set1_ps(256.0),
5490 arr.as_ptr(),
5491 _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
5492 _mm256_setr_ps(-1.0, -1.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0),
5493 );
5494 assert_eq_m256(
5495 r,
5496 _mm256_setr_ps(0.0, 16.0, 64.0, 256.0, 256.0, 256.0, 256.0, 256.0),
5497 );
5498 }
5499
5500 #[simd_test(enable = "avx2")]
5501 unsafe fn test_mm_i32gather_epi64() {
5502 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5503 let r = _mm_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0));
5505 assert_eq_m128i(r, _mm_setr_epi64x(0, 16));
5506 }
5507
5508 #[simd_test(enable = "avx2")]
5509 unsafe fn test_mm_mask_i32gather_epi64() {
5510 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5511 let r = _mm_mask_i32gather_epi64::<8>(
5513 _mm_set1_epi64x(256),
5514 arr.as_ptr(),
5515 _mm_setr_epi32(16, 16, 16, 16),
5516 _mm_setr_epi64x(-1, 0),
5517 );
5518 assert_eq_m128i(r, _mm_setr_epi64x(16, 256));
5519 }
5520
5521 #[simd_test(enable = "avx2")]
5522 unsafe fn test_mm256_i32gather_epi64() {
5523 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5524 let r = _mm256_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
5526 assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48));
5527 }
5528
5529 #[simd_test(enable = "avx2")]
5530 unsafe fn test_mm256_mask_i32gather_epi64() {
5531 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5532 let r = _mm256_mask_i32gather_epi64::<8>(
5534 _mm256_set1_epi64x(256),
5535 arr.as_ptr(),
5536 _mm_setr_epi32(0, 16, 64, 96),
5537 _mm256_setr_epi64x(-1, -1, -1, 0),
5538 );
5539 assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256));
5540 }
5541
5542 #[simd_test(enable = "avx2")]
5543 unsafe fn test_mm_i32gather_pd() {
5544 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5545 let r = _mm_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0));
5547 assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0));
5548 }
5549
5550 #[simd_test(enable = "avx2")]
5551 unsafe fn test_mm_mask_i32gather_pd() {
5552 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5553 let r = _mm_mask_i32gather_pd::<8>(
5555 _mm_set1_pd(256.0),
5556 arr.as_ptr(),
5557 _mm_setr_epi32(16, 16, 16, 16),
5558 _mm_setr_pd(-1.0, 0.0),
5559 );
5560 assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0));
5561 }
5562
5563 #[simd_test(enable = "avx2")]
5564 unsafe fn test_mm256_i32gather_pd() {
5565 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5566 let r = _mm256_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
5568 assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
5569 }
5570
5571 #[simd_test(enable = "avx2")]
5572 unsafe fn test_mm256_mask_i32gather_pd() {
5573 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5574 let r = _mm256_mask_i32gather_pd::<8>(
5576 _mm256_set1_pd(256.0),
5577 arr.as_ptr(),
5578 _mm_setr_epi32(0, 16, 64, 96),
5579 _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
5580 );
5581 assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
5582 }
5583
5584 #[simd_test(enable = "avx2")]
5585 unsafe fn test_mm_i64gather_epi32() {
5586 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5587 let r = _mm_i64gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
5589 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 0, 0));
5590 }
5591
5592 #[simd_test(enable = "avx2")]
5593 unsafe fn test_mm_mask_i64gather_epi32() {
5594 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5595 let r = _mm_mask_i64gather_epi32::<4>(
5597 _mm_set1_epi32(256),
5598 arr.as_ptr(),
5599 _mm_setr_epi64x(0, 16),
5600 _mm_setr_epi32(-1, 0, -1, 0),
5601 );
5602 assert_eq_m128i(r, _mm_setr_epi32(0, 256, 0, 0));
5603 }
5604
5605 #[simd_test(enable = "avx2")]
5606 unsafe fn test_mm256_i64gather_epi32() {
5607 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5608 let r = _mm256_i64gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
5610 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48));
5611 }
5612
5613 #[simd_test(enable = "avx2")]
5614 unsafe fn test_mm256_mask_i64gather_epi32() {
5615 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5616 let r = _mm256_mask_i64gather_epi32::<4>(
5618 _mm_set1_epi32(256),
5619 arr.as_ptr(),
5620 _mm256_setr_epi64x(0, 16, 64, 96),
5621 _mm_setr_epi32(-1, -1, -1, 0),
5622 );
5623 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256));
5624 }
5625
5626 #[simd_test(enable = "avx2")]
5627 unsafe fn test_mm_i64gather_ps() {
5628 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5629 let r = _mm_i64gather_ps::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
5631 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 0.0, 0.0));
5632 }
5633
5634 #[simd_test(enable = "avx2")]
5635 unsafe fn test_mm_mask_i64gather_ps() {
5636 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5637 let r = _mm_mask_i64gather_ps::<4>(
5639 _mm_set1_ps(256.0),
5640 arr.as_ptr(),
5641 _mm_setr_epi64x(0, 16),
5642 _mm_setr_ps(-1.0, 0.0, -1.0, 0.0),
5643 );
5644 assert_eq_m128(r, _mm_setr_ps(0.0, 256.0, 0.0, 0.0));
5645 }
5646
5647 #[simd_test(enable = "avx2")]
5648 unsafe fn test_mm256_i64gather_ps() {
5649 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5650 let r = _mm256_i64gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
5652 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
5653 }
5654
5655 #[simd_test(enable = "avx2")]
5656 unsafe fn test_mm256_mask_i64gather_ps() {
5657 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5658 let r = _mm256_mask_i64gather_ps::<4>(
5660 _mm_set1_ps(256.0),
5661 arr.as_ptr(),
5662 _mm256_setr_epi64x(0, 16, 64, 96),
5663 _mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
5664 );
5665 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0));
5666 }
5667
5668 #[simd_test(enable = "avx2")]
5669 unsafe fn test_mm_i64gather_epi64() {
5670 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5671 let r = _mm_i64gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
5673 assert_eq_m128i(r, _mm_setr_epi64x(0, 16));
5674 }
5675
5676 #[simd_test(enable = "avx2")]
5677 unsafe fn test_mm_mask_i64gather_epi64() {
5678 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5679 let r = _mm_mask_i64gather_epi64::<8>(
5681 _mm_set1_epi64x(256),
5682 arr.as_ptr(),
5683 _mm_setr_epi64x(16, 16),
5684 _mm_setr_epi64x(-1, 0),
5685 );
5686 assert_eq_m128i(r, _mm_setr_epi64x(16, 256));
5687 }
5688
5689 #[simd_test(enable = "avx2")]
5690 unsafe fn test_mm256_i64gather_epi64() {
5691 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5692 let r = _mm256_i64gather_epi64::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
5694 assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48));
5695 }
5696
5697 #[simd_test(enable = "avx2")]
5698 unsafe fn test_mm256_mask_i64gather_epi64() {
5699 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5700 let r = _mm256_mask_i64gather_epi64::<8>(
5702 _mm256_set1_epi64x(256),
5703 arr.as_ptr(),
5704 _mm256_setr_epi64x(0, 16, 64, 96),
5705 _mm256_setr_epi64x(-1, -1, -1, 0),
5706 );
5707 assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256));
5708 }
5709
5710 #[simd_test(enable = "avx2")]
5711 unsafe fn test_mm_i64gather_pd() {
5712 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5713 let r = _mm_i64gather_pd::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
5715 assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0));
5716 }
5717
5718 #[simd_test(enable = "avx2")]
5719 unsafe fn test_mm_mask_i64gather_pd() {
5720 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5721 let r = _mm_mask_i64gather_pd::<8>(
5723 _mm_set1_pd(256.0),
5724 arr.as_ptr(),
5725 _mm_setr_epi64x(16, 16),
5726 _mm_setr_pd(-1.0, 0.0),
5727 );
5728 assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0));
5729 }
5730
5731 #[simd_test(enable = "avx2")]
5732 unsafe fn test_mm256_i64gather_pd() {
5733 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5734 let r = _mm256_i64gather_pd::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
5736 assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
5737 }
5738
5739 #[simd_test(enable = "avx2")]
5740 unsafe fn test_mm256_mask_i64gather_pd() {
5741 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5742 let r = _mm256_mask_i64gather_pd::<8>(
5744 _mm256_set1_pd(256.0),
5745 arr.as_ptr(),
5746 _mm256_setr_epi64x(0, 16, 64, 96),
5747 _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
5748 );
5749 assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
5750 }
5751
5752 #[simd_test(enable = "avx2")]
5753 unsafe fn test_mm256_extract_epi8() {
5754 #[rustfmt::skip]
5755 let a = _mm256_setr_epi8(
5756 -1, 1, 2, 3, 4, 5, 6, 7,
5757 8, 9, 10, 11, 12, 13, 14, 15,
5758 16, 17, 18, 19, 20, 21, 22, 23,
5759 24, 25, 26, 27, 28, 29, 30, 31
5760 );
5761 let r1 = _mm256_extract_epi8::<0>(a);
5762 let r2 = _mm256_extract_epi8::<3>(a);
5763 assert_eq!(r1, 0xFF);
5764 assert_eq!(r2, 3);
5765 }
5766
5767 #[simd_test(enable = "avx2")]
5768 unsafe fn test_mm256_extract_epi16() {
5769 #[rustfmt::skip]
5770 let a = _mm256_setr_epi16(
5771 -1, 1, 2, 3, 4, 5, 6, 7,
5772 8, 9, 10, 11, 12, 13, 14, 15,
5773 );
5774 let r1 = _mm256_extract_epi16::<0>(a);
5775 let r2 = _mm256_extract_epi16::<3>(a);
5776 assert_eq!(r1, 0xFFFF);
5777 assert_eq!(r2, 3);
5778 }
5779}