1#[cfg(test)]
4use stdarch_test::assert_instr;
5
6use crate::{
7 core_arch::{simd::*, x86::*},
8 intrinsics::simd::*,
9 intrinsics::sqrtf64,
10 mem, ptr,
11};
12
13#[inline]
20#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(pause))]
21#[stable(feature = "simd_x86", since = "1.27.0")]
22pub unsafe fn _mm_pause() {
23 pause()
26}
27
28#[inline]
33#[target_feature(enable = "sse2")]
34#[cfg_attr(test, assert_instr(clflush))]
35#[stable(feature = "simd_x86", since = "1.27.0")]
36pub unsafe fn _mm_clflush(p: *const u8) {
37 clflush(p)
38}
39
40#[inline]
49#[target_feature(enable = "sse2")]
50#[cfg_attr(test, assert_instr(lfence))]
51#[stable(feature = "simd_x86", since = "1.27.0")]
52pub unsafe fn _mm_lfence() {
53 lfence()
54}
55
56#[inline]
65#[target_feature(enable = "sse2")]
66#[cfg_attr(test, assert_instr(mfence))]
67#[stable(feature = "simd_x86", since = "1.27.0")]
68pub unsafe fn _mm_mfence() {
69 mfence()
70}
71
72#[inline]
76#[target_feature(enable = "sse2")]
77#[cfg_attr(test, assert_instr(paddb))]
78#[stable(feature = "simd_x86", since = "1.27.0")]
79pub fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i {
80 unsafe { transmute(simd_add(a.as_i8x16(), b.as_i8x16())) }
81}
82
83#[inline]
87#[target_feature(enable = "sse2")]
88#[cfg_attr(test, assert_instr(paddw))]
89#[stable(feature = "simd_x86", since = "1.27.0")]
90pub fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i {
91 unsafe { transmute(simd_add(a.as_i16x8(), b.as_i16x8())) }
92}
93
94#[inline]
98#[target_feature(enable = "sse2")]
99#[cfg_attr(test, assert_instr(paddd))]
100#[stable(feature = "simd_x86", since = "1.27.0")]
101pub fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i {
102 unsafe { transmute(simd_add(a.as_i32x4(), b.as_i32x4())) }
103}
104
105#[inline]
109#[target_feature(enable = "sse2")]
110#[cfg_attr(test, assert_instr(paddq))]
111#[stable(feature = "simd_x86", since = "1.27.0")]
112pub fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i {
113 unsafe { transmute(simd_add(a.as_i64x2(), b.as_i64x2())) }
114}
115
116#[inline]
120#[target_feature(enable = "sse2")]
121#[cfg_attr(test, assert_instr(paddsb))]
122#[stable(feature = "simd_x86", since = "1.27.0")]
123pub fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i {
124 unsafe { transmute(simd_saturating_add(a.as_i8x16(), b.as_i8x16())) }
125}
126
127#[inline]
131#[target_feature(enable = "sse2")]
132#[cfg_attr(test, assert_instr(paddsw))]
133#[stable(feature = "simd_x86", since = "1.27.0")]
134pub fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i {
135 unsafe { transmute(simd_saturating_add(a.as_i16x8(), b.as_i16x8())) }
136}
137
138#[inline]
142#[target_feature(enable = "sse2")]
143#[cfg_attr(test, assert_instr(paddusb))]
144#[stable(feature = "simd_x86", since = "1.27.0")]
145pub fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i {
146 unsafe { transmute(simd_saturating_add(a.as_u8x16(), b.as_u8x16())) }
147}
148
149#[inline]
153#[target_feature(enable = "sse2")]
154#[cfg_attr(test, assert_instr(paddusw))]
155#[stable(feature = "simd_x86", since = "1.27.0")]
156pub fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i {
157 unsafe { transmute(simd_saturating_add(a.as_u16x8(), b.as_u16x8())) }
158}
159
160#[inline]
164#[target_feature(enable = "sse2")]
165#[cfg_attr(test, assert_instr(pavgb))]
166#[stable(feature = "simd_x86", since = "1.27.0")]
167pub fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i {
168 unsafe {
169 let a = simd_cast::<_, u16x16>(a.as_u8x16());
170 let b = simd_cast::<_, u16x16>(b.as_u8x16());
171 let r = simd_shr(simd_add(simd_add(a, b), u16x16::splat(1)), u16x16::splat(1));
172 transmute(simd_cast::<_, u8x16>(r))
173 }
174}
175
176#[inline]
180#[target_feature(enable = "sse2")]
181#[cfg_attr(test, assert_instr(pavgw))]
182#[stable(feature = "simd_x86", since = "1.27.0")]
183pub fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i {
184 unsafe {
185 let a = simd_cast::<_, u32x8>(a.as_u16x8());
186 let b = simd_cast::<_, u32x8>(b.as_u16x8());
187 let r = simd_shr(simd_add(simd_add(a, b), u32x8::splat(1)), u32x8::splat(1));
188 transmute(simd_cast::<_, u16x8>(r))
189 }
190}
191
192#[inline]
200#[target_feature(enable = "sse2")]
201#[cfg_attr(test, assert_instr(pmaddwd))]
202#[stable(feature = "simd_x86", since = "1.27.0")]
203pub fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i {
204 unsafe { transmute(pmaddwd(a.as_i16x8(), b.as_i16x8())) }
205}
206
207#[inline]
212#[target_feature(enable = "sse2")]
213#[cfg_attr(test, assert_instr(pmaxsw))]
214#[stable(feature = "simd_x86", since = "1.27.0")]
215pub fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i {
216 unsafe {
217 let a = a.as_i16x8();
218 let b = b.as_i16x8();
219 transmute(simd_select::<i16x8, _>(simd_gt(a, b), a, b))
220 }
221}
222
223#[inline]
228#[target_feature(enable = "sse2")]
229#[cfg_attr(test, assert_instr(pmaxub))]
230#[stable(feature = "simd_x86", since = "1.27.0")]
231pub fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i {
232 unsafe {
233 let a = a.as_u8x16();
234 let b = b.as_u8x16();
235 transmute(simd_select::<i8x16, _>(simd_gt(a, b), a, b))
236 }
237}
238
239#[inline]
244#[target_feature(enable = "sse2")]
245#[cfg_attr(test, assert_instr(pminsw))]
246#[stable(feature = "simd_x86", since = "1.27.0")]
247pub fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i {
248 unsafe {
249 let a = a.as_i16x8();
250 let b = b.as_i16x8();
251 transmute(simd_select::<i16x8, _>(simd_lt(a, b), a, b))
252 }
253}
254
255#[inline]
260#[target_feature(enable = "sse2")]
261#[cfg_attr(test, assert_instr(pminub))]
262#[stable(feature = "simd_x86", since = "1.27.0")]
263pub fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i {
264 unsafe {
265 let a = a.as_u8x16();
266 let b = b.as_u8x16();
267 transmute(simd_select::<i8x16, _>(simd_lt(a, b), a, b))
268 }
269}
270
271#[inline]
278#[target_feature(enable = "sse2")]
279#[cfg_attr(test, assert_instr(pmulhw))]
280#[stable(feature = "simd_x86", since = "1.27.0")]
281pub fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i {
282 unsafe {
283 let a = simd_cast::<_, i32x8>(a.as_i16x8());
284 let b = simd_cast::<_, i32x8>(b.as_i16x8());
285 let r = simd_shr(simd_mul(a, b), i32x8::splat(16));
286 transmute(simd_cast::<i32x8, i16x8>(r))
287 }
288}
289
290#[inline]
297#[target_feature(enable = "sse2")]
298#[cfg_attr(test, assert_instr(pmulhuw))]
299#[stable(feature = "simd_x86", since = "1.27.0")]
300pub fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i {
301 unsafe {
302 let a = simd_cast::<_, u32x8>(a.as_u16x8());
303 let b = simd_cast::<_, u32x8>(b.as_u16x8());
304 let r = simd_shr(simd_mul(a, b), u32x8::splat(16));
305 transmute(simd_cast::<u32x8, u16x8>(r))
306 }
307}
308
309#[inline]
316#[target_feature(enable = "sse2")]
317#[cfg_attr(test, assert_instr(pmullw))]
318#[stable(feature = "simd_x86", since = "1.27.0")]
319pub fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i {
320 unsafe { transmute(simd_mul(a.as_i16x8(), b.as_i16x8())) }
321}
322
323#[inline]
330#[target_feature(enable = "sse2")]
331#[cfg_attr(test, assert_instr(pmuludq))]
332#[stable(feature = "simd_x86", since = "1.27.0")]
333pub fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i {
334 unsafe {
335 let a = a.as_u64x2();
336 let b = b.as_u64x2();
337 let mask = u64x2::splat(u32::MAX.into());
338 transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
339 }
340}
341
342#[inline]
351#[target_feature(enable = "sse2")]
352#[cfg_attr(test, assert_instr(psadbw))]
353#[stable(feature = "simd_x86", since = "1.27.0")]
354pub fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i {
355 unsafe { transmute(psadbw(a.as_u8x16(), b.as_u8x16())) }
356}
357
358#[inline]
362#[target_feature(enable = "sse2")]
363#[cfg_attr(test, assert_instr(psubb))]
364#[stable(feature = "simd_x86", since = "1.27.0")]
365pub fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i {
366 unsafe { transmute(simd_sub(a.as_i8x16(), b.as_i8x16())) }
367}
368
369#[inline]
373#[target_feature(enable = "sse2")]
374#[cfg_attr(test, assert_instr(psubw))]
375#[stable(feature = "simd_x86", since = "1.27.0")]
376pub fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i {
377 unsafe { transmute(simd_sub(a.as_i16x8(), b.as_i16x8())) }
378}
379
380#[inline]
384#[target_feature(enable = "sse2")]
385#[cfg_attr(test, assert_instr(psubd))]
386#[stable(feature = "simd_x86", since = "1.27.0")]
387pub fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i {
388 unsafe { transmute(simd_sub(a.as_i32x4(), b.as_i32x4())) }
389}
390
391#[inline]
395#[target_feature(enable = "sse2")]
396#[cfg_attr(test, assert_instr(psubq))]
397#[stable(feature = "simd_x86", since = "1.27.0")]
398pub fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i {
399 unsafe { transmute(simd_sub(a.as_i64x2(), b.as_i64x2())) }
400}
401
402#[inline]
407#[target_feature(enable = "sse2")]
408#[cfg_attr(test, assert_instr(psubsb))]
409#[stable(feature = "simd_x86", since = "1.27.0")]
410pub fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i {
411 unsafe { transmute(simd_saturating_sub(a.as_i8x16(), b.as_i8x16())) }
412}
413
414#[inline]
419#[target_feature(enable = "sse2")]
420#[cfg_attr(test, assert_instr(psubsw))]
421#[stable(feature = "simd_x86", since = "1.27.0")]
422pub fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i {
423 unsafe { transmute(simd_saturating_sub(a.as_i16x8(), b.as_i16x8())) }
424}
425
426#[inline]
431#[target_feature(enable = "sse2")]
432#[cfg_attr(test, assert_instr(psubusb))]
433#[stable(feature = "simd_x86", since = "1.27.0")]
434pub fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i {
435 unsafe { transmute(simd_saturating_sub(a.as_u8x16(), b.as_u8x16())) }
436}
437
438#[inline]
443#[target_feature(enable = "sse2")]
444#[cfg_attr(test, assert_instr(psubusw))]
445#[stable(feature = "simd_x86", since = "1.27.0")]
446pub fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i {
447 unsafe { transmute(simd_saturating_sub(a.as_u16x8(), b.as_u16x8())) }
448}
449
450#[inline]
454#[target_feature(enable = "sse2")]
455#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
456#[rustc_legacy_const_generics(1)]
457#[stable(feature = "simd_x86", since = "1.27.0")]
458pub fn _mm_slli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
459 static_assert_uimm_bits!(IMM8, 8);
460 unsafe { _mm_slli_si128_impl::<IMM8>(a) }
461}
462
463#[inline]
466#[target_feature(enable = "sse2")]
467unsafe fn _mm_slli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
468 const fn mask(shift: i32, i: u32) -> u32 {
469 let shift = shift as u32 & 0xff;
470 if shift > 15 { i } else { 16 - shift + i }
471 }
472 transmute::<i8x16, _>(simd_shuffle!(
473 i8x16::ZERO,
474 a.as_i8x16(),
475 [
476 mask(IMM8, 0),
477 mask(IMM8, 1),
478 mask(IMM8, 2),
479 mask(IMM8, 3),
480 mask(IMM8, 4),
481 mask(IMM8, 5),
482 mask(IMM8, 6),
483 mask(IMM8, 7),
484 mask(IMM8, 8),
485 mask(IMM8, 9),
486 mask(IMM8, 10),
487 mask(IMM8, 11),
488 mask(IMM8, 12),
489 mask(IMM8, 13),
490 mask(IMM8, 14),
491 mask(IMM8, 15),
492 ],
493 ))
494}
495
496#[inline]
500#[target_feature(enable = "sse2")]
501#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
502#[rustc_legacy_const_generics(1)]
503#[stable(feature = "simd_x86", since = "1.27.0")]
504pub fn _mm_bslli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
505 unsafe {
506 static_assert_uimm_bits!(IMM8, 8);
507 _mm_slli_si128_impl::<IMM8>(a)
508 }
509}
510
511#[inline]
515#[target_feature(enable = "sse2")]
516#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
517#[rustc_legacy_const_generics(1)]
518#[stable(feature = "simd_x86", since = "1.27.0")]
519pub fn _mm_bsrli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
520 unsafe {
521 static_assert_uimm_bits!(IMM8, 8);
522 _mm_srli_si128_impl::<IMM8>(a)
523 }
524}
525
526#[inline]
530#[target_feature(enable = "sse2")]
531#[cfg_attr(test, assert_instr(psllw, IMM8 = 7))]
532#[rustc_legacy_const_generics(1)]
533#[stable(feature = "simd_x86", since = "1.27.0")]
534pub fn _mm_slli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
535 static_assert_uimm_bits!(IMM8, 8);
536 unsafe {
537 if IMM8 >= 16 {
538 _mm_setzero_si128()
539 } else {
540 transmute(simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
541 }
542 }
543}
544
545#[inline]
550#[target_feature(enable = "sse2")]
551#[cfg_attr(test, assert_instr(psllw))]
552#[stable(feature = "simd_x86", since = "1.27.0")]
553pub fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i {
554 unsafe { transmute(psllw(a.as_i16x8(), count.as_i16x8())) }
555}
556
557#[inline]
561#[target_feature(enable = "sse2")]
562#[cfg_attr(test, assert_instr(pslld, IMM8 = 7))]
563#[rustc_legacy_const_generics(1)]
564#[stable(feature = "simd_x86", since = "1.27.0")]
565pub fn _mm_slli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
566 static_assert_uimm_bits!(IMM8, 8);
567 unsafe {
568 if IMM8 >= 32 {
569 _mm_setzero_si128()
570 } else {
571 transmute(simd_shl(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
572 }
573 }
574}
575
576#[inline]
581#[target_feature(enable = "sse2")]
582#[cfg_attr(test, assert_instr(pslld))]
583#[stable(feature = "simd_x86", since = "1.27.0")]
584pub fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i {
585 unsafe { transmute(pslld(a.as_i32x4(), count.as_i32x4())) }
586}
587
588#[inline]
592#[target_feature(enable = "sse2")]
593#[cfg_attr(test, assert_instr(psllq, IMM8 = 7))]
594#[rustc_legacy_const_generics(1)]
595#[stable(feature = "simd_x86", since = "1.27.0")]
596pub fn _mm_slli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
597 static_assert_uimm_bits!(IMM8, 8);
598 unsafe {
599 if IMM8 >= 64 {
600 _mm_setzero_si128()
601 } else {
602 transmute(simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
603 }
604 }
605}
606
607#[inline]
612#[target_feature(enable = "sse2")]
613#[cfg_attr(test, assert_instr(psllq))]
614#[stable(feature = "simd_x86", since = "1.27.0")]
615pub fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i {
616 unsafe { transmute(psllq(a.as_i64x2(), count.as_i64x2())) }
617}
618
619#[inline]
624#[target_feature(enable = "sse2")]
625#[cfg_attr(test, assert_instr(psraw, IMM8 = 1))]
626#[rustc_legacy_const_generics(1)]
627#[stable(feature = "simd_x86", since = "1.27.0")]
628pub fn _mm_srai_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
629 static_assert_uimm_bits!(IMM8, 8);
630 unsafe { transmute(simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16))) }
631}
632
633#[inline]
638#[target_feature(enable = "sse2")]
639#[cfg_attr(test, assert_instr(psraw))]
640#[stable(feature = "simd_x86", since = "1.27.0")]
641pub fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i {
642 unsafe { transmute(psraw(a.as_i16x8(), count.as_i16x8())) }
643}
644
645#[inline]
650#[target_feature(enable = "sse2")]
651#[cfg_attr(test, assert_instr(psrad, IMM8 = 1))]
652#[rustc_legacy_const_generics(1)]
653#[stable(feature = "simd_x86", since = "1.27.0")]
654pub fn _mm_srai_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
655 static_assert_uimm_bits!(IMM8, 8);
656 unsafe { transmute(simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31)))) }
657}
658
659#[inline]
664#[target_feature(enable = "sse2")]
665#[cfg_attr(test, assert_instr(psrad))]
666#[stable(feature = "simd_x86", since = "1.27.0")]
667pub fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i {
668 unsafe { transmute(psrad(a.as_i32x4(), count.as_i32x4())) }
669}
670
671#[inline]
675#[target_feature(enable = "sse2")]
676#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
677#[rustc_legacy_const_generics(1)]
678#[stable(feature = "simd_x86", since = "1.27.0")]
679pub fn _mm_srli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
680 static_assert_uimm_bits!(IMM8, 8);
681 unsafe { _mm_srli_si128_impl::<IMM8>(a) }
682}
683
684#[inline]
687#[target_feature(enable = "sse2")]
688unsafe fn _mm_srli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
689 const fn mask(shift: i32, i: u32) -> u32 {
690 if (shift as u32) > 15 {
691 i + 16
692 } else {
693 i + (shift as u32)
694 }
695 }
696 let x: i8x16 = simd_shuffle!(
697 a.as_i8x16(),
698 i8x16::ZERO,
699 [
700 mask(IMM8, 0),
701 mask(IMM8, 1),
702 mask(IMM8, 2),
703 mask(IMM8, 3),
704 mask(IMM8, 4),
705 mask(IMM8, 5),
706 mask(IMM8, 6),
707 mask(IMM8, 7),
708 mask(IMM8, 8),
709 mask(IMM8, 9),
710 mask(IMM8, 10),
711 mask(IMM8, 11),
712 mask(IMM8, 12),
713 mask(IMM8, 13),
714 mask(IMM8, 14),
715 mask(IMM8, 15),
716 ],
717 );
718 transmute(x)
719}
720
721#[inline]
726#[target_feature(enable = "sse2")]
727#[cfg_attr(test, assert_instr(psrlw, IMM8 = 1))]
728#[rustc_legacy_const_generics(1)]
729#[stable(feature = "simd_x86", since = "1.27.0")]
730pub fn _mm_srli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
731 static_assert_uimm_bits!(IMM8, 8);
732 unsafe {
733 if IMM8 >= 16 {
734 _mm_setzero_si128()
735 } else {
736 transmute(simd_shr(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
737 }
738 }
739}
740
741#[inline]
746#[target_feature(enable = "sse2")]
747#[cfg_attr(test, assert_instr(psrlw))]
748#[stable(feature = "simd_x86", since = "1.27.0")]
749pub fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i {
750 unsafe { transmute(psrlw(a.as_i16x8(), count.as_i16x8())) }
751}
752
753#[inline]
758#[target_feature(enable = "sse2")]
759#[cfg_attr(test, assert_instr(psrld, IMM8 = 8))]
760#[rustc_legacy_const_generics(1)]
761#[stable(feature = "simd_x86", since = "1.27.0")]
762pub fn _mm_srli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
763 static_assert_uimm_bits!(IMM8, 8);
764 unsafe {
765 if IMM8 >= 32 {
766 _mm_setzero_si128()
767 } else {
768 transmute(simd_shr(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
769 }
770 }
771}
772
773#[inline]
778#[target_feature(enable = "sse2")]
779#[cfg_attr(test, assert_instr(psrld))]
780#[stable(feature = "simd_x86", since = "1.27.0")]
781pub fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i {
782 unsafe { transmute(psrld(a.as_i32x4(), count.as_i32x4())) }
783}
784
785#[inline]
790#[target_feature(enable = "sse2")]
791#[cfg_attr(test, assert_instr(psrlq, IMM8 = 1))]
792#[rustc_legacy_const_generics(1)]
793#[stable(feature = "simd_x86", since = "1.27.0")]
794pub fn _mm_srli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
795 static_assert_uimm_bits!(IMM8, 8);
796 unsafe {
797 if IMM8 >= 64 {
798 _mm_setzero_si128()
799 } else {
800 transmute(simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
801 }
802 }
803}
804
805#[inline]
810#[target_feature(enable = "sse2")]
811#[cfg_attr(test, assert_instr(psrlq))]
812#[stable(feature = "simd_x86", since = "1.27.0")]
813pub fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i {
814 unsafe { transmute(psrlq(a.as_i64x2(), count.as_i64x2())) }
815}
816
817#[inline]
822#[target_feature(enable = "sse2")]
823#[cfg_attr(test, assert_instr(andps))]
824#[stable(feature = "simd_x86", since = "1.27.0")]
825pub fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i {
826 unsafe { simd_and(a, b) }
827}
828
829#[inline]
834#[target_feature(enable = "sse2")]
835#[cfg_attr(test, assert_instr(andnps))]
836#[stable(feature = "simd_x86", since = "1.27.0")]
837pub fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i {
838 unsafe { simd_and(simd_xor(_mm_set1_epi8(-1), a), b) }
839}
840
841#[inline]
846#[target_feature(enable = "sse2")]
847#[cfg_attr(test, assert_instr(orps))]
848#[stable(feature = "simd_x86", since = "1.27.0")]
849pub fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i {
850 unsafe { simd_or(a, b) }
851}
852
853#[inline]
858#[target_feature(enable = "sse2")]
859#[cfg_attr(test, assert_instr(xorps))]
860#[stable(feature = "simd_x86", since = "1.27.0")]
861pub fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i {
862 unsafe { simd_xor(a, b) }
863}
864
865#[inline]
869#[target_feature(enable = "sse2")]
870#[cfg_attr(test, assert_instr(pcmpeqb))]
871#[stable(feature = "simd_x86", since = "1.27.0")]
872pub fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i {
873 unsafe { transmute::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16())) }
874}
875
876#[inline]
880#[target_feature(enable = "sse2")]
881#[cfg_attr(test, assert_instr(pcmpeqw))]
882#[stable(feature = "simd_x86", since = "1.27.0")]
883pub fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i {
884 unsafe { transmute::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8())) }
885}
886
887#[inline]
891#[target_feature(enable = "sse2")]
892#[cfg_attr(test, assert_instr(pcmpeqd))]
893#[stable(feature = "simd_x86", since = "1.27.0")]
894pub fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i {
895 unsafe { transmute::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4())) }
896}
897
898#[inline]
902#[target_feature(enable = "sse2")]
903#[cfg_attr(test, assert_instr(pcmpgtb))]
904#[stable(feature = "simd_x86", since = "1.27.0")]
905pub fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i {
906 unsafe { transmute::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16())) }
907}
908
909#[inline]
913#[target_feature(enable = "sse2")]
914#[cfg_attr(test, assert_instr(pcmpgtw))]
915#[stable(feature = "simd_x86", since = "1.27.0")]
916pub fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i {
917 unsafe { transmute::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8())) }
918}
919
920#[inline]
924#[target_feature(enable = "sse2")]
925#[cfg_attr(test, assert_instr(pcmpgtd))]
926#[stable(feature = "simd_x86", since = "1.27.0")]
927pub fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i {
928 unsafe { transmute::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4())) }
929}
930
931#[inline]
935#[target_feature(enable = "sse2")]
936#[cfg_attr(test, assert_instr(pcmpgtb))]
937#[stable(feature = "simd_x86", since = "1.27.0")]
938pub fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i {
939 unsafe { transmute::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16())) }
940}
941
942#[inline]
946#[target_feature(enable = "sse2")]
947#[cfg_attr(test, assert_instr(pcmpgtw))]
948#[stable(feature = "simd_x86", since = "1.27.0")]
949pub fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i {
950 unsafe { transmute::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8())) }
951}
952
953#[inline]
957#[target_feature(enable = "sse2")]
958#[cfg_attr(test, assert_instr(pcmpgtd))]
959#[stable(feature = "simd_x86", since = "1.27.0")]
960pub fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i {
961 unsafe { transmute::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4())) }
962}
963
964#[inline]
969#[target_feature(enable = "sse2")]
970#[cfg_attr(test, assert_instr(cvtdq2pd))]
971#[stable(feature = "simd_x86", since = "1.27.0")]
972pub fn _mm_cvtepi32_pd(a: __m128i) -> __m128d {
973 unsafe {
974 let a = a.as_i32x4();
975 simd_cast::<i32x2, __m128d>(simd_shuffle!(a, a, [0, 1]))
976 }
977}
978
979#[inline]
984#[target_feature(enable = "sse2")]
985#[cfg_attr(test, assert_instr(cvtsi2sd))]
986#[stable(feature = "simd_x86", since = "1.27.0")]
987pub fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d {
988 unsafe { simd_insert!(a, 0, b as f64) }
989}
990
991#[inline]
996#[target_feature(enable = "sse2")]
997#[cfg_attr(test, assert_instr(cvtdq2ps))]
998#[stable(feature = "simd_x86", since = "1.27.0")]
999pub fn _mm_cvtepi32_ps(a: __m128i) -> __m128 {
1000 unsafe { transmute(simd_cast::<_, f32x4>(a.as_i32x4())) }
1001}
1002
1003#[inline]
1008#[target_feature(enable = "sse2")]
1009#[cfg_attr(test, assert_instr(cvtps2dq))]
1010#[stable(feature = "simd_x86", since = "1.27.0")]
1011pub fn _mm_cvtps_epi32(a: __m128) -> __m128i {
1012 unsafe { transmute(cvtps2dq(a)) }
1013}
1014
1015#[inline]
1020#[target_feature(enable = "sse2")]
1021#[stable(feature = "simd_x86", since = "1.27.0")]
1022pub fn _mm_cvtsi32_si128(a: i32) -> __m128i {
1023 unsafe { transmute(i32x4::new(a, 0, 0, 0)) }
1024}
1025
1026#[inline]
1030#[target_feature(enable = "sse2")]
1031#[stable(feature = "simd_x86", since = "1.27.0")]
1032pub fn _mm_cvtsi128_si32(a: __m128i) -> i32 {
1033 unsafe { simd_extract!(a.as_i32x4(), 0) }
1034}
1035
1036#[inline]
1041#[target_feature(enable = "sse2")]
1042#[stable(feature = "simd_x86", since = "1.27.0")]
1044pub fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i {
1045 unsafe { transmute(i64x2::new(e0, e1)) }
1046}
1047
1048#[inline]
1052#[target_feature(enable = "sse2")]
1053#[stable(feature = "simd_x86", since = "1.27.0")]
1055pub fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
1056 unsafe { transmute(i32x4::new(e0, e1, e2, e3)) }
1057}
1058
1059#[inline]
1063#[target_feature(enable = "sse2")]
1064#[stable(feature = "simd_x86", since = "1.27.0")]
1066pub fn _mm_set_epi16(
1067 e7: i16,
1068 e6: i16,
1069 e5: i16,
1070 e4: i16,
1071 e3: i16,
1072 e2: i16,
1073 e1: i16,
1074 e0: i16,
1075) -> __m128i {
1076 unsafe { transmute(i16x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) }
1077}
1078
1079#[inline]
1083#[target_feature(enable = "sse2")]
1084#[stable(feature = "simd_x86", since = "1.27.0")]
1086pub fn _mm_set_epi8(
1087 e15: i8,
1088 e14: i8,
1089 e13: i8,
1090 e12: i8,
1091 e11: i8,
1092 e10: i8,
1093 e9: i8,
1094 e8: i8,
1095 e7: i8,
1096 e6: i8,
1097 e5: i8,
1098 e4: i8,
1099 e3: i8,
1100 e2: i8,
1101 e1: i8,
1102 e0: i8,
1103) -> __m128i {
1104 unsafe {
1105 #[rustfmt::skip]
1106 transmute(i8x16::new(
1107 e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
1108 ))
1109 }
1110}
1111
1112#[inline]
1116#[target_feature(enable = "sse2")]
1117#[stable(feature = "simd_x86", since = "1.27.0")]
1119pub fn _mm_set1_epi64x(a: i64) -> __m128i {
1120 _mm_set_epi64x(a, a)
1121}
1122
1123#[inline]
1127#[target_feature(enable = "sse2")]
1128#[stable(feature = "simd_x86", since = "1.27.0")]
1130pub fn _mm_set1_epi32(a: i32) -> __m128i {
1131 _mm_set_epi32(a, a, a, a)
1132}
1133
1134#[inline]
1138#[target_feature(enable = "sse2")]
1139#[stable(feature = "simd_x86", since = "1.27.0")]
1141pub fn _mm_set1_epi16(a: i16) -> __m128i {
1142 _mm_set_epi16(a, a, a, a, a, a, a, a)
1143}
1144
1145#[inline]
1149#[target_feature(enable = "sse2")]
1150#[stable(feature = "simd_x86", since = "1.27.0")]
1152pub fn _mm_set1_epi8(a: i8) -> __m128i {
1153 _mm_set_epi8(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
1154}
1155
1156#[inline]
1160#[target_feature(enable = "sse2")]
1161#[stable(feature = "simd_x86", since = "1.27.0")]
1163pub fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
1164 _mm_set_epi32(e0, e1, e2, e3)
1165}
1166
1167#[inline]
1171#[target_feature(enable = "sse2")]
1172#[stable(feature = "simd_x86", since = "1.27.0")]
1174pub fn _mm_setr_epi16(
1175 e7: i16,
1176 e6: i16,
1177 e5: i16,
1178 e4: i16,
1179 e3: i16,
1180 e2: i16,
1181 e1: i16,
1182 e0: i16,
1183) -> __m128i {
1184 _mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7)
1185}
1186
1187#[inline]
1191#[target_feature(enable = "sse2")]
1192#[stable(feature = "simd_x86", since = "1.27.0")]
1194pub fn _mm_setr_epi8(
1195 e15: i8,
1196 e14: i8,
1197 e13: i8,
1198 e12: i8,
1199 e11: i8,
1200 e10: i8,
1201 e9: i8,
1202 e8: i8,
1203 e7: i8,
1204 e6: i8,
1205 e5: i8,
1206 e4: i8,
1207 e3: i8,
1208 e2: i8,
1209 e1: i8,
1210 e0: i8,
1211) -> __m128i {
1212 #[rustfmt::skip]
1213 _mm_set_epi8(
1214 e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
1215 )
1216}
1217
1218#[inline]
1222#[target_feature(enable = "sse2")]
1223#[cfg_attr(test, assert_instr(xorps))]
1224#[stable(feature = "simd_x86", since = "1.27.0")]
1225pub fn _mm_setzero_si128() -> __m128i {
1226 const { unsafe { mem::zeroed() } }
1227}
1228
1229#[inline]
1233#[target_feature(enable = "sse2")]
1234#[stable(feature = "simd_x86", since = "1.27.0")]
1235pub unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i {
1236 _mm_set_epi64x(0, ptr::read_unaligned(mem_addr as *const i64))
1237}
1238
1239#[inline]
1245#[target_feature(enable = "sse2")]
1246#[cfg_attr(
1247 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
1248 assert_instr(movaps)
1249)]
1250#[stable(feature = "simd_x86", since = "1.27.0")]
1251pub unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i {
1252 *mem_addr
1253}
1254
1255#[inline]
1261#[target_feature(enable = "sse2")]
1262#[cfg_attr(test, assert_instr(movups))]
1263#[stable(feature = "simd_x86", since = "1.27.0")]
1264pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
1265 let mut dst: __m128i = _mm_undefined_si128();
1266 ptr::copy_nonoverlapping(
1267 mem_addr as *const u8,
1268 ptr::addr_of_mut!(dst) as *mut u8,
1269 mem::size_of::<__m128i>(),
1270 );
1271 dst
1272}
1273
1274#[inline]
1294#[target_feature(enable = "sse2")]
1295#[cfg_attr(test, assert_instr(maskmovdqu))]
1296#[stable(feature = "simd_x86", since = "1.27.0")]
1297pub unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8) {
1298 maskmovdqu(a.as_i8x16(), mask.as_i8x16(), mem_addr)
1299}
1300
1301#[inline]
1307#[target_feature(enable = "sse2")]
1308#[cfg_attr(
1309 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
1310 assert_instr(movaps)
1311)]
1312#[stable(feature = "simd_x86", since = "1.27.0")]
1313pub unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) {
1314 *mem_addr = a;
1315}
1316
1317#[inline]
1323#[target_feature(enable = "sse2")]
1324#[cfg_attr(test, assert_instr(movups))] #[stable(feature = "simd_x86", since = "1.27.0")]
1326pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) {
1327 mem_addr.write_unaligned(a);
1328}
1329
1330#[inline]
1336#[target_feature(enable = "sse2")]
1337#[stable(feature = "simd_x86", since = "1.27.0")]
1338pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) {
1339 ptr::copy_nonoverlapping(ptr::addr_of!(a) as *const u8, mem_addr as *mut u8, 8);
1340}
1341
1342#[inline]
1357#[target_feature(enable = "sse2")]
1358#[cfg_attr(test, assert_instr(movntdq))]
1359#[stable(feature = "simd_x86", since = "1.27.0")]
1360pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) {
1361 crate::arch::asm!(
1362 vps!("movntdq", ",{a}"),
1363 p = in(reg) mem_addr,
1364 a = in(xmm_reg) a,
1365 options(nostack, preserves_flags),
1366 );
1367}
1368
1369#[inline]
1384#[target_feature(enable = "sse2")]
1385#[cfg_attr(test, assert_instr(movnti))]
1386#[stable(feature = "simd_x86", since = "1.27.0")]
1387pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) {
1388 crate::arch::asm!(
1389 vps!("movnti", ",{a:e}"), p = in(reg) mem_addr,
1391 a = in(reg) a,
1392 options(nostack, preserves_flags),
1393 );
1394}
1395
1396#[inline]
1401#[target_feature(enable = "sse2")]
1402#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movq))]
1404#[stable(feature = "simd_x86", since = "1.27.0")]
1405pub fn _mm_move_epi64(a: __m128i) -> __m128i {
1406 unsafe {
1407 let r: i64x2 = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 2]);
1408 transmute(r)
1409 }
1410}
1411
1412#[inline]
1417#[target_feature(enable = "sse2")]
1418#[cfg_attr(test, assert_instr(packsswb))]
1419#[stable(feature = "simd_x86", since = "1.27.0")]
1420pub fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i {
1421 unsafe { transmute(packsswb(a.as_i16x8(), b.as_i16x8())) }
1422}
1423
1424#[inline]
1429#[target_feature(enable = "sse2")]
1430#[cfg_attr(test, assert_instr(packssdw))]
1431#[stable(feature = "simd_x86", since = "1.27.0")]
1432pub fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i {
1433 unsafe { transmute(packssdw(a.as_i32x4(), b.as_i32x4())) }
1434}
1435
1436#[inline]
1441#[target_feature(enable = "sse2")]
1442#[cfg_attr(test, assert_instr(packuswb))]
1443#[stable(feature = "simd_x86", since = "1.27.0")]
1444pub fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i {
1445 unsafe { transmute(packuswb(a.as_i16x8(), b.as_i16x8())) }
1446}
1447
1448#[inline]
1452#[target_feature(enable = "sse2")]
1453#[cfg_attr(test, assert_instr(pextrw, IMM8 = 7))]
1454#[rustc_legacy_const_generics(1)]
1455#[stable(feature = "simd_x86", since = "1.27.0")]
1456pub fn _mm_extract_epi16<const IMM8: i32>(a: __m128i) -> i32 {
1457 static_assert_uimm_bits!(IMM8, 3);
1458 unsafe { simd_extract!(a.as_u16x8(), IMM8 as u32, u16) as i32 }
1459}
1460
1461#[inline]
1465#[target_feature(enable = "sse2")]
1466#[cfg_attr(test, assert_instr(pinsrw, IMM8 = 7))]
1467#[rustc_legacy_const_generics(2)]
1468#[stable(feature = "simd_x86", since = "1.27.0")]
1469pub fn _mm_insert_epi16<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
1470 static_assert_uimm_bits!(IMM8, 3);
1471 unsafe { transmute(simd_insert!(a.as_i16x8(), IMM8 as u32, i as i16)) }
1472}
1473
1474#[inline]
1478#[target_feature(enable = "sse2")]
1479#[cfg_attr(test, assert_instr(pmovmskb))]
1480#[stable(feature = "simd_x86", since = "1.27.0")]
1481pub fn _mm_movemask_epi8(a: __m128i) -> i32 {
1482 unsafe {
1483 let z = i8x16::ZERO;
1484 let m: i8x16 = simd_lt(a.as_i8x16(), z);
1485 simd_bitmask::<_, u16>(m) as u32 as i32
1486 }
1487}
1488
1489#[inline]
1493#[target_feature(enable = "sse2")]
1494#[cfg_attr(test, assert_instr(pshufd, IMM8 = 9))]
1495#[rustc_legacy_const_generics(1)]
1496#[stable(feature = "simd_x86", since = "1.27.0")]
1497pub fn _mm_shuffle_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
1498 static_assert_uimm_bits!(IMM8, 8);
1499 unsafe {
1500 let a = a.as_i32x4();
1501 let x: i32x4 = simd_shuffle!(
1502 a,
1503 a,
1504 [
1505 IMM8 as u32 & 0b11,
1506 (IMM8 as u32 >> 2) & 0b11,
1507 (IMM8 as u32 >> 4) & 0b11,
1508 (IMM8 as u32 >> 6) & 0b11,
1509 ],
1510 );
1511 transmute(x)
1512 }
1513}
1514
1515#[inline]
1523#[target_feature(enable = "sse2")]
1524#[cfg_attr(test, assert_instr(pshufhw, IMM8 = 9))]
1525#[rustc_legacy_const_generics(1)]
1526#[stable(feature = "simd_x86", since = "1.27.0")]
1527pub fn _mm_shufflehi_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
1528 static_assert_uimm_bits!(IMM8, 8);
1529 unsafe {
1530 let a = a.as_i16x8();
1531 let x: i16x8 = simd_shuffle!(
1532 a,
1533 a,
1534 [
1535 0,
1536 1,
1537 2,
1538 3,
1539 (IMM8 as u32 & 0b11) + 4,
1540 ((IMM8 as u32 >> 2) & 0b11) + 4,
1541 ((IMM8 as u32 >> 4) & 0b11) + 4,
1542 ((IMM8 as u32 >> 6) & 0b11) + 4,
1543 ],
1544 );
1545 transmute(x)
1546 }
1547}
1548
1549#[inline]
1557#[target_feature(enable = "sse2")]
1558#[cfg_attr(test, assert_instr(pshuflw, IMM8 = 9))]
1559#[rustc_legacy_const_generics(1)]
1560#[stable(feature = "simd_x86", since = "1.27.0")]
1561pub fn _mm_shufflelo_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
1562 static_assert_uimm_bits!(IMM8, 8);
1563 unsafe {
1564 let a = a.as_i16x8();
1565 let x: i16x8 = simd_shuffle!(
1566 a,
1567 a,
1568 [
1569 IMM8 as u32 & 0b11,
1570 (IMM8 as u32 >> 2) & 0b11,
1571 (IMM8 as u32 >> 4) & 0b11,
1572 (IMM8 as u32 >> 6) & 0b11,
1573 4,
1574 5,
1575 6,
1576 7,
1577 ],
1578 );
1579 transmute(x)
1580 }
1581}
1582
1583#[inline]
1587#[target_feature(enable = "sse2")]
1588#[cfg_attr(test, assert_instr(punpckhbw))]
1589#[stable(feature = "simd_x86", since = "1.27.0")]
1590pub fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i {
1591 unsafe {
1592 transmute::<i8x16, _>(simd_shuffle!(
1593 a.as_i8x16(),
1594 b.as_i8x16(),
1595 [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31],
1596 ))
1597 }
1598}
1599
1600#[inline]
1604#[target_feature(enable = "sse2")]
1605#[cfg_attr(test, assert_instr(punpckhwd))]
1606#[stable(feature = "simd_x86", since = "1.27.0")]
1607pub fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i {
1608 unsafe {
1609 let x = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]);
1610 transmute::<i16x8, _>(x)
1611 }
1612}
1613
1614#[inline]
1618#[target_feature(enable = "sse2")]
1619#[cfg_attr(test, assert_instr(unpckhps))]
1620#[stable(feature = "simd_x86", since = "1.27.0")]
1621pub fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i {
1622 unsafe { transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7])) }
1623}
1624
1625#[inline]
1629#[target_feature(enable = "sse2")]
1630#[cfg_attr(test, assert_instr(unpckhpd))]
1631#[stable(feature = "simd_x86", since = "1.27.0")]
1632pub fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i {
1633 unsafe { transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [1, 3])) }
1634}
1635
1636#[inline]
1640#[target_feature(enable = "sse2")]
1641#[cfg_attr(test, assert_instr(punpcklbw))]
1642#[stable(feature = "simd_x86", since = "1.27.0")]
1643pub fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i {
1644 unsafe {
1645 transmute::<i8x16, _>(simd_shuffle!(
1646 a.as_i8x16(),
1647 b.as_i8x16(),
1648 [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23],
1649 ))
1650 }
1651}
1652
1653#[inline]
1657#[target_feature(enable = "sse2")]
1658#[cfg_attr(test, assert_instr(punpcklwd))]
1659#[stable(feature = "simd_x86", since = "1.27.0")]
1660pub fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i {
1661 unsafe {
1662 let x = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]);
1663 transmute::<i16x8, _>(x)
1664 }
1665}
1666
1667#[inline]
1671#[target_feature(enable = "sse2")]
1672#[cfg_attr(test, assert_instr(unpcklps))]
1673#[stable(feature = "simd_x86", since = "1.27.0")]
1674pub fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i {
1675 unsafe { transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5])) }
1676}
1677
1678#[inline]
1682#[target_feature(enable = "sse2")]
1683#[cfg_attr(test, assert_instr(movlhps))]
1684#[stable(feature = "simd_x86", since = "1.27.0")]
1685pub fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i {
1686 unsafe { transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [0, 2])) }
1687}
1688
1689#[inline]
1694#[target_feature(enable = "sse2")]
1695#[cfg_attr(test, assert_instr(addsd))]
1696#[stable(feature = "simd_x86", since = "1.27.0")]
1697pub fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d {
1698 unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) + _mm_cvtsd_f64(b)) }
1699}
1700
1701#[inline]
1706#[target_feature(enable = "sse2")]
1707#[cfg_attr(test, assert_instr(addpd))]
1708#[stable(feature = "simd_x86", since = "1.27.0")]
1709pub fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d {
1710 unsafe { simd_add(a, b) }
1711}
1712
1713#[inline]
1718#[target_feature(enable = "sse2")]
1719#[cfg_attr(test, assert_instr(divsd))]
1720#[stable(feature = "simd_x86", since = "1.27.0")]
1721pub fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d {
1722 unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) / _mm_cvtsd_f64(b)) }
1723}
1724
1725#[inline]
1730#[target_feature(enable = "sse2")]
1731#[cfg_attr(test, assert_instr(divpd))]
1732#[stable(feature = "simd_x86", since = "1.27.0")]
1733pub fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d {
1734 unsafe { simd_div(a, b) }
1735}
1736
1737#[inline]
1742#[target_feature(enable = "sse2")]
1743#[cfg_attr(test, assert_instr(maxsd))]
1744#[stable(feature = "simd_x86", since = "1.27.0")]
1745pub fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d {
1746 unsafe { maxsd(a, b) }
1747}
1748
1749#[inline]
1754#[target_feature(enable = "sse2")]
1755#[cfg_attr(test, assert_instr(maxpd))]
1756#[stable(feature = "simd_x86", since = "1.27.0")]
1757pub fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d {
1758 unsafe { maxpd(a, b) }
1759}
1760
1761#[inline]
1766#[target_feature(enable = "sse2")]
1767#[cfg_attr(test, assert_instr(minsd))]
1768#[stable(feature = "simd_x86", since = "1.27.0")]
1769pub fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d {
1770 unsafe { minsd(a, b) }
1771}
1772
1773#[inline]
1778#[target_feature(enable = "sse2")]
1779#[cfg_attr(test, assert_instr(minpd))]
1780#[stable(feature = "simd_x86", since = "1.27.0")]
1781pub fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d {
1782 unsafe { minpd(a, b) }
1783}
1784
1785#[inline]
1790#[target_feature(enable = "sse2")]
1791#[cfg_attr(test, assert_instr(mulsd))]
1792#[stable(feature = "simd_x86", since = "1.27.0")]
1793pub fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d {
1794 unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) * _mm_cvtsd_f64(b)) }
1795}
1796
1797#[inline]
1802#[target_feature(enable = "sse2")]
1803#[cfg_attr(test, assert_instr(mulpd))]
1804#[stable(feature = "simd_x86", since = "1.27.0")]
1805pub fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d {
1806 unsafe { simd_mul(a, b) }
1807}
1808
1809#[inline]
1814#[target_feature(enable = "sse2")]
1815#[cfg_attr(test, assert_instr(sqrtsd))]
1816#[stable(feature = "simd_x86", since = "1.27.0")]
1817pub fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d {
1818 unsafe { simd_insert!(a, 0, sqrtf64(_mm_cvtsd_f64(b))) }
1819}
1820
1821#[inline]
1825#[target_feature(enable = "sse2")]
1826#[cfg_attr(test, assert_instr(sqrtpd))]
1827#[stable(feature = "simd_x86", since = "1.27.0")]
1828pub fn _mm_sqrt_pd(a: __m128d) -> __m128d {
1829 unsafe { simd_fsqrt(a) }
1830}
1831
1832#[inline]
1837#[target_feature(enable = "sse2")]
1838#[cfg_attr(test, assert_instr(subsd))]
1839#[stable(feature = "simd_x86", since = "1.27.0")]
1840pub fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d {
1841 unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) - _mm_cvtsd_f64(b)) }
1842}
1843
1844#[inline]
1849#[target_feature(enable = "sse2")]
1850#[cfg_attr(test, assert_instr(subpd))]
1851#[stable(feature = "simd_x86", since = "1.27.0")]
1852pub fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d {
1853 unsafe { simd_sub(a, b) }
1854}
1855
1856#[inline]
1861#[target_feature(enable = "sse2")]
1862#[cfg_attr(test, assert_instr(andps))]
1863#[stable(feature = "simd_x86", since = "1.27.0")]
1864pub fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d {
1865 unsafe {
1866 let a: __m128i = transmute(a);
1867 let b: __m128i = transmute(b);
1868 transmute(_mm_and_si128(a, b))
1869 }
1870}
1871
1872#[inline]
1876#[target_feature(enable = "sse2")]
1877#[cfg_attr(test, assert_instr(andnps))]
1878#[stable(feature = "simd_x86", since = "1.27.0")]
1879pub fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d {
1880 unsafe {
1881 let a: __m128i = transmute(a);
1882 let b: __m128i = transmute(b);
1883 transmute(_mm_andnot_si128(a, b))
1884 }
1885}
1886
1887#[inline]
1891#[target_feature(enable = "sse2")]
1892#[cfg_attr(test, assert_instr(orps))]
1893#[stable(feature = "simd_x86", since = "1.27.0")]
1894pub fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d {
1895 unsafe {
1896 let a: __m128i = transmute(a);
1897 let b: __m128i = transmute(b);
1898 transmute(_mm_or_si128(a, b))
1899 }
1900}
1901
1902#[inline]
1906#[target_feature(enable = "sse2")]
1907#[cfg_attr(test, assert_instr(xorps))]
1908#[stable(feature = "simd_x86", since = "1.27.0")]
1909pub fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d {
1910 unsafe {
1911 let a: __m128i = transmute(a);
1912 let b: __m128i = transmute(b);
1913 transmute(_mm_xor_si128(a, b))
1914 }
1915}
1916
1917#[inline]
1922#[target_feature(enable = "sse2")]
1923#[cfg_attr(test, assert_instr(cmpeqsd))]
1924#[stable(feature = "simd_x86", since = "1.27.0")]
1925pub fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d {
1926 unsafe { cmpsd(a, b, 0) }
1927}
1928
1929#[inline]
1934#[target_feature(enable = "sse2")]
1935#[cfg_attr(test, assert_instr(cmpltsd))]
1936#[stable(feature = "simd_x86", since = "1.27.0")]
1937pub fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d {
1938 unsafe { cmpsd(a, b, 1) }
1939}
1940
1941#[inline]
1946#[target_feature(enable = "sse2")]
1947#[cfg_attr(test, assert_instr(cmplesd))]
1948#[stable(feature = "simd_x86", since = "1.27.0")]
1949pub fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d {
1950 unsafe { cmpsd(a, b, 2) }
1951}
1952
1953#[inline]
1958#[target_feature(enable = "sse2")]
1959#[cfg_attr(test, assert_instr(cmpltsd))]
1960#[stable(feature = "simd_x86", since = "1.27.0")]
1961pub fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d {
1962 unsafe { simd_insert!(_mm_cmplt_sd(b, a), 1, simd_extract!(a, 1, f64)) }
1963}
1964
1965#[inline]
1970#[target_feature(enable = "sse2")]
1971#[cfg_attr(test, assert_instr(cmplesd))]
1972#[stable(feature = "simd_x86", since = "1.27.0")]
1973pub fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d {
1974 unsafe { simd_insert!(_mm_cmple_sd(b, a), 1, simd_extract!(a, 1, f64)) }
1975}
1976
1977#[inline]
1984#[target_feature(enable = "sse2")]
1985#[cfg_attr(test, assert_instr(cmpordsd))]
1986#[stable(feature = "simd_x86", since = "1.27.0")]
1987pub fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d {
1988 unsafe { cmpsd(a, b, 7) }
1989}
1990
1991#[inline]
1997#[target_feature(enable = "sse2")]
1998#[cfg_attr(test, assert_instr(cmpunordsd))]
1999#[stable(feature = "simd_x86", since = "1.27.0")]
2000pub fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d {
2001 unsafe { cmpsd(a, b, 3) }
2002}
2003
2004#[inline]
2009#[target_feature(enable = "sse2")]
2010#[cfg_attr(test, assert_instr(cmpneqsd))]
2011#[stable(feature = "simd_x86", since = "1.27.0")]
2012pub fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d {
2013 unsafe { cmpsd(a, b, 4) }
2014}
2015
2016#[inline]
2021#[target_feature(enable = "sse2")]
2022#[cfg_attr(test, assert_instr(cmpnltsd))]
2023#[stable(feature = "simd_x86", since = "1.27.0")]
2024pub fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d {
2025 unsafe { cmpsd(a, b, 5) }
2026}
2027
2028#[inline]
2033#[target_feature(enable = "sse2")]
2034#[cfg_attr(test, assert_instr(cmpnlesd))]
2035#[stable(feature = "simd_x86", since = "1.27.0")]
2036pub fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d {
2037 unsafe { cmpsd(a, b, 6) }
2038}
2039
2040#[inline]
2045#[target_feature(enable = "sse2")]
2046#[cfg_attr(test, assert_instr(cmpnltsd))]
2047#[stable(feature = "simd_x86", since = "1.27.0")]
2048pub fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d {
2049 unsafe { simd_insert!(_mm_cmpnlt_sd(b, a), 1, simd_extract!(a, 1, f64)) }
2050}
2051
2052#[inline]
2057#[target_feature(enable = "sse2")]
2058#[cfg_attr(test, assert_instr(cmpnlesd))]
2059#[stable(feature = "simd_x86", since = "1.27.0")]
2060pub fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d {
2061 unsafe { simd_insert!(_mm_cmpnle_sd(b, a), 1, simd_extract!(a, 1, f64)) }
2062}
2063
2064#[inline]
2068#[target_feature(enable = "sse2")]
2069#[cfg_attr(test, assert_instr(cmpeqpd))]
2070#[stable(feature = "simd_x86", since = "1.27.0")]
2071pub fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d {
2072 unsafe { cmppd(a, b, 0) }
2073}
2074
2075#[inline]
2079#[target_feature(enable = "sse2")]
2080#[cfg_attr(test, assert_instr(cmpltpd))]
2081#[stable(feature = "simd_x86", since = "1.27.0")]
2082pub fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d {
2083 unsafe { cmppd(a, b, 1) }
2084}
2085
2086#[inline]
2090#[target_feature(enable = "sse2")]
2091#[cfg_attr(test, assert_instr(cmplepd))]
2092#[stable(feature = "simd_x86", since = "1.27.0")]
2093pub fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d {
2094 unsafe { cmppd(a, b, 2) }
2095}
2096
2097#[inline]
2101#[target_feature(enable = "sse2")]
2102#[cfg_attr(test, assert_instr(cmpltpd))]
2103#[stable(feature = "simd_x86", since = "1.27.0")]
2104pub fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d {
2105 _mm_cmplt_pd(b, a)
2106}
2107
2108#[inline]
2112#[target_feature(enable = "sse2")]
2113#[cfg_attr(test, assert_instr(cmplepd))]
2114#[stable(feature = "simd_x86", since = "1.27.0")]
2115pub fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d {
2116 _mm_cmple_pd(b, a)
2117}
2118
2119#[inline]
2123#[target_feature(enable = "sse2")]
2124#[cfg_attr(test, assert_instr(cmpordpd))]
2125#[stable(feature = "simd_x86", since = "1.27.0")]
2126pub fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d {
2127 unsafe { cmppd(a, b, 7) }
2128}
2129
2130#[inline]
2134#[target_feature(enable = "sse2")]
2135#[cfg_attr(test, assert_instr(cmpunordpd))]
2136#[stable(feature = "simd_x86", since = "1.27.0")]
2137pub fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d {
2138 unsafe { cmppd(a, b, 3) }
2139}
2140
2141#[inline]
2145#[target_feature(enable = "sse2")]
2146#[cfg_attr(test, assert_instr(cmpneqpd))]
2147#[stable(feature = "simd_x86", since = "1.27.0")]
2148pub fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d {
2149 unsafe { cmppd(a, b, 4) }
2150}
2151
2152#[inline]
2156#[target_feature(enable = "sse2")]
2157#[cfg_attr(test, assert_instr(cmpnltpd))]
2158#[stable(feature = "simd_x86", since = "1.27.0")]
2159pub fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d {
2160 unsafe { cmppd(a, b, 5) }
2161}
2162
2163#[inline]
2167#[target_feature(enable = "sse2")]
2168#[cfg_attr(test, assert_instr(cmpnlepd))]
2169#[stable(feature = "simd_x86", since = "1.27.0")]
2170pub fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d {
2171 unsafe { cmppd(a, b, 6) }
2172}
2173
2174#[inline]
2178#[target_feature(enable = "sse2")]
2179#[cfg_attr(test, assert_instr(cmpnltpd))]
2180#[stable(feature = "simd_x86", since = "1.27.0")]
2181pub fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d {
2182 _mm_cmpnlt_pd(b, a)
2183}
2184
2185#[inline]
2190#[target_feature(enable = "sse2")]
2191#[cfg_attr(test, assert_instr(cmpnlepd))]
2192#[stable(feature = "simd_x86", since = "1.27.0")]
2193pub fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d {
2194 _mm_cmpnle_pd(b, a)
2195}
2196
2197#[inline]
2201#[target_feature(enable = "sse2")]
2202#[cfg_attr(test, assert_instr(comisd))]
2203#[stable(feature = "simd_x86", since = "1.27.0")]
2204pub fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32 {
2205 unsafe { comieqsd(a, b) }
2206}
2207
2208#[inline]
2212#[target_feature(enable = "sse2")]
2213#[cfg_attr(test, assert_instr(comisd))]
2214#[stable(feature = "simd_x86", since = "1.27.0")]
2215pub fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32 {
2216 unsafe { comiltsd(a, b) }
2217}
2218
2219#[inline]
2223#[target_feature(enable = "sse2")]
2224#[cfg_attr(test, assert_instr(comisd))]
2225#[stable(feature = "simd_x86", since = "1.27.0")]
2226pub fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32 {
2227 unsafe { comilesd(a, b) }
2228}
2229
2230#[inline]
2234#[target_feature(enable = "sse2")]
2235#[cfg_attr(test, assert_instr(comisd))]
2236#[stable(feature = "simd_x86", since = "1.27.0")]
2237pub fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32 {
2238 unsafe { comigtsd(a, b) }
2239}
2240
2241#[inline]
2245#[target_feature(enable = "sse2")]
2246#[cfg_attr(test, assert_instr(comisd))]
2247#[stable(feature = "simd_x86", since = "1.27.0")]
2248pub fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32 {
2249 unsafe { comigesd(a, b) }
2250}
2251
2252#[inline]
2256#[target_feature(enable = "sse2")]
2257#[cfg_attr(test, assert_instr(comisd))]
2258#[stable(feature = "simd_x86", since = "1.27.0")]
2259pub fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32 {
2260 unsafe { comineqsd(a, b) }
2261}
2262
2263#[inline]
2267#[target_feature(enable = "sse2")]
2268#[cfg_attr(test, assert_instr(ucomisd))]
2269#[stable(feature = "simd_x86", since = "1.27.0")]
2270pub fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32 {
2271 unsafe { ucomieqsd(a, b) }
2272}
2273
2274#[inline]
2278#[target_feature(enable = "sse2")]
2279#[cfg_attr(test, assert_instr(ucomisd))]
2280#[stable(feature = "simd_x86", since = "1.27.0")]
2281pub fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32 {
2282 unsafe { ucomiltsd(a, b) }
2283}
2284
2285#[inline]
2289#[target_feature(enable = "sse2")]
2290#[cfg_attr(test, assert_instr(ucomisd))]
2291#[stable(feature = "simd_x86", since = "1.27.0")]
2292pub fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32 {
2293 unsafe { ucomilesd(a, b) }
2294}
2295
2296#[inline]
2300#[target_feature(enable = "sse2")]
2301#[cfg_attr(test, assert_instr(ucomisd))]
2302#[stable(feature = "simd_x86", since = "1.27.0")]
2303pub fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32 {
2304 unsafe { ucomigtsd(a, b) }
2305}
2306
2307#[inline]
2311#[target_feature(enable = "sse2")]
2312#[cfg_attr(test, assert_instr(ucomisd))]
2313#[stable(feature = "simd_x86", since = "1.27.0")]
2314pub fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32 {
2315 unsafe { ucomigesd(a, b) }
2316}
2317
2318#[inline]
2322#[target_feature(enable = "sse2")]
2323#[cfg_attr(test, assert_instr(ucomisd))]
2324#[stable(feature = "simd_x86", since = "1.27.0")]
2325pub fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32 {
2326 unsafe { ucomineqsd(a, b) }
2327}
2328
2329#[inline]
2334#[target_feature(enable = "sse2")]
2335#[cfg_attr(test, assert_instr(cvtpd2ps))]
2336#[stable(feature = "simd_x86", since = "1.27.0")]
2337pub fn _mm_cvtpd_ps(a: __m128d) -> __m128 {
2338 unsafe {
2339 let r = simd_cast::<_, f32x2>(a.as_f64x2());
2340 let zero = f32x2::ZERO;
2341 transmute::<f32x4, _>(simd_shuffle!(r, zero, [0, 1, 2, 3]))
2342 }
2343}
2344
2345#[inline]
2351#[target_feature(enable = "sse2")]
2352#[cfg_attr(test, assert_instr(cvtps2pd))]
2353#[stable(feature = "simd_x86", since = "1.27.0")]
2354pub fn _mm_cvtps_pd(a: __m128) -> __m128d {
2355 unsafe {
2356 let a = a.as_f32x4();
2357 transmute(simd_cast::<f32x2, f64x2>(simd_shuffle!(a, a, [0, 1])))
2358 }
2359}
2360
2361#[inline]
2366#[target_feature(enable = "sse2")]
2367#[cfg_attr(test, assert_instr(cvtpd2dq))]
2368#[stable(feature = "simd_x86", since = "1.27.0")]
2369pub fn _mm_cvtpd_epi32(a: __m128d) -> __m128i {
2370 unsafe { transmute(cvtpd2dq(a)) }
2371}
2372
2373#[inline]
2378#[target_feature(enable = "sse2")]
2379#[cfg_attr(test, assert_instr(cvtsd2si))]
2380#[stable(feature = "simd_x86", since = "1.27.0")]
2381pub fn _mm_cvtsd_si32(a: __m128d) -> i32 {
2382 unsafe { cvtsd2si(a) }
2383}
2384
2385#[inline]
2392#[target_feature(enable = "sse2")]
2393#[cfg_attr(test, assert_instr(cvtsd2ss))]
2394#[stable(feature = "simd_x86", since = "1.27.0")]
2395pub fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 {
2396 unsafe { cvtsd2ss(a, b) }
2397}
2398
2399#[inline]
2403#[target_feature(enable = "sse2")]
2404#[stable(feature = "simd_x86", since = "1.27.0")]
2405pub fn _mm_cvtsd_f64(a: __m128d) -> f64 {
2406 unsafe { simd_extract!(a, 0) }
2407}
2408
2409#[inline]
2416#[target_feature(enable = "sse2")]
2417#[cfg_attr(test, assert_instr(cvtss2sd))]
2418#[stable(feature = "simd_x86", since = "1.27.0")]
2419pub fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d {
2420 unsafe { cvtss2sd(a, b) }
2421}
2422
2423#[inline]
2428#[target_feature(enable = "sse2")]
2429#[cfg_attr(test, assert_instr(cvttpd2dq))]
2430#[stable(feature = "simd_x86", since = "1.27.0")]
2431pub fn _mm_cvttpd_epi32(a: __m128d) -> __m128i {
2432 unsafe { transmute(cvttpd2dq(a)) }
2433}
2434
2435#[inline]
2440#[target_feature(enable = "sse2")]
2441#[cfg_attr(test, assert_instr(cvttsd2si))]
2442#[stable(feature = "simd_x86", since = "1.27.0")]
2443pub fn _mm_cvttsd_si32(a: __m128d) -> i32 {
2444 unsafe { cvttsd2si(a) }
2445}
2446
2447#[inline]
2452#[target_feature(enable = "sse2")]
2453#[cfg_attr(test, assert_instr(cvttps2dq))]
2454#[stable(feature = "simd_x86", since = "1.27.0")]
2455pub fn _mm_cvttps_epi32(a: __m128) -> __m128i {
2456 unsafe { transmute(cvttps2dq(a)) }
2457}
2458
2459#[inline]
2464#[target_feature(enable = "sse2")]
2465#[stable(feature = "simd_x86", since = "1.27.0")]
2466pub fn _mm_set_sd(a: f64) -> __m128d {
2467 _mm_set_pd(0.0, a)
2468}
2469
2470#[inline]
2475#[target_feature(enable = "sse2")]
2476#[stable(feature = "simd_x86", since = "1.27.0")]
2477pub fn _mm_set1_pd(a: f64) -> __m128d {
2478 _mm_set_pd(a, a)
2479}
2480
2481#[inline]
2486#[target_feature(enable = "sse2")]
2487#[stable(feature = "simd_x86", since = "1.27.0")]
2488pub fn _mm_set_pd1(a: f64) -> __m128d {
2489 _mm_set_pd(a, a)
2490}
2491
2492#[inline]
2497#[target_feature(enable = "sse2")]
2498#[stable(feature = "simd_x86", since = "1.27.0")]
2499pub fn _mm_set_pd(a: f64, b: f64) -> __m128d {
2500 __m128d([b, a])
2501}
2502
2503#[inline]
2508#[target_feature(enable = "sse2")]
2509#[stable(feature = "simd_x86", since = "1.27.0")]
2510pub fn _mm_setr_pd(a: f64, b: f64) -> __m128d {
2511 _mm_set_pd(b, a)
2512}
2513
2514#[inline]
2519#[target_feature(enable = "sse2")]
2520#[cfg_attr(test, assert_instr(xorp))]
2521#[stable(feature = "simd_x86", since = "1.27.0")]
2522pub fn _mm_setzero_pd() -> __m128d {
2523 const { unsafe { mem::zeroed() } }
2524}
2525
2526#[inline]
2533#[target_feature(enable = "sse2")]
2534#[cfg_attr(test, assert_instr(movmskpd))]
2535#[stable(feature = "simd_x86", since = "1.27.0")]
2536pub fn _mm_movemask_pd(a: __m128d) -> i32 {
2537 unsafe {
2540 let mask: i64x2 = simd_lt(transmute(a), i64x2::ZERO);
2541 simd_bitmask::<i64x2, u8>(mask).into()
2542 }
2543}
2544
2545#[inline]
2552#[target_feature(enable = "sse2")]
2553#[cfg_attr(
2554 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
2555 assert_instr(movaps)
2556)]
2557#[stable(feature = "simd_x86", since = "1.27.0")]
2558#[allow(clippy::cast_ptr_alignment)]
2559pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d {
2560 *(mem_addr as *const __m128d)
2561}
2562
2563#[inline]
2568#[target_feature(enable = "sse2")]
2569#[cfg_attr(test, assert_instr(movsd))]
2570#[stable(feature = "simd_x86", since = "1.27.0")]
2571pub unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d {
2572 _mm_setr_pd(*mem_addr, 0.)
2573}
2574
2575#[inline]
2581#[target_feature(enable = "sse2")]
2582#[cfg_attr(test, assert_instr(movhps))]
2583#[stable(feature = "simd_x86", since = "1.27.0")]
2584pub unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
2585 _mm_setr_pd(simd_extract!(a, 0), *mem_addr)
2586}
2587
2588#[inline]
2594#[target_feature(enable = "sse2")]
2595#[cfg_attr(test, assert_instr(movlps))]
2596#[stable(feature = "simd_x86", since = "1.27.0")]
2597pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
2598 _mm_setr_pd(*mem_addr, simd_extract!(a, 1))
2599}
2600
2601#[inline]
2617#[target_feature(enable = "sse2")]
2618#[cfg_attr(test, assert_instr(movntpd))]
2619#[stable(feature = "simd_x86", since = "1.27.0")]
2620#[allow(clippy::cast_ptr_alignment)]
2621pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) {
2622 crate::arch::asm!(
2623 vps!("movntpd", ",{a}"),
2624 p = in(reg) mem_addr,
2625 a = in(xmm_reg) a,
2626 options(nostack, preserves_flags),
2627 );
2628}
2629
2630#[inline]
2635#[target_feature(enable = "sse2")]
2636#[cfg_attr(test, assert_instr(movlps))]
2637#[stable(feature = "simd_x86", since = "1.27.0")]
2638pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) {
2639 *mem_addr = simd_extract!(a, 0)
2640}
2641
2642#[inline]
2648#[target_feature(enable = "sse2")]
2649#[cfg_attr(
2650 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
2651 assert_instr(movaps)
2652)]
2653#[stable(feature = "simd_x86", since = "1.27.0")]
2654#[allow(clippy::cast_ptr_alignment)]
2655pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) {
2656 *(mem_addr as *mut __m128d) = a;
2657}
2658
2659#[inline]
2665#[target_feature(enable = "sse2")]
2666#[cfg_attr(test, assert_instr(movups))] #[stable(feature = "simd_x86", since = "1.27.0")]
2668pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) {
2669 mem_addr.cast::<__m128d>().write_unaligned(a);
2670}
2671
2672#[inline]
2678#[target_feature(enable = "sse2")]
2679#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2680pub unsafe fn _mm_storeu_si16(mem_addr: *mut u8, a: __m128i) {
2681 ptr::write_unaligned(mem_addr as *mut i16, simd_extract(a.as_i16x8(), 0))
2682}
2683
2684#[inline]
2690#[target_feature(enable = "sse2")]
2691#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2692pub unsafe fn _mm_storeu_si32(mem_addr: *mut u8, a: __m128i) {
2693 ptr::write_unaligned(mem_addr as *mut i32, simd_extract(a.as_i32x4(), 0))
2694}
2695
2696#[inline]
2702#[target_feature(enable = "sse2")]
2703#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2704pub unsafe fn _mm_storeu_si64(mem_addr: *mut u8, a: __m128i) {
2705 ptr::write_unaligned(mem_addr as *mut i64, simd_extract(a.as_i64x2(), 0))
2706}
2707
2708#[inline]
2714#[target_feature(enable = "sse2")]
2715#[stable(feature = "simd_x86", since = "1.27.0")]
2716#[allow(clippy::cast_ptr_alignment)]
2717pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) {
2718 let b: __m128d = simd_shuffle!(a, a, [0, 0]);
2719 *(mem_addr as *mut __m128d) = b;
2720}
2721
2722#[inline]
2728#[target_feature(enable = "sse2")]
2729#[stable(feature = "simd_x86", since = "1.27.0")]
2730#[allow(clippy::cast_ptr_alignment)]
2731pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) {
2732 let b: __m128d = simd_shuffle!(a, a, [0, 0]);
2733 *(mem_addr as *mut __m128d) = b;
2734}
2735
2736#[inline]
2743#[target_feature(enable = "sse2")]
2744#[stable(feature = "simd_x86", since = "1.27.0")]
2745#[allow(clippy::cast_ptr_alignment)]
2746pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) {
2747 let b: __m128d = simd_shuffle!(a, a, [1, 0]);
2748 *(mem_addr as *mut __m128d) = b;
2749}
2750
2751#[inline]
2756#[target_feature(enable = "sse2")]
2757#[cfg_attr(test, assert_instr(movhps))]
2758#[stable(feature = "simd_x86", since = "1.27.0")]
2759pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) {
2760 *mem_addr = simd_extract!(a, 1);
2761}
2762
2763#[inline]
2768#[target_feature(enable = "sse2")]
2769#[cfg_attr(test, assert_instr(movlps))]
2770#[stable(feature = "simd_x86", since = "1.27.0")]
2771pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) {
2772 *mem_addr = simd_extract!(a, 0);
2773}
2774
2775#[inline]
2780#[target_feature(enable = "sse2")]
2781#[stable(feature = "simd_x86", since = "1.27.0")]
2783pub unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d {
2784 let d = *mem_addr;
2785 _mm_setr_pd(d, d)
2786}
2787
2788#[inline]
2793#[target_feature(enable = "sse2")]
2794#[stable(feature = "simd_x86", since = "1.27.0")]
2796pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d {
2797 _mm_load1_pd(mem_addr)
2798}
2799
2800#[inline]
2806#[target_feature(enable = "sse2")]
2807#[cfg_attr(
2808 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
2809 assert_instr(movaps)
2810)]
2811#[stable(feature = "simd_x86", since = "1.27.0")]
2812pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d {
2813 let a = _mm_load_pd(mem_addr);
2814 simd_shuffle!(a, a, [1, 0])
2815}
2816
2817#[inline]
2823#[target_feature(enable = "sse2")]
2824#[cfg_attr(test, assert_instr(movups))]
2825#[stable(feature = "simd_x86", since = "1.27.0")]
2826pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d {
2827 let mut dst = _mm_undefined_pd();
2828 ptr::copy_nonoverlapping(
2829 mem_addr as *const u8,
2830 ptr::addr_of_mut!(dst) as *mut u8,
2831 mem::size_of::<__m128d>(),
2832 );
2833 dst
2834}
2835
2836#[inline]
2842#[target_feature(enable = "sse2")]
2843#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2844pub unsafe fn _mm_loadu_si16(mem_addr: *const u8) -> __m128i {
2845 transmute(i16x8::new(
2846 ptr::read_unaligned(mem_addr as *const i16),
2847 0,
2848 0,
2849 0,
2850 0,
2851 0,
2852 0,
2853 0,
2854 ))
2855}
2856
2857#[inline]
2863#[target_feature(enable = "sse2")]
2864#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2865pub unsafe fn _mm_loadu_si32(mem_addr: *const u8) -> __m128i {
2866 transmute(i32x4::new(
2867 ptr::read_unaligned(mem_addr as *const i32),
2868 0,
2869 0,
2870 0,
2871 ))
2872}
2873
2874#[inline]
2880#[target_feature(enable = "sse2")]
2881#[stable(feature = "simd_x86_mm_loadu_si64", since = "1.46.0")]
2882pub unsafe fn _mm_loadu_si64(mem_addr: *const u8) -> __m128i {
2883 transmute(i64x2::new(ptr::read_unaligned(mem_addr as *const i64), 0))
2884}
2885
2886#[inline]
2892#[target_feature(enable = "sse2")]
2893#[cfg_attr(test, assert_instr(shufps, MASK = 2))]
2894#[rustc_legacy_const_generics(2)]
2895#[stable(feature = "simd_x86", since = "1.27.0")]
2896pub fn _mm_shuffle_pd<const MASK: i32>(a: __m128d, b: __m128d) -> __m128d {
2897 static_assert_uimm_bits!(MASK, 8);
2898 unsafe { simd_shuffle!(a, b, [MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 2]) }
2899}
2900
2901#[inline]
2907#[target_feature(enable = "sse2")]
2908#[cfg_attr(test, assert_instr(movsd))]
2909#[stable(feature = "simd_x86", since = "1.27.0")]
2910pub fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d {
2911 unsafe { _mm_setr_pd(simd_extract!(b, 0), simd_extract!(a, 1)) }
2912}
2913
2914#[inline]
2919#[target_feature(enable = "sse2")]
2920#[stable(feature = "simd_x86", since = "1.27.0")]
2921pub fn _mm_castpd_ps(a: __m128d) -> __m128 {
2922 unsafe { transmute(a) }
2923}
2924
2925#[inline]
2930#[target_feature(enable = "sse2")]
2931#[stable(feature = "simd_x86", since = "1.27.0")]
2932pub fn _mm_castpd_si128(a: __m128d) -> __m128i {
2933 unsafe { transmute(a) }
2934}
2935
2936#[inline]
2941#[target_feature(enable = "sse2")]
2942#[stable(feature = "simd_x86", since = "1.27.0")]
2943pub fn _mm_castps_pd(a: __m128) -> __m128d {
2944 unsafe { transmute(a) }
2945}
2946
2947#[inline]
2952#[target_feature(enable = "sse2")]
2953#[stable(feature = "simd_x86", since = "1.27.0")]
2954pub fn _mm_castps_si128(a: __m128) -> __m128i {
2955 unsafe { transmute(a) }
2956}
2957
2958#[inline]
2963#[target_feature(enable = "sse2")]
2964#[stable(feature = "simd_x86", since = "1.27.0")]
2965pub fn _mm_castsi128_pd(a: __m128i) -> __m128d {
2966 unsafe { transmute(a) }
2967}
2968
2969#[inline]
2974#[target_feature(enable = "sse2")]
2975#[stable(feature = "simd_x86", since = "1.27.0")]
2976pub fn _mm_castsi128_ps(a: __m128i) -> __m128 {
2977 unsafe { transmute(a) }
2978}
2979
2980#[inline]
2987#[target_feature(enable = "sse2")]
2988#[stable(feature = "simd_x86", since = "1.27.0")]
2989pub fn _mm_undefined_pd() -> __m128d {
2990 const { unsafe { mem::zeroed() } }
2991}
2992
2993#[inline]
3000#[target_feature(enable = "sse2")]
3001#[stable(feature = "simd_x86", since = "1.27.0")]
3002pub fn _mm_undefined_si128() -> __m128i {
3003 const { unsafe { mem::zeroed() } }
3004}
3005
3006#[inline]
3014#[target_feature(enable = "sse2")]
3015#[cfg_attr(test, assert_instr(unpckhpd))]
3016#[stable(feature = "simd_x86", since = "1.27.0")]
3017pub fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d {
3018 unsafe { simd_shuffle!(a, b, [1, 3]) }
3019}
3020
3021#[inline]
3029#[target_feature(enable = "sse2")]
3030#[cfg_attr(test, assert_instr(movlhps))]
3031#[stable(feature = "simd_x86", since = "1.27.0")]
3032pub fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d {
3033 unsafe { simd_shuffle!(a, b, [0, 2]) }
3034}
3035
3036#[allow(improper_ctypes)]
3037unsafe extern "C" {
3038 #[link_name = "llvm.x86.sse2.pause"]
3039 fn pause();
3040 #[link_name = "llvm.x86.sse2.clflush"]
3041 fn clflush(p: *const u8);
3042 #[link_name = "llvm.x86.sse2.lfence"]
3043 fn lfence();
3044 #[link_name = "llvm.x86.sse2.mfence"]
3045 fn mfence();
3046 #[link_name = "llvm.x86.sse2.pmadd.wd"]
3047 fn pmaddwd(a: i16x8, b: i16x8) -> i32x4;
3048 #[link_name = "llvm.x86.sse2.psad.bw"]
3049 fn psadbw(a: u8x16, b: u8x16) -> u64x2;
3050 #[link_name = "llvm.x86.sse2.psll.w"]
3051 fn psllw(a: i16x8, count: i16x8) -> i16x8;
3052 #[link_name = "llvm.x86.sse2.psll.d"]
3053 fn pslld(a: i32x4, count: i32x4) -> i32x4;
3054 #[link_name = "llvm.x86.sse2.psll.q"]
3055 fn psllq(a: i64x2, count: i64x2) -> i64x2;
3056 #[link_name = "llvm.x86.sse2.psra.w"]
3057 fn psraw(a: i16x8, count: i16x8) -> i16x8;
3058 #[link_name = "llvm.x86.sse2.psra.d"]
3059 fn psrad(a: i32x4, count: i32x4) -> i32x4;
3060 #[link_name = "llvm.x86.sse2.psrl.w"]
3061 fn psrlw(a: i16x8, count: i16x8) -> i16x8;
3062 #[link_name = "llvm.x86.sse2.psrl.d"]
3063 fn psrld(a: i32x4, count: i32x4) -> i32x4;
3064 #[link_name = "llvm.x86.sse2.psrl.q"]
3065 fn psrlq(a: i64x2, count: i64x2) -> i64x2;
3066 #[link_name = "llvm.x86.sse2.cvtps2dq"]
3067 fn cvtps2dq(a: __m128) -> i32x4;
3068 #[link_name = "llvm.x86.sse2.maskmov.dqu"]
3069 fn maskmovdqu(a: i8x16, mask: i8x16, mem_addr: *mut i8);
3070 #[link_name = "llvm.x86.sse2.packsswb.128"]
3071 fn packsswb(a: i16x8, b: i16x8) -> i8x16;
3072 #[link_name = "llvm.x86.sse2.packssdw.128"]
3073 fn packssdw(a: i32x4, b: i32x4) -> i16x8;
3074 #[link_name = "llvm.x86.sse2.packuswb.128"]
3075 fn packuswb(a: i16x8, b: i16x8) -> u8x16;
3076 #[link_name = "llvm.x86.sse2.max.sd"]
3077 fn maxsd(a: __m128d, b: __m128d) -> __m128d;
3078 #[link_name = "llvm.x86.sse2.max.pd"]
3079 fn maxpd(a: __m128d, b: __m128d) -> __m128d;
3080 #[link_name = "llvm.x86.sse2.min.sd"]
3081 fn minsd(a: __m128d, b: __m128d) -> __m128d;
3082 #[link_name = "llvm.x86.sse2.min.pd"]
3083 fn minpd(a: __m128d, b: __m128d) -> __m128d;
3084 #[link_name = "llvm.x86.sse2.cmp.sd"]
3085 fn cmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
3086 #[link_name = "llvm.x86.sse2.cmp.pd"]
3087 fn cmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
3088 #[link_name = "llvm.x86.sse2.comieq.sd"]
3089 fn comieqsd(a: __m128d, b: __m128d) -> i32;
3090 #[link_name = "llvm.x86.sse2.comilt.sd"]
3091 fn comiltsd(a: __m128d, b: __m128d) -> i32;
3092 #[link_name = "llvm.x86.sse2.comile.sd"]
3093 fn comilesd(a: __m128d, b: __m128d) -> i32;
3094 #[link_name = "llvm.x86.sse2.comigt.sd"]
3095 fn comigtsd(a: __m128d, b: __m128d) -> i32;
3096 #[link_name = "llvm.x86.sse2.comige.sd"]
3097 fn comigesd(a: __m128d, b: __m128d) -> i32;
3098 #[link_name = "llvm.x86.sse2.comineq.sd"]
3099 fn comineqsd(a: __m128d, b: __m128d) -> i32;
3100 #[link_name = "llvm.x86.sse2.ucomieq.sd"]
3101 fn ucomieqsd(a: __m128d, b: __m128d) -> i32;
3102 #[link_name = "llvm.x86.sse2.ucomilt.sd"]
3103 fn ucomiltsd(a: __m128d, b: __m128d) -> i32;
3104 #[link_name = "llvm.x86.sse2.ucomile.sd"]
3105 fn ucomilesd(a: __m128d, b: __m128d) -> i32;
3106 #[link_name = "llvm.x86.sse2.ucomigt.sd"]
3107 fn ucomigtsd(a: __m128d, b: __m128d) -> i32;
3108 #[link_name = "llvm.x86.sse2.ucomige.sd"]
3109 fn ucomigesd(a: __m128d, b: __m128d) -> i32;
3110 #[link_name = "llvm.x86.sse2.ucomineq.sd"]
3111 fn ucomineqsd(a: __m128d, b: __m128d) -> i32;
3112 #[link_name = "llvm.x86.sse2.cvtpd2dq"]
3113 fn cvtpd2dq(a: __m128d) -> i32x4;
3114 #[link_name = "llvm.x86.sse2.cvtsd2si"]
3115 fn cvtsd2si(a: __m128d) -> i32;
3116 #[link_name = "llvm.x86.sse2.cvtsd2ss"]
3117 fn cvtsd2ss(a: __m128, b: __m128d) -> __m128;
3118 #[link_name = "llvm.x86.sse2.cvtss2sd"]
3119 fn cvtss2sd(a: __m128d, b: __m128) -> __m128d;
3120 #[link_name = "llvm.x86.sse2.cvttpd2dq"]
3121 fn cvttpd2dq(a: __m128d) -> i32x4;
3122 #[link_name = "llvm.x86.sse2.cvttsd2si"]
3123 fn cvttsd2si(a: __m128d) -> i32;
3124 #[link_name = "llvm.x86.sse2.cvttps2dq"]
3125 fn cvttps2dq(a: __m128) -> i32x4;
3126}
3127
3128#[cfg(test)]
3129mod tests {
3130 use crate::{
3131 core_arch::{simd::*, x86::*},
3132 hint::black_box,
3133 };
3134 use std::{
3135 boxed, f32, f64,
3136 mem::{self, transmute},
3137 ptr,
3138 };
3139 use stdarch_test::simd_test;
3140
3141 const NAN: f64 = f64::NAN;
3142
3143 #[test]
3144 fn test_mm_pause() {
3145 unsafe { _mm_pause() }
3146 }
3147
3148 #[simd_test(enable = "sse2")]
3149 unsafe fn test_mm_clflush() {
3150 let x = 0_u8;
3151 _mm_clflush(ptr::addr_of!(x));
3152 }
3153
3154 #[simd_test(enable = "sse2")]
3155 #[cfg_attr(miri, ignore)]
3157 unsafe fn test_mm_lfence() {
3158 _mm_lfence();
3159 }
3160
3161 #[simd_test(enable = "sse2")]
3162 #[cfg_attr(miri, ignore)]
3164 unsafe fn test_mm_mfence() {
3165 _mm_mfence();
3166 }
3167
3168 #[simd_test(enable = "sse2")]
3169 unsafe fn test_mm_add_epi8() {
3170 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3171 #[rustfmt::skip]
3172 let b = _mm_setr_epi8(
3173 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3174 );
3175 let r = _mm_add_epi8(a, b);
3176 #[rustfmt::skip]
3177 let e = _mm_setr_epi8(
3178 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3179 );
3180 assert_eq_m128i(r, e);
3181 }
3182
3183 #[simd_test(enable = "sse2")]
3184 unsafe fn test_mm_add_epi8_overflow() {
3185 let a = _mm_set1_epi8(0x7F);
3186 let b = _mm_set1_epi8(1);
3187 let r = _mm_add_epi8(a, b);
3188 assert_eq_m128i(r, _mm_set1_epi8(-128));
3189 }
3190
3191 #[simd_test(enable = "sse2")]
3192 unsafe fn test_mm_add_epi16() {
3193 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3194 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3195 let r = _mm_add_epi16(a, b);
3196 let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3197 assert_eq_m128i(r, e);
3198 }
3199
3200 #[simd_test(enable = "sse2")]
3201 unsafe fn test_mm_add_epi32() {
3202 let a = _mm_setr_epi32(0, 1, 2, 3);
3203 let b = _mm_setr_epi32(4, 5, 6, 7);
3204 let r = _mm_add_epi32(a, b);
3205 let e = _mm_setr_epi32(4, 6, 8, 10);
3206 assert_eq_m128i(r, e);
3207 }
3208
3209 #[simd_test(enable = "sse2")]
3210 unsafe fn test_mm_add_epi64() {
3211 let a = _mm_setr_epi64x(0, 1);
3212 let b = _mm_setr_epi64x(2, 3);
3213 let r = _mm_add_epi64(a, b);
3214 let e = _mm_setr_epi64x(2, 4);
3215 assert_eq_m128i(r, e);
3216 }
3217
3218 #[simd_test(enable = "sse2")]
3219 unsafe fn test_mm_adds_epi8() {
3220 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3221 #[rustfmt::skip]
3222 let b = _mm_setr_epi8(
3223 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3224 );
3225 let r = _mm_adds_epi8(a, b);
3226 #[rustfmt::skip]
3227 let e = _mm_setr_epi8(
3228 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3229 );
3230 assert_eq_m128i(r, e);
3231 }
3232
3233 #[simd_test(enable = "sse2")]
3234 unsafe fn test_mm_adds_epi8_saturate_positive() {
3235 let a = _mm_set1_epi8(0x7F);
3236 let b = _mm_set1_epi8(1);
3237 let r = _mm_adds_epi8(a, b);
3238 assert_eq_m128i(r, a);
3239 }
3240
3241 #[simd_test(enable = "sse2")]
3242 unsafe fn test_mm_adds_epi8_saturate_negative() {
3243 let a = _mm_set1_epi8(-0x80);
3244 let b = _mm_set1_epi8(-1);
3245 let r = _mm_adds_epi8(a, b);
3246 assert_eq_m128i(r, a);
3247 }
3248
3249 #[simd_test(enable = "sse2")]
3250 unsafe fn test_mm_adds_epi16() {
3251 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3252 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3253 let r = _mm_adds_epi16(a, b);
3254 let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3255 assert_eq_m128i(r, e);
3256 }
3257
3258 #[simd_test(enable = "sse2")]
3259 unsafe fn test_mm_adds_epi16_saturate_positive() {
3260 let a = _mm_set1_epi16(0x7FFF);
3261 let b = _mm_set1_epi16(1);
3262 let r = _mm_adds_epi16(a, b);
3263 assert_eq_m128i(r, a);
3264 }
3265
3266 #[simd_test(enable = "sse2")]
3267 unsafe fn test_mm_adds_epi16_saturate_negative() {
3268 let a = _mm_set1_epi16(-0x8000);
3269 let b = _mm_set1_epi16(-1);
3270 let r = _mm_adds_epi16(a, b);
3271 assert_eq_m128i(r, a);
3272 }
3273
3274 #[simd_test(enable = "sse2")]
3275 unsafe fn test_mm_adds_epu8() {
3276 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3277 #[rustfmt::skip]
3278 let b = _mm_setr_epi8(
3279 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3280 );
3281 let r = _mm_adds_epu8(a, b);
3282 #[rustfmt::skip]
3283 let e = _mm_setr_epi8(
3284 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3285 );
3286 assert_eq_m128i(r, e);
3287 }
3288
3289 #[simd_test(enable = "sse2")]
3290 unsafe fn test_mm_adds_epu8_saturate() {
3291 let a = _mm_set1_epi8(!0);
3292 let b = _mm_set1_epi8(1);
3293 let r = _mm_adds_epu8(a, b);
3294 assert_eq_m128i(r, a);
3295 }
3296
3297 #[simd_test(enable = "sse2")]
3298 unsafe fn test_mm_adds_epu16() {
3299 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3300 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3301 let r = _mm_adds_epu16(a, b);
3302 let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3303 assert_eq_m128i(r, e);
3304 }
3305
3306 #[simd_test(enable = "sse2")]
3307 unsafe fn test_mm_adds_epu16_saturate() {
3308 let a = _mm_set1_epi16(!0);
3309 let b = _mm_set1_epi16(1);
3310 let r = _mm_adds_epu16(a, b);
3311 assert_eq_m128i(r, a);
3312 }
3313
3314 #[simd_test(enable = "sse2")]
3315 unsafe fn test_mm_avg_epu8() {
3316 let (a, b) = (_mm_set1_epi8(3), _mm_set1_epi8(9));
3317 let r = _mm_avg_epu8(a, b);
3318 assert_eq_m128i(r, _mm_set1_epi8(6));
3319 }
3320
3321 #[simd_test(enable = "sse2")]
3322 unsafe fn test_mm_avg_epu16() {
3323 let (a, b) = (_mm_set1_epi16(3), _mm_set1_epi16(9));
3324 let r = _mm_avg_epu16(a, b);
3325 assert_eq_m128i(r, _mm_set1_epi16(6));
3326 }
3327
3328 #[simd_test(enable = "sse2")]
3329 unsafe fn test_mm_madd_epi16() {
3330 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
3331 let b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
3332 let r = _mm_madd_epi16(a, b);
3333 let e = _mm_setr_epi32(29, 81, 149, 233);
3334 assert_eq_m128i(r, e);
3335
3336 let a = _mm_setr_epi16(
3339 i16::MAX,
3340 i16::MAX,
3341 i16::MIN,
3342 i16::MIN,
3343 i16::MIN,
3344 i16::MAX,
3345 0,
3346 0,
3347 );
3348 let b = _mm_setr_epi16(
3349 i16::MAX,
3350 i16::MAX,
3351 i16::MIN,
3352 i16::MIN,
3353 i16::MAX,
3354 i16::MIN,
3355 0,
3356 0,
3357 );
3358 let r = _mm_madd_epi16(a, b);
3359 let e = _mm_setr_epi32(0x7FFE0002, i32::MIN, -0x7FFF0000, 0);
3360 assert_eq_m128i(r, e);
3361 }
3362
3363 #[simd_test(enable = "sse2")]
3364 unsafe fn test_mm_max_epi16() {
3365 let a = _mm_set1_epi16(1);
3366 let b = _mm_set1_epi16(-1);
3367 let r = _mm_max_epi16(a, b);
3368 assert_eq_m128i(r, a);
3369 }
3370
3371 #[simd_test(enable = "sse2")]
3372 unsafe fn test_mm_max_epu8() {
3373 let a = _mm_set1_epi8(1);
3374 let b = _mm_set1_epi8(!0);
3375 let r = _mm_max_epu8(a, b);
3376 assert_eq_m128i(r, b);
3377 }
3378
3379 #[simd_test(enable = "sse2")]
3380 unsafe fn test_mm_min_epi16() {
3381 let a = _mm_set1_epi16(1);
3382 let b = _mm_set1_epi16(-1);
3383 let r = _mm_min_epi16(a, b);
3384 assert_eq_m128i(r, b);
3385 }
3386
3387 #[simd_test(enable = "sse2")]
3388 unsafe fn test_mm_min_epu8() {
3389 let a = _mm_set1_epi8(1);
3390 let b = _mm_set1_epi8(!0);
3391 let r = _mm_min_epu8(a, b);
3392 assert_eq_m128i(r, a);
3393 }
3394
3395 #[simd_test(enable = "sse2")]
3396 unsafe fn test_mm_mulhi_epi16() {
3397 let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
3398 let r = _mm_mulhi_epi16(a, b);
3399 assert_eq_m128i(r, _mm_set1_epi16(-16));
3400 }
3401
3402 #[simd_test(enable = "sse2")]
3403 unsafe fn test_mm_mulhi_epu16() {
3404 let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(1001));
3405 let r = _mm_mulhi_epu16(a, b);
3406 assert_eq_m128i(r, _mm_set1_epi16(15));
3407 }
3408
3409 #[simd_test(enable = "sse2")]
3410 unsafe fn test_mm_mullo_epi16() {
3411 let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
3412 let r = _mm_mullo_epi16(a, b);
3413 assert_eq_m128i(r, _mm_set1_epi16(-17960));
3414 }
3415
3416 #[simd_test(enable = "sse2")]
3417 unsafe fn test_mm_mul_epu32() {
3418 let a = _mm_setr_epi64x(1_000_000_000, 1 << 34);
3419 let b = _mm_setr_epi64x(1_000_000_000, 1 << 35);
3420 let r = _mm_mul_epu32(a, b);
3421 let e = _mm_setr_epi64x(1_000_000_000 * 1_000_000_000, 0);
3422 assert_eq_m128i(r, e);
3423 }
3424
3425 #[simd_test(enable = "sse2")]
3426 unsafe fn test_mm_sad_epu8() {
3427 #[rustfmt::skip]
3428 let a = _mm_setr_epi8(
3429 255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
3430 1, 2, 3, 4,
3431 155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8,
3432 1, 2, 3, 4,
3433 );
3434 let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2);
3435 let r = _mm_sad_epu8(a, b);
3436 let e = _mm_setr_epi64x(1020, 614);
3437 assert_eq_m128i(r, e);
3438 }
3439
3440 #[simd_test(enable = "sse2")]
3441 unsafe fn test_mm_sub_epi8() {
3442 let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(6));
3443 let r = _mm_sub_epi8(a, b);
3444 assert_eq_m128i(r, _mm_set1_epi8(-1));
3445 }
3446
3447 #[simd_test(enable = "sse2")]
3448 unsafe fn test_mm_sub_epi16() {
3449 let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(6));
3450 let r = _mm_sub_epi16(a, b);
3451 assert_eq_m128i(r, _mm_set1_epi16(-1));
3452 }
3453
3454 #[simd_test(enable = "sse2")]
3455 unsafe fn test_mm_sub_epi32() {
3456 let (a, b) = (_mm_set1_epi32(5), _mm_set1_epi32(6));
3457 let r = _mm_sub_epi32(a, b);
3458 assert_eq_m128i(r, _mm_set1_epi32(-1));
3459 }
3460
3461 #[simd_test(enable = "sse2")]
3462 unsafe fn test_mm_sub_epi64() {
3463 let (a, b) = (_mm_set1_epi64x(5), _mm_set1_epi64x(6));
3464 let r = _mm_sub_epi64(a, b);
3465 assert_eq_m128i(r, _mm_set1_epi64x(-1));
3466 }
3467
3468 #[simd_test(enable = "sse2")]
3469 unsafe fn test_mm_subs_epi8() {
3470 let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
3471 let r = _mm_subs_epi8(a, b);
3472 assert_eq_m128i(r, _mm_set1_epi8(3));
3473 }
3474
3475 #[simd_test(enable = "sse2")]
3476 unsafe fn test_mm_subs_epi8_saturate_positive() {
3477 let a = _mm_set1_epi8(0x7F);
3478 let b = _mm_set1_epi8(-1);
3479 let r = _mm_subs_epi8(a, b);
3480 assert_eq_m128i(r, a);
3481 }
3482
3483 #[simd_test(enable = "sse2")]
3484 unsafe fn test_mm_subs_epi8_saturate_negative() {
3485 let a = _mm_set1_epi8(-0x80);
3486 let b = _mm_set1_epi8(1);
3487 let r = _mm_subs_epi8(a, b);
3488 assert_eq_m128i(r, a);
3489 }
3490
3491 #[simd_test(enable = "sse2")]
3492 unsafe fn test_mm_subs_epi16() {
3493 let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
3494 let r = _mm_subs_epi16(a, b);
3495 assert_eq_m128i(r, _mm_set1_epi16(3));
3496 }
3497
3498 #[simd_test(enable = "sse2")]
3499 unsafe fn test_mm_subs_epi16_saturate_positive() {
3500 let a = _mm_set1_epi16(0x7FFF);
3501 let b = _mm_set1_epi16(-1);
3502 let r = _mm_subs_epi16(a, b);
3503 assert_eq_m128i(r, a);
3504 }
3505
3506 #[simd_test(enable = "sse2")]
3507 unsafe fn test_mm_subs_epi16_saturate_negative() {
3508 let a = _mm_set1_epi16(-0x8000);
3509 let b = _mm_set1_epi16(1);
3510 let r = _mm_subs_epi16(a, b);
3511 assert_eq_m128i(r, a);
3512 }
3513
3514 #[simd_test(enable = "sse2")]
3515 unsafe fn test_mm_subs_epu8() {
3516 let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
3517 let r = _mm_subs_epu8(a, b);
3518 assert_eq_m128i(r, _mm_set1_epi8(3));
3519 }
3520
3521 #[simd_test(enable = "sse2")]
3522 unsafe fn test_mm_subs_epu8_saturate() {
3523 let a = _mm_set1_epi8(0);
3524 let b = _mm_set1_epi8(1);
3525 let r = _mm_subs_epu8(a, b);
3526 assert_eq_m128i(r, a);
3527 }
3528
3529 #[simd_test(enable = "sse2")]
3530 unsafe fn test_mm_subs_epu16() {
3531 let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
3532 let r = _mm_subs_epu16(a, b);
3533 assert_eq_m128i(r, _mm_set1_epi16(3));
3534 }
3535
3536 #[simd_test(enable = "sse2")]
3537 unsafe fn test_mm_subs_epu16_saturate() {
3538 let a = _mm_set1_epi16(0);
3539 let b = _mm_set1_epi16(1);
3540 let r = _mm_subs_epu16(a, b);
3541 assert_eq_m128i(r, a);
3542 }
3543
3544 #[simd_test(enable = "sse2")]
3545 unsafe fn test_mm_slli_si128() {
3546 #[rustfmt::skip]
3547 let a = _mm_setr_epi8(
3548 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3549 );
3550 let r = _mm_slli_si128::<1>(a);
3551 let e = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3552 assert_eq_m128i(r, e);
3553
3554 #[rustfmt::skip]
3555 let a = _mm_setr_epi8(
3556 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3557 );
3558 let r = _mm_slli_si128::<15>(a);
3559 let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
3560 assert_eq_m128i(r, e);
3561
3562 #[rustfmt::skip]
3563 let a = _mm_setr_epi8(
3564 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3565 );
3566 let r = _mm_slli_si128::<16>(a);
3567 assert_eq_m128i(r, _mm_set1_epi8(0));
3568 }
3569
3570 #[simd_test(enable = "sse2")]
3571 unsafe fn test_mm_slli_epi16() {
3572 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3573 let r = _mm_slli_epi16::<4>(a);
3574 assert_eq_m128i(
3575 r,
3576 _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
3577 );
3578 let r = _mm_slli_epi16::<16>(a);
3579 assert_eq_m128i(r, _mm_set1_epi16(0));
3580 }
3581
3582 #[simd_test(enable = "sse2")]
3583 unsafe fn test_mm_sll_epi16() {
3584 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3585 let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 4));
3586 assert_eq_m128i(
3587 r,
3588 _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
3589 );
3590 let r = _mm_sll_epi16(a, _mm_set_epi64x(4, 0));
3591 assert_eq_m128i(r, a);
3592 let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 16));
3593 assert_eq_m128i(r, _mm_set1_epi16(0));
3594 let r = _mm_sll_epi16(a, _mm_set_epi64x(0, i64::MAX));
3595 assert_eq_m128i(r, _mm_set1_epi16(0));
3596 }
3597
3598 #[simd_test(enable = "sse2")]
3599 unsafe fn test_mm_slli_epi32() {
3600 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3601 let r = _mm_slli_epi32::<4>(a);
3602 assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
3603 let r = _mm_slli_epi32::<32>(a);
3604 assert_eq_m128i(r, _mm_set1_epi32(0));
3605 }
3606
3607 #[simd_test(enable = "sse2")]
3608 unsafe fn test_mm_sll_epi32() {
3609 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3610 let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 4));
3611 assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
3612 let r = _mm_sll_epi32(a, _mm_set_epi64x(4, 0));
3613 assert_eq_m128i(r, a);
3614 let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 32));
3615 assert_eq_m128i(r, _mm_set1_epi32(0));
3616 let r = _mm_sll_epi32(a, _mm_set_epi64x(0, i64::MAX));
3617 assert_eq_m128i(r, _mm_set1_epi32(0));
3618 }
3619
3620 #[simd_test(enable = "sse2")]
3621 unsafe fn test_mm_slli_epi64() {
3622 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3623 let r = _mm_slli_epi64::<4>(a);
3624 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
3625 let r = _mm_slli_epi64::<64>(a);
3626 assert_eq_m128i(r, _mm_set1_epi64x(0));
3627 }
3628
3629 #[simd_test(enable = "sse2")]
3630 unsafe fn test_mm_sll_epi64() {
3631 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3632 let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 4));
3633 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
3634 let r = _mm_sll_epi64(a, _mm_set_epi64x(4, 0));
3635 assert_eq_m128i(r, a);
3636 let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 64));
3637 assert_eq_m128i(r, _mm_set1_epi64x(0));
3638 let r = _mm_sll_epi64(a, _mm_set_epi64x(0, i64::MAX));
3639 assert_eq_m128i(r, _mm_set1_epi64x(0));
3640 }
3641
3642 #[simd_test(enable = "sse2")]
3643 unsafe fn test_mm_srai_epi16() {
3644 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3645 let r = _mm_srai_epi16::<4>(a);
3646 assert_eq_m128i(
3647 r,
3648 _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10),
3649 );
3650 let r = _mm_srai_epi16::<16>(a);
3651 assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3652 }
3653
3654 #[simd_test(enable = "sse2")]
3655 unsafe fn test_mm_sra_epi16() {
3656 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3657 let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 4));
3658 assert_eq_m128i(
3659 r,
3660 _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10),
3661 );
3662 let r = _mm_sra_epi16(a, _mm_set_epi64x(4, 0));
3663 assert_eq_m128i(r, a);
3664 let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 16));
3665 assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3666 let r = _mm_sra_epi16(a, _mm_set_epi64x(0, i64::MAX));
3667 assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3668 }
3669
3670 #[simd_test(enable = "sse2")]
3671 unsafe fn test_mm_srai_epi32() {
3672 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3673 let r = _mm_srai_epi32::<4>(a);
3674 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
3675 let r = _mm_srai_epi32::<32>(a);
3676 assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3677 }
3678
3679 #[simd_test(enable = "sse2")]
3680 unsafe fn test_mm_sra_epi32() {
3681 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3682 let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 4));
3683 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
3684 let r = _mm_sra_epi32(a, _mm_set_epi64x(4, 0));
3685 assert_eq_m128i(r, a);
3686 let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 32));
3687 assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3688 let r = _mm_sra_epi32(a, _mm_set_epi64x(0, i64::MAX));
3689 assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3690 }
3691
3692 #[simd_test(enable = "sse2")]
3693 unsafe fn test_mm_srli_si128() {
3694 #[rustfmt::skip]
3695 let a = _mm_setr_epi8(
3696 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3697 );
3698 let r = _mm_srli_si128::<1>(a);
3699 #[rustfmt::skip]
3700 let e = _mm_setr_epi8(
3701 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0,
3702 );
3703 assert_eq_m128i(r, e);
3704
3705 #[rustfmt::skip]
3706 let a = _mm_setr_epi8(
3707 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3708 );
3709 let r = _mm_srli_si128::<15>(a);
3710 let e = _mm_setr_epi8(16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3711 assert_eq_m128i(r, e);
3712
3713 #[rustfmt::skip]
3714 let a = _mm_setr_epi8(
3715 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3716 );
3717 let r = _mm_srli_si128::<16>(a);
3718 assert_eq_m128i(r, _mm_set1_epi8(0));
3719 }
3720
3721 #[simd_test(enable = "sse2")]
3722 unsafe fn test_mm_srli_epi16() {
3723 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3724 let r = _mm_srli_epi16::<4>(a);
3725 assert_eq_m128i(
3726 r,
3727 _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0),
3728 );
3729 let r = _mm_srli_epi16::<16>(a);
3730 assert_eq_m128i(r, _mm_set1_epi16(0));
3731 }
3732
3733 #[simd_test(enable = "sse2")]
3734 unsafe fn test_mm_srl_epi16() {
3735 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3736 let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 4));
3737 assert_eq_m128i(
3738 r,
3739 _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0),
3740 );
3741 let r = _mm_srl_epi16(a, _mm_set_epi64x(4, 0));
3742 assert_eq_m128i(r, a);
3743 let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 16));
3744 assert_eq_m128i(r, _mm_set1_epi16(0));
3745 let r = _mm_srl_epi16(a, _mm_set_epi64x(0, i64::MAX));
3746 assert_eq_m128i(r, _mm_set1_epi16(0));
3747 }
3748
3749 #[simd_test(enable = "sse2")]
3750 unsafe fn test_mm_srli_epi32() {
3751 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3752 let r = _mm_srli_epi32::<4>(a);
3753 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
3754 let r = _mm_srli_epi32::<32>(a);
3755 assert_eq_m128i(r, _mm_set1_epi32(0));
3756 }
3757
3758 #[simd_test(enable = "sse2")]
3759 unsafe fn test_mm_srl_epi32() {
3760 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3761 let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 4));
3762 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
3763 let r = _mm_srl_epi32(a, _mm_set_epi64x(4, 0));
3764 assert_eq_m128i(r, a);
3765 let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 32));
3766 assert_eq_m128i(r, _mm_set1_epi32(0));
3767 let r = _mm_srl_epi32(a, _mm_set_epi64x(0, i64::MAX));
3768 assert_eq_m128i(r, _mm_set1_epi32(0));
3769 }
3770
3771 #[simd_test(enable = "sse2")]
3772 unsafe fn test_mm_srli_epi64() {
3773 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3774 let r = _mm_srli_epi64::<4>(a);
3775 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
3776 let r = _mm_srli_epi64::<64>(a);
3777 assert_eq_m128i(r, _mm_set1_epi64x(0));
3778 }
3779
3780 #[simd_test(enable = "sse2")]
3781 unsafe fn test_mm_srl_epi64() {
3782 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3783 let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 4));
3784 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
3785 let r = _mm_srl_epi64(a, _mm_set_epi64x(4, 0));
3786 assert_eq_m128i(r, a);
3787 let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 64));
3788 assert_eq_m128i(r, _mm_set1_epi64x(0));
3789 let r = _mm_srl_epi64(a, _mm_set_epi64x(0, i64::MAX));
3790 assert_eq_m128i(r, _mm_set1_epi64x(0));
3791 }
3792
3793 #[simd_test(enable = "sse2")]
3794 unsafe fn test_mm_and_si128() {
3795 let a = _mm_set1_epi8(5);
3796 let b = _mm_set1_epi8(3);
3797 let r = _mm_and_si128(a, b);
3798 assert_eq_m128i(r, _mm_set1_epi8(1));
3799 }
3800
3801 #[simd_test(enable = "sse2")]
3802 unsafe fn test_mm_andnot_si128() {
3803 let a = _mm_set1_epi8(5);
3804 let b = _mm_set1_epi8(3);
3805 let r = _mm_andnot_si128(a, b);
3806 assert_eq_m128i(r, _mm_set1_epi8(2));
3807 }
3808
3809 #[simd_test(enable = "sse2")]
3810 unsafe fn test_mm_or_si128() {
3811 let a = _mm_set1_epi8(5);
3812 let b = _mm_set1_epi8(3);
3813 let r = _mm_or_si128(a, b);
3814 assert_eq_m128i(r, _mm_set1_epi8(7));
3815 }
3816
3817 #[simd_test(enable = "sse2")]
3818 unsafe fn test_mm_xor_si128() {
3819 let a = _mm_set1_epi8(5);
3820 let b = _mm_set1_epi8(3);
3821 let r = _mm_xor_si128(a, b);
3822 assert_eq_m128i(r, _mm_set1_epi8(6));
3823 }
3824
3825 #[simd_test(enable = "sse2")]
3826 unsafe fn test_mm_cmpeq_epi8() {
3827 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3828 let b = _mm_setr_epi8(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
3829 let r = _mm_cmpeq_epi8(a, b);
3830 #[rustfmt::skip]
3831 assert_eq_m128i(
3832 r,
3833 _mm_setr_epi8(
3834 0, 0, 0xFFu8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
3835 )
3836 );
3837 }
3838
3839 #[simd_test(enable = "sse2")]
3840 unsafe fn test_mm_cmpeq_epi16() {
3841 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3842 let b = _mm_setr_epi16(7, 6, 2, 4, 3, 2, 1, 0);
3843 let r = _mm_cmpeq_epi16(a, b);
3844 assert_eq_m128i(r, _mm_setr_epi16(0, 0, !0, 0, 0, 0, 0, 0));
3845 }
3846
3847 #[simd_test(enable = "sse2")]
3848 unsafe fn test_mm_cmpeq_epi32() {
3849 let a = _mm_setr_epi32(0, 1, 2, 3);
3850 let b = _mm_setr_epi32(3, 2, 2, 0);
3851 let r = _mm_cmpeq_epi32(a, b);
3852 assert_eq_m128i(r, _mm_setr_epi32(0, 0, !0, 0));
3853 }
3854
3855 #[simd_test(enable = "sse2")]
3856 unsafe fn test_mm_cmpgt_epi8() {
3857 let a = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3858 let b = _mm_set1_epi8(0);
3859 let r = _mm_cmpgt_epi8(a, b);
3860 let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3861 assert_eq_m128i(r, e);
3862 }
3863
3864 #[simd_test(enable = "sse2")]
3865 unsafe fn test_mm_cmpgt_epi16() {
3866 let a = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
3867 let b = _mm_set1_epi16(0);
3868 let r = _mm_cmpgt_epi16(a, b);
3869 let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
3870 assert_eq_m128i(r, e);
3871 }
3872
3873 #[simd_test(enable = "sse2")]
3874 unsafe fn test_mm_cmpgt_epi32() {
3875 let a = _mm_set_epi32(5, 0, 0, 0);
3876 let b = _mm_set1_epi32(0);
3877 let r = _mm_cmpgt_epi32(a, b);
3878 assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
3879 }
3880
3881 #[simd_test(enable = "sse2")]
3882 unsafe fn test_mm_cmplt_epi8() {
3883 let a = _mm_set1_epi8(0);
3884 let b = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3885 let r = _mm_cmplt_epi8(a, b);
3886 let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3887 assert_eq_m128i(r, e);
3888 }
3889
3890 #[simd_test(enable = "sse2")]
3891 unsafe fn test_mm_cmplt_epi16() {
3892 let a = _mm_set1_epi16(0);
3893 let b = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
3894 let r = _mm_cmplt_epi16(a, b);
3895 let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
3896 assert_eq_m128i(r, e);
3897 }
3898
3899 #[simd_test(enable = "sse2")]
3900 unsafe fn test_mm_cmplt_epi32() {
3901 let a = _mm_set1_epi32(0);
3902 let b = _mm_set_epi32(5, 0, 0, 0);
3903 let r = _mm_cmplt_epi32(a, b);
3904 assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
3905 }
3906
3907 #[simd_test(enable = "sse2")]
3908 unsafe fn test_mm_cvtepi32_pd() {
3909 let a = _mm_set_epi32(35, 25, 15, 5);
3910 let r = _mm_cvtepi32_pd(a);
3911 assert_eq_m128d(r, _mm_setr_pd(5.0, 15.0));
3912 }
3913
3914 #[simd_test(enable = "sse2")]
3915 unsafe fn test_mm_cvtsi32_sd() {
3916 let a = _mm_set1_pd(3.5);
3917 let r = _mm_cvtsi32_sd(a, 5);
3918 assert_eq_m128d(r, _mm_setr_pd(5.0, 3.5));
3919 }
3920
3921 #[simd_test(enable = "sse2")]
3922 unsafe fn test_mm_cvtepi32_ps() {
3923 let a = _mm_setr_epi32(1, 2, 3, 4);
3924 let r = _mm_cvtepi32_ps(a);
3925 assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0));
3926 }
3927
3928 #[simd_test(enable = "sse2")]
3929 unsafe fn test_mm_cvtps_epi32() {
3930 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3931 let r = _mm_cvtps_epi32(a);
3932 assert_eq_m128i(r, _mm_setr_epi32(1, 2, 3, 4));
3933 }
3934
3935 #[simd_test(enable = "sse2")]
3936 unsafe fn test_mm_cvtsi32_si128() {
3937 let r = _mm_cvtsi32_si128(5);
3938 assert_eq_m128i(r, _mm_setr_epi32(5, 0, 0, 0));
3939 }
3940
3941 #[simd_test(enable = "sse2")]
3942 unsafe fn test_mm_cvtsi128_si32() {
3943 let r = _mm_cvtsi128_si32(_mm_setr_epi32(5, 0, 0, 0));
3944 assert_eq!(r, 5);
3945 }
3946
3947 #[simd_test(enable = "sse2")]
3948 unsafe fn test_mm_set_epi64x() {
3949 let r = _mm_set_epi64x(0, 1);
3950 assert_eq_m128i(r, _mm_setr_epi64x(1, 0));
3951 }
3952
3953 #[simd_test(enable = "sse2")]
3954 unsafe fn test_mm_set_epi32() {
3955 let r = _mm_set_epi32(0, 1, 2, 3);
3956 assert_eq_m128i(r, _mm_setr_epi32(3, 2, 1, 0));
3957 }
3958
3959 #[simd_test(enable = "sse2")]
3960 unsafe fn test_mm_set_epi16() {
3961 let r = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3962 assert_eq_m128i(r, _mm_setr_epi16(7, 6, 5, 4, 3, 2, 1, 0));
3963 }
3964
3965 #[simd_test(enable = "sse2")]
3966 unsafe fn test_mm_set_epi8() {
3967 #[rustfmt::skip]
3968 let r = _mm_set_epi8(
3969 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
3970 );
3971 #[rustfmt::skip]
3972 let e = _mm_setr_epi8(
3973 15, 14, 13, 12, 11, 10, 9, 8,
3974 7, 6, 5, 4, 3, 2, 1, 0,
3975 );
3976 assert_eq_m128i(r, e);
3977 }
3978
3979 #[simd_test(enable = "sse2")]
3980 unsafe fn test_mm_set1_epi64x() {
3981 let r = _mm_set1_epi64x(1);
3982 assert_eq_m128i(r, _mm_set1_epi64x(1));
3983 }
3984
3985 #[simd_test(enable = "sse2")]
3986 unsafe fn test_mm_set1_epi32() {
3987 let r = _mm_set1_epi32(1);
3988 assert_eq_m128i(r, _mm_set1_epi32(1));
3989 }
3990
3991 #[simd_test(enable = "sse2")]
3992 unsafe fn test_mm_set1_epi16() {
3993 let r = _mm_set1_epi16(1);
3994 assert_eq_m128i(r, _mm_set1_epi16(1));
3995 }
3996
3997 #[simd_test(enable = "sse2")]
3998 unsafe fn test_mm_set1_epi8() {
3999 let r = _mm_set1_epi8(1);
4000 assert_eq_m128i(r, _mm_set1_epi8(1));
4001 }
4002
4003 #[simd_test(enable = "sse2")]
4004 unsafe fn test_mm_setr_epi32() {
4005 let r = _mm_setr_epi32(0, 1, 2, 3);
4006 assert_eq_m128i(r, _mm_setr_epi32(0, 1, 2, 3));
4007 }
4008
4009 #[simd_test(enable = "sse2")]
4010 unsafe fn test_mm_setr_epi16() {
4011 let r = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4012 assert_eq_m128i(r, _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7));
4013 }
4014
4015 #[simd_test(enable = "sse2")]
4016 unsafe fn test_mm_setr_epi8() {
4017 #[rustfmt::skip]
4018 let r = _mm_setr_epi8(
4019 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
4020 );
4021 #[rustfmt::skip]
4022 let e = _mm_setr_epi8(
4023 0, 1, 2, 3, 4, 5, 6, 7,
4024 8, 9, 10, 11, 12, 13, 14, 15,
4025 );
4026 assert_eq_m128i(r, e);
4027 }
4028
4029 #[simd_test(enable = "sse2")]
4030 unsafe fn test_mm_setzero_si128() {
4031 let r = _mm_setzero_si128();
4032 assert_eq_m128i(r, _mm_set1_epi64x(0));
4033 }
4034
4035 #[simd_test(enable = "sse2")]
4036 unsafe fn test_mm_loadl_epi64() {
4037 let a = _mm_setr_epi64x(6, 5);
4038 let r = _mm_loadl_epi64(ptr::addr_of!(a));
4039 assert_eq_m128i(r, _mm_setr_epi64x(6, 0));
4040 }
4041
4042 #[simd_test(enable = "sse2")]
4043 unsafe fn test_mm_load_si128() {
4044 let a = _mm_set_epi64x(5, 6);
4045 let r = _mm_load_si128(ptr::addr_of!(a) as *const _);
4046 assert_eq_m128i(a, r);
4047 }
4048
4049 #[simd_test(enable = "sse2")]
4050 unsafe fn test_mm_loadu_si128() {
4051 let a = _mm_set_epi64x(5, 6);
4052 let r = _mm_loadu_si128(ptr::addr_of!(a) as *const _);
4053 assert_eq_m128i(a, r);
4054 }
4055
4056 #[simd_test(enable = "sse2")]
4057 #[cfg_attr(miri, ignore)]
4060 unsafe fn test_mm_maskmoveu_si128() {
4061 let a = _mm_set1_epi8(9);
4062 #[rustfmt::skip]
4063 let mask = _mm_set_epi8(
4064 0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0,
4065 0, 0, 0, 0, 0, 0, 0, 0,
4066 );
4067 let mut r = _mm_set1_epi8(0);
4068 _mm_maskmoveu_si128(a, mask, ptr::addr_of_mut!(r) as *mut i8);
4069 let e = _mm_set_epi8(0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
4070 assert_eq_m128i(r, e);
4071 }
4072
4073 #[simd_test(enable = "sse2")]
4074 unsafe fn test_mm_store_si128() {
4075 let a = _mm_set1_epi8(9);
4076 let mut r = _mm_set1_epi8(0);
4077 _mm_store_si128(&mut r, a);
4078 assert_eq_m128i(r, a);
4079 }
4080
4081 #[simd_test(enable = "sse2")]
4082 unsafe fn test_mm_storeu_si128() {
4083 let a = _mm_set1_epi8(9);
4084 let mut r = _mm_set1_epi8(0);
4085 _mm_storeu_si128(&mut r, a);
4086 assert_eq_m128i(r, a);
4087 }
4088
4089 #[simd_test(enable = "sse2")]
4090 unsafe fn test_mm_storel_epi64() {
4091 let a = _mm_setr_epi64x(2, 9);
4092 let mut r = _mm_set1_epi8(0);
4093 _mm_storel_epi64(&mut r, a);
4094 assert_eq_m128i(r, _mm_setr_epi64x(2, 0));
4095 }
4096
4097 #[simd_test(enable = "sse2")]
4098 #[cfg_attr(miri, ignore)]
4101 unsafe fn test_mm_stream_si128() {
4102 let a = _mm_setr_epi32(1, 2, 3, 4);
4103 let mut r = _mm_undefined_si128();
4104 _mm_stream_si128(ptr::addr_of_mut!(r), a);
4105 assert_eq_m128i(r, a);
4106 }
4107
4108 #[simd_test(enable = "sse2")]
4109 #[cfg_attr(miri, ignore)]
4112 unsafe fn test_mm_stream_si32() {
4113 let a: i32 = 7;
4114 let mut mem = boxed::Box::<i32>::new(-1);
4115 _mm_stream_si32(ptr::addr_of_mut!(*mem), a);
4116 assert_eq!(a, *mem);
4117 }
4118
4119 #[simd_test(enable = "sse2")]
4120 unsafe fn test_mm_move_epi64() {
4121 let a = _mm_setr_epi64x(5, 6);
4122 let r = _mm_move_epi64(a);
4123 assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
4124 }
4125
4126 #[simd_test(enable = "sse2")]
4127 unsafe fn test_mm_packs_epi16() {
4128 let a = _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0);
4129 let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80);
4130 let r = _mm_packs_epi16(a, b);
4131 #[rustfmt::skip]
4132 assert_eq_m128i(
4133 r,
4134 _mm_setr_epi8(
4135 0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F
4136 )
4137 );
4138 }
4139
4140 #[simd_test(enable = "sse2")]
4141 unsafe fn test_mm_packs_epi32() {
4142 let a = _mm_setr_epi32(0x8000, -0x8001, 0, 0);
4143 let b = _mm_setr_epi32(0, 0, -0x8001, 0x8000);
4144 let r = _mm_packs_epi32(a, b);
4145 assert_eq_m128i(
4146 r,
4147 _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF),
4148 );
4149 }
4150
4151 #[simd_test(enable = "sse2")]
4152 unsafe fn test_mm_packus_epi16() {
4153 let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0);
4154 let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100);
4155 let r = _mm_packus_epi16(a, b);
4156 assert_eq_m128i(
4157 r,
4158 _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0),
4159 );
4160 }
4161
4162 #[simd_test(enable = "sse2")]
4163 unsafe fn test_mm_extract_epi16() {
4164 let a = _mm_setr_epi16(-1, 1, 2, 3, 4, 5, 6, 7);
4165 let r1 = _mm_extract_epi16::<0>(a);
4166 let r2 = _mm_extract_epi16::<3>(a);
4167 assert_eq!(r1, 0xFFFF);
4168 assert_eq!(r2, 3);
4169 }
4170
4171 #[simd_test(enable = "sse2")]
4172 unsafe fn test_mm_insert_epi16() {
4173 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4174 let r = _mm_insert_epi16::<0>(a, 9);
4175 let e = _mm_setr_epi16(9, 1, 2, 3, 4, 5, 6, 7);
4176 assert_eq_m128i(r, e);
4177 }
4178
4179 #[simd_test(enable = "sse2")]
4180 unsafe fn test_mm_movemask_epi8() {
4181 #[rustfmt::skip]
4182 let a = _mm_setr_epi8(
4183 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01,
4184 0b0101, 0b1111_0000u8 as i8, 0, 0,
4185 0, 0b1011_0101u8 as i8, 0b1111_0000u8 as i8, 0b0101,
4186 0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8,
4187 );
4188 let r = _mm_movemask_epi8(a);
4189 assert_eq!(r, 0b10100110_00100101);
4190 }
4191
4192 #[simd_test(enable = "sse2")]
4193 unsafe fn test_mm_shuffle_epi32() {
4194 let a = _mm_setr_epi32(5, 10, 15, 20);
4195 let r = _mm_shuffle_epi32::<0b00_01_01_11>(a);
4196 let e = _mm_setr_epi32(20, 10, 10, 5);
4197 assert_eq_m128i(r, e);
4198 }
4199
4200 #[simd_test(enable = "sse2")]
4201 unsafe fn test_mm_shufflehi_epi16() {
4202 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 10, 15, 20);
4203 let r = _mm_shufflehi_epi16::<0b00_01_01_11>(a);
4204 let e = _mm_setr_epi16(1, 2, 3, 4, 20, 10, 10, 5);
4205 assert_eq_m128i(r, e);
4206 }
4207
4208 #[simd_test(enable = "sse2")]
4209 unsafe fn test_mm_shufflelo_epi16() {
4210 let a = _mm_setr_epi16(5, 10, 15, 20, 1, 2, 3, 4);
4211 let r = _mm_shufflelo_epi16::<0b00_01_01_11>(a);
4212 let e = _mm_setr_epi16(20, 10, 10, 5, 1, 2, 3, 4);
4213 assert_eq_m128i(r, e);
4214 }
4215
4216 #[simd_test(enable = "sse2")]
4217 unsafe fn test_mm_unpackhi_epi8() {
4218 #[rustfmt::skip]
4219 let a = _mm_setr_epi8(
4220 0, 1, 2, 3, 4, 5, 6, 7,
4221 8, 9, 10, 11, 12, 13, 14, 15,
4222 );
4223 #[rustfmt::skip]
4224 let b = _mm_setr_epi8(
4225 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
4226 );
4227 let r = _mm_unpackhi_epi8(a, b);
4228 #[rustfmt::skip]
4229 let e = _mm_setr_epi8(
4230 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31,
4231 );
4232 assert_eq_m128i(r, e);
4233 }
4234
4235 #[simd_test(enable = "sse2")]
4236 unsafe fn test_mm_unpackhi_epi16() {
4237 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4238 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
4239 let r = _mm_unpackhi_epi16(a, b);
4240 let e = _mm_setr_epi16(4, 12, 5, 13, 6, 14, 7, 15);
4241 assert_eq_m128i(r, e);
4242 }
4243
4244 #[simd_test(enable = "sse2")]
4245 unsafe fn test_mm_unpackhi_epi32() {
4246 let a = _mm_setr_epi32(0, 1, 2, 3);
4247 let b = _mm_setr_epi32(4, 5, 6, 7);
4248 let r = _mm_unpackhi_epi32(a, b);
4249 let e = _mm_setr_epi32(2, 6, 3, 7);
4250 assert_eq_m128i(r, e);
4251 }
4252
4253 #[simd_test(enable = "sse2")]
4254 unsafe fn test_mm_unpackhi_epi64() {
4255 let a = _mm_setr_epi64x(0, 1);
4256 let b = _mm_setr_epi64x(2, 3);
4257 let r = _mm_unpackhi_epi64(a, b);
4258 let e = _mm_setr_epi64x(1, 3);
4259 assert_eq_m128i(r, e);
4260 }
4261
4262 #[simd_test(enable = "sse2")]
4263 unsafe fn test_mm_unpacklo_epi8() {
4264 #[rustfmt::skip]
4265 let a = _mm_setr_epi8(
4266 0, 1, 2, 3, 4, 5, 6, 7,
4267 8, 9, 10, 11, 12, 13, 14, 15,
4268 );
4269 #[rustfmt::skip]
4270 let b = _mm_setr_epi8(
4271 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
4272 );
4273 let r = _mm_unpacklo_epi8(a, b);
4274 #[rustfmt::skip]
4275 let e = _mm_setr_epi8(
4276 0, 16, 1, 17, 2, 18, 3, 19,
4277 4, 20, 5, 21, 6, 22, 7, 23,
4278 );
4279 assert_eq_m128i(r, e);
4280 }
4281
4282 #[simd_test(enable = "sse2")]
4283 unsafe fn test_mm_unpacklo_epi16() {
4284 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4285 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
4286 let r = _mm_unpacklo_epi16(a, b);
4287 let e = _mm_setr_epi16(0, 8, 1, 9, 2, 10, 3, 11);
4288 assert_eq_m128i(r, e);
4289 }
4290
4291 #[simd_test(enable = "sse2")]
4292 unsafe fn test_mm_unpacklo_epi32() {
4293 let a = _mm_setr_epi32(0, 1, 2, 3);
4294 let b = _mm_setr_epi32(4, 5, 6, 7);
4295 let r = _mm_unpacklo_epi32(a, b);
4296 let e = _mm_setr_epi32(0, 4, 1, 5);
4297 assert_eq_m128i(r, e);
4298 }
4299
4300 #[simd_test(enable = "sse2")]
4301 unsafe fn test_mm_unpacklo_epi64() {
4302 let a = _mm_setr_epi64x(0, 1);
4303 let b = _mm_setr_epi64x(2, 3);
4304 let r = _mm_unpacklo_epi64(a, b);
4305 let e = _mm_setr_epi64x(0, 2);
4306 assert_eq_m128i(r, e);
4307 }
4308
4309 #[simd_test(enable = "sse2")]
4310 unsafe fn test_mm_add_sd() {
4311 let a = _mm_setr_pd(1.0, 2.0);
4312 let b = _mm_setr_pd(5.0, 10.0);
4313 let r = _mm_add_sd(a, b);
4314 assert_eq_m128d(r, _mm_setr_pd(6.0, 2.0));
4315 }
4316
4317 #[simd_test(enable = "sse2")]
4318 unsafe fn test_mm_add_pd() {
4319 let a = _mm_setr_pd(1.0, 2.0);
4320 let b = _mm_setr_pd(5.0, 10.0);
4321 let r = _mm_add_pd(a, b);
4322 assert_eq_m128d(r, _mm_setr_pd(6.0, 12.0));
4323 }
4324
4325 #[simd_test(enable = "sse2")]
4326 unsafe fn test_mm_div_sd() {
4327 let a = _mm_setr_pd(1.0, 2.0);
4328 let b = _mm_setr_pd(5.0, 10.0);
4329 let r = _mm_div_sd(a, b);
4330 assert_eq_m128d(r, _mm_setr_pd(0.2, 2.0));
4331 }
4332
4333 #[simd_test(enable = "sse2")]
4334 unsafe fn test_mm_div_pd() {
4335 let a = _mm_setr_pd(1.0, 2.0);
4336 let b = _mm_setr_pd(5.0, 10.0);
4337 let r = _mm_div_pd(a, b);
4338 assert_eq_m128d(r, _mm_setr_pd(0.2, 0.2));
4339 }
4340
4341 #[simd_test(enable = "sse2")]
4342 unsafe fn test_mm_max_sd() {
4343 let a = _mm_setr_pd(1.0, 2.0);
4344 let b = _mm_setr_pd(5.0, 10.0);
4345 let r = _mm_max_sd(a, b);
4346 assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
4347 }
4348
4349 #[simd_test(enable = "sse2")]
4350 unsafe fn test_mm_max_pd() {
4351 let a = _mm_setr_pd(1.0, 2.0);
4352 let b = _mm_setr_pd(5.0, 10.0);
4353 let r = _mm_max_pd(a, b);
4354 assert_eq_m128d(r, _mm_setr_pd(5.0, 10.0));
4355
4356 let a = _mm_setr_pd(-0.0, 0.0);
4358 let b = _mm_setr_pd(0.0, 0.0);
4359 let r1: [u8; 16] = transmute(_mm_max_pd(a, b));
4360 let r2: [u8; 16] = transmute(_mm_max_pd(b, a));
4361 let a: [u8; 16] = transmute(a);
4362 let b: [u8; 16] = transmute(b);
4363 assert_eq!(r1, b);
4364 assert_eq!(r2, a);
4365 assert_ne!(a, b); }
4367
4368 #[simd_test(enable = "sse2")]
4369 unsafe fn test_mm_min_sd() {
4370 let a = _mm_setr_pd(1.0, 2.0);
4371 let b = _mm_setr_pd(5.0, 10.0);
4372 let r = _mm_min_sd(a, b);
4373 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4374 }
4375
4376 #[simd_test(enable = "sse2")]
4377 unsafe fn test_mm_min_pd() {
4378 let a = _mm_setr_pd(1.0, 2.0);
4379 let b = _mm_setr_pd(5.0, 10.0);
4380 let r = _mm_min_pd(a, b);
4381 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4382
4383 let a = _mm_setr_pd(-0.0, 0.0);
4385 let b = _mm_setr_pd(0.0, 0.0);
4386 let r1: [u8; 16] = transmute(_mm_min_pd(a, b));
4387 let r2: [u8; 16] = transmute(_mm_min_pd(b, a));
4388 let a: [u8; 16] = transmute(a);
4389 let b: [u8; 16] = transmute(b);
4390 assert_eq!(r1, b);
4391 assert_eq!(r2, a);
4392 assert_ne!(a, b); }
4394
4395 #[simd_test(enable = "sse2")]
4396 unsafe fn test_mm_mul_sd() {
4397 let a = _mm_setr_pd(1.0, 2.0);
4398 let b = _mm_setr_pd(5.0, 10.0);
4399 let r = _mm_mul_sd(a, b);
4400 assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
4401 }
4402
4403 #[simd_test(enable = "sse2")]
4404 unsafe fn test_mm_mul_pd() {
4405 let a = _mm_setr_pd(1.0, 2.0);
4406 let b = _mm_setr_pd(5.0, 10.0);
4407 let r = _mm_mul_pd(a, b);
4408 assert_eq_m128d(r, _mm_setr_pd(5.0, 20.0));
4409 }
4410
4411 #[simd_test(enable = "sse2")]
4412 unsafe fn test_mm_sqrt_sd() {
4413 let a = _mm_setr_pd(1.0, 2.0);
4414 let b = _mm_setr_pd(5.0, 10.0);
4415 let r = _mm_sqrt_sd(a, b);
4416 assert_eq_m128d(r, _mm_setr_pd(5.0f64.sqrt(), 2.0));
4417 }
4418
4419 #[simd_test(enable = "sse2")]
4420 unsafe fn test_mm_sqrt_pd() {
4421 let r = _mm_sqrt_pd(_mm_setr_pd(1.0, 2.0));
4422 assert_eq_m128d(r, _mm_setr_pd(1.0f64.sqrt(), 2.0f64.sqrt()));
4423 }
4424
4425 #[simd_test(enable = "sse2")]
4426 unsafe fn test_mm_sub_sd() {
4427 let a = _mm_setr_pd(1.0, 2.0);
4428 let b = _mm_setr_pd(5.0, 10.0);
4429 let r = _mm_sub_sd(a, b);
4430 assert_eq_m128d(r, _mm_setr_pd(-4.0, 2.0));
4431 }
4432
4433 #[simd_test(enable = "sse2")]
4434 unsafe fn test_mm_sub_pd() {
4435 let a = _mm_setr_pd(1.0, 2.0);
4436 let b = _mm_setr_pd(5.0, 10.0);
4437 let r = _mm_sub_pd(a, b);
4438 assert_eq_m128d(r, _mm_setr_pd(-4.0, -8.0));
4439 }
4440
4441 #[simd_test(enable = "sse2")]
4442 unsafe fn test_mm_and_pd() {
4443 let a = transmute(u64x2::splat(5));
4444 let b = transmute(u64x2::splat(3));
4445 let r = _mm_and_pd(a, b);
4446 let e = transmute(u64x2::splat(1));
4447 assert_eq_m128d(r, e);
4448 }
4449
4450 #[simd_test(enable = "sse2")]
4451 unsafe fn test_mm_andnot_pd() {
4452 let a = transmute(u64x2::splat(5));
4453 let b = transmute(u64x2::splat(3));
4454 let r = _mm_andnot_pd(a, b);
4455 let e = transmute(u64x2::splat(2));
4456 assert_eq_m128d(r, e);
4457 }
4458
4459 #[simd_test(enable = "sse2")]
4460 unsafe fn test_mm_or_pd() {
4461 let a = transmute(u64x2::splat(5));
4462 let b = transmute(u64x2::splat(3));
4463 let r = _mm_or_pd(a, b);
4464 let e = transmute(u64x2::splat(7));
4465 assert_eq_m128d(r, e);
4466 }
4467
4468 #[simd_test(enable = "sse2")]
4469 unsafe fn test_mm_xor_pd() {
4470 let a = transmute(u64x2::splat(5));
4471 let b = transmute(u64x2::splat(3));
4472 let r = _mm_xor_pd(a, b);
4473 let e = transmute(u64x2::splat(6));
4474 assert_eq_m128d(r, e);
4475 }
4476
4477 #[simd_test(enable = "sse2")]
4478 unsafe fn test_mm_cmpeq_sd() {
4479 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4480 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4481 let r = transmute::<_, __m128i>(_mm_cmpeq_sd(a, b));
4482 assert_eq_m128i(r, e);
4483 }
4484
4485 #[simd_test(enable = "sse2")]
4486 unsafe fn test_mm_cmplt_sd() {
4487 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4488 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4489 let r = transmute::<_, __m128i>(_mm_cmplt_sd(a, b));
4490 assert_eq_m128i(r, e);
4491 }
4492
4493 #[simd_test(enable = "sse2")]
4494 unsafe fn test_mm_cmple_sd() {
4495 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4496 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4497 let r = transmute::<_, __m128i>(_mm_cmple_sd(a, b));
4498 assert_eq_m128i(r, e);
4499 }
4500
4501 #[simd_test(enable = "sse2")]
4502 unsafe fn test_mm_cmpgt_sd() {
4503 let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4504 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4505 let r = transmute::<_, __m128i>(_mm_cmpgt_sd(a, b));
4506 assert_eq_m128i(r, e);
4507 }
4508
4509 #[simd_test(enable = "sse2")]
4510 unsafe fn test_mm_cmpge_sd() {
4511 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4512 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4513 let r = transmute::<_, __m128i>(_mm_cmpge_sd(a, b));
4514 assert_eq_m128i(r, e);
4515 }
4516
4517 #[simd_test(enable = "sse2")]
4518 unsafe fn test_mm_cmpord_sd() {
4519 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4520 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4521 let r = transmute::<_, __m128i>(_mm_cmpord_sd(a, b));
4522 assert_eq_m128i(r, e);
4523 }
4524
4525 #[simd_test(enable = "sse2")]
4526 unsafe fn test_mm_cmpunord_sd() {
4527 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4528 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4529 let r = transmute::<_, __m128i>(_mm_cmpunord_sd(a, b));
4530 assert_eq_m128i(r, e);
4531 }
4532
4533 #[simd_test(enable = "sse2")]
4534 unsafe fn test_mm_cmpneq_sd() {
4535 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4536 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4537 let r = transmute::<_, __m128i>(_mm_cmpneq_sd(a, b));
4538 assert_eq_m128i(r, e);
4539 }
4540
4541 #[simd_test(enable = "sse2")]
4542 unsafe fn test_mm_cmpnlt_sd() {
4543 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4544 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4545 let r = transmute::<_, __m128i>(_mm_cmpnlt_sd(a, b));
4546 assert_eq_m128i(r, e);
4547 }
4548
4549 #[simd_test(enable = "sse2")]
4550 unsafe fn test_mm_cmpnle_sd() {
4551 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4552 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4553 let r = transmute::<_, __m128i>(_mm_cmpnle_sd(a, b));
4554 assert_eq_m128i(r, e);
4555 }
4556
4557 #[simd_test(enable = "sse2")]
4558 unsafe fn test_mm_cmpngt_sd() {
4559 let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4560 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4561 let r = transmute::<_, __m128i>(_mm_cmpngt_sd(a, b));
4562 assert_eq_m128i(r, e);
4563 }
4564
4565 #[simd_test(enable = "sse2")]
4566 unsafe fn test_mm_cmpnge_sd() {
4567 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4568 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4569 let r = transmute::<_, __m128i>(_mm_cmpnge_sd(a, b));
4570 assert_eq_m128i(r, e);
4571 }
4572
4573 #[simd_test(enable = "sse2")]
4574 unsafe fn test_mm_cmpeq_pd() {
4575 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4576 let e = _mm_setr_epi64x(!0, 0);
4577 let r = transmute::<_, __m128i>(_mm_cmpeq_pd(a, b));
4578 assert_eq_m128i(r, e);
4579 }
4580
4581 #[simd_test(enable = "sse2")]
4582 unsafe fn test_mm_cmplt_pd() {
4583 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4584 let e = _mm_setr_epi64x(0, !0);
4585 let r = transmute::<_, __m128i>(_mm_cmplt_pd(a, b));
4586 assert_eq_m128i(r, e);
4587 }
4588
4589 #[simd_test(enable = "sse2")]
4590 unsafe fn test_mm_cmple_pd() {
4591 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4592 let e = _mm_setr_epi64x(!0, !0);
4593 let r = transmute::<_, __m128i>(_mm_cmple_pd(a, b));
4594 assert_eq_m128i(r, e);
4595 }
4596
4597 #[simd_test(enable = "sse2")]
4598 unsafe fn test_mm_cmpgt_pd() {
4599 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4600 let e = _mm_setr_epi64x(0, 0);
4601 let r = transmute::<_, __m128i>(_mm_cmpgt_pd(a, b));
4602 assert_eq_m128i(r, e);
4603 }
4604
4605 #[simd_test(enable = "sse2")]
4606 unsafe fn test_mm_cmpge_pd() {
4607 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4608 let e = _mm_setr_epi64x(!0, 0);
4609 let r = transmute::<_, __m128i>(_mm_cmpge_pd(a, b));
4610 assert_eq_m128i(r, e);
4611 }
4612
4613 #[simd_test(enable = "sse2")]
4614 unsafe fn test_mm_cmpord_pd() {
4615 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4616 let e = _mm_setr_epi64x(0, !0);
4617 let r = transmute::<_, __m128i>(_mm_cmpord_pd(a, b));
4618 assert_eq_m128i(r, e);
4619 }
4620
4621 #[simd_test(enable = "sse2")]
4622 unsafe fn test_mm_cmpunord_pd() {
4623 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4624 let e = _mm_setr_epi64x(!0, 0);
4625 let r = transmute::<_, __m128i>(_mm_cmpunord_pd(a, b));
4626 assert_eq_m128i(r, e);
4627 }
4628
4629 #[simd_test(enable = "sse2")]
4630 unsafe fn test_mm_cmpneq_pd() {
4631 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4632 let e = _mm_setr_epi64x(!0, !0);
4633 let r = transmute::<_, __m128i>(_mm_cmpneq_pd(a, b));
4634 assert_eq_m128i(r, e);
4635 }
4636
4637 #[simd_test(enable = "sse2")]
4638 unsafe fn test_mm_cmpnlt_pd() {
4639 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4640 let e = _mm_setr_epi64x(0, 0);
4641 let r = transmute::<_, __m128i>(_mm_cmpnlt_pd(a, b));
4642 assert_eq_m128i(r, e);
4643 }
4644
4645 #[simd_test(enable = "sse2")]
4646 unsafe fn test_mm_cmpnle_pd() {
4647 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4648 let e = _mm_setr_epi64x(0, 0);
4649 let r = transmute::<_, __m128i>(_mm_cmpnle_pd(a, b));
4650 assert_eq_m128i(r, e);
4651 }
4652
4653 #[simd_test(enable = "sse2")]
4654 unsafe fn test_mm_cmpngt_pd() {
4655 let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4656 let e = _mm_setr_epi64x(0, !0);
4657 let r = transmute::<_, __m128i>(_mm_cmpngt_pd(a, b));
4658 assert_eq_m128i(r, e);
4659 }
4660
4661 #[simd_test(enable = "sse2")]
4662 unsafe fn test_mm_cmpnge_pd() {
4663 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4664 let e = _mm_setr_epi64x(0, !0);
4665 let r = transmute::<_, __m128i>(_mm_cmpnge_pd(a, b));
4666 assert_eq_m128i(r, e);
4667 }
4668
4669 #[simd_test(enable = "sse2")]
4670 unsafe fn test_mm_comieq_sd() {
4671 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4672 assert!(_mm_comieq_sd(a, b) != 0);
4673
4674 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(1.0, 3.0));
4675 assert!(_mm_comieq_sd(a, b) == 0);
4676 }
4677
4678 #[simd_test(enable = "sse2")]
4679 unsafe fn test_mm_comilt_sd() {
4680 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4681 assert!(_mm_comilt_sd(a, b) == 0);
4682 }
4683
4684 #[simd_test(enable = "sse2")]
4685 unsafe fn test_mm_comile_sd() {
4686 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4687 assert!(_mm_comile_sd(a, b) != 0);
4688 }
4689
4690 #[simd_test(enable = "sse2")]
4691 unsafe fn test_mm_comigt_sd() {
4692 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4693 assert!(_mm_comigt_sd(a, b) == 0);
4694 }
4695
4696 #[simd_test(enable = "sse2")]
4697 unsafe fn test_mm_comige_sd() {
4698 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4699 assert!(_mm_comige_sd(a, b) != 0);
4700 }
4701
4702 #[simd_test(enable = "sse2")]
4703 unsafe fn test_mm_comineq_sd() {
4704 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4705 assert!(_mm_comineq_sd(a, b) == 0);
4706 }
4707
4708 #[simd_test(enable = "sse2")]
4709 unsafe fn test_mm_ucomieq_sd() {
4710 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4711 assert!(_mm_ucomieq_sd(a, b) != 0);
4712
4713 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(NAN, 3.0));
4714 assert!(_mm_ucomieq_sd(a, b) == 0);
4715 }
4716
4717 #[simd_test(enable = "sse2")]
4718 unsafe fn test_mm_ucomilt_sd() {
4719 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4720 assert!(_mm_ucomilt_sd(a, b) == 0);
4721 }
4722
4723 #[simd_test(enable = "sse2")]
4724 unsafe fn test_mm_ucomile_sd() {
4725 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4726 assert!(_mm_ucomile_sd(a, b) != 0);
4727 }
4728
4729 #[simd_test(enable = "sse2")]
4730 unsafe fn test_mm_ucomigt_sd() {
4731 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4732 assert!(_mm_ucomigt_sd(a, b) == 0);
4733 }
4734
4735 #[simd_test(enable = "sse2")]
4736 unsafe fn test_mm_ucomige_sd() {
4737 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4738 assert!(_mm_ucomige_sd(a, b) != 0);
4739 }
4740
4741 #[simd_test(enable = "sse2")]
4742 unsafe fn test_mm_ucomineq_sd() {
4743 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4744 assert!(_mm_ucomineq_sd(a, b) == 0);
4745 }
4746
4747 #[simd_test(enable = "sse2")]
4748 unsafe fn test_mm_movemask_pd() {
4749 let r = _mm_movemask_pd(_mm_setr_pd(-1.0, 5.0));
4750 assert_eq!(r, 0b01);
4751
4752 let r = _mm_movemask_pd(_mm_setr_pd(-1.0, -5.0));
4753 assert_eq!(r, 0b11);
4754 }
4755
// Four `f64` values in a struct forced to 16-byte alignment; used by the
// load/store tests below that pass `data` to the intrinsics by raw pointer.
#[repr(align(16))]
struct Memory {
    // Lanes read or written through `as_ptr` / `as_mut_ptr` in the tests.
    data: [f64; 4],
}
4760
4761 #[simd_test(enable = "sse2")]
4762 unsafe fn test_mm_load_pd() {
4763 let mem = Memory {
4764 data: [1.0f64, 2.0, 3.0, 4.0],
4765 };
4766 let vals = &mem.data;
4767 let d = vals.as_ptr();
4768
4769 let r = _mm_load_pd(d);
4770 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4771 }
4772
4773 #[simd_test(enable = "sse2")]
4774 unsafe fn test_mm_load_sd() {
4775 let a = 1.;
4776 let expected = _mm_setr_pd(a, 0.);
4777 let r = _mm_load_sd(&a);
4778 assert_eq_m128d(r, expected);
4779 }
4780
4781 #[simd_test(enable = "sse2")]
4782 unsafe fn test_mm_loadh_pd() {
4783 let a = _mm_setr_pd(1., 2.);
4784 let b = 3.;
4785 let expected = _mm_setr_pd(_mm_cvtsd_f64(a), 3.);
4786 let r = _mm_loadh_pd(a, &b);
4787 assert_eq_m128d(r, expected);
4788 }
4789
4790 #[simd_test(enable = "sse2")]
4791 unsafe fn test_mm_loadl_pd() {
4792 let a = _mm_setr_pd(1., 2.);
4793 let b = 3.;
4794 let expected = _mm_setr_pd(3., get_m128d(a, 1));
4795 let r = _mm_loadl_pd(a, &b);
4796 assert_eq_m128d(r, expected);
4797 }
4798
4799 #[simd_test(enable = "sse2")]
4800 #[cfg_attr(miri, ignore)]
4803 unsafe fn test_mm_stream_pd() {
4804 #[repr(align(128))]
4805 struct Memory {
4806 pub data: [f64; 2],
4807 }
4808 let a = _mm_set1_pd(7.0);
4809 let mut mem = Memory { data: [-1.0; 2] };
4810
4811 _mm_stream_pd(ptr::addr_of_mut!(mem.data[0]), a);
4812 for i in 0..2 {
4813 assert_eq!(mem.data[i], get_m128d(a, i));
4814 }
4815 }
4816
4817 #[simd_test(enable = "sse2")]
4818 unsafe fn test_mm_store_sd() {
4819 let mut dest = 0.;
4820 let a = _mm_setr_pd(1., 2.);
4821 _mm_store_sd(&mut dest, a);
4822 assert_eq!(dest, _mm_cvtsd_f64(a));
4823 }
4824
4825 #[simd_test(enable = "sse2")]
4826 unsafe fn test_mm_store_pd() {
4827 let mut mem = Memory { data: [0.0f64; 4] };
4828 let vals = &mut mem.data;
4829 let a = _mm_setr_pd(1.0, 2.0);
4830 let d = vals.as_mut_ptr();
4831
4832 _mm_store_pd(d, *black_box(&a));
4833 assert_eq!(vals[0], 1.0);
4834 assert_eq!(vals[1], 2.0);
4835 }
4836
4837 #[simd_test(enable = "sse2")]
4838 unsafe fn test_mm_storeu_pd() {
4839 let mut mem = Memory { data: [0.0f64; 4] };
4840 let vals = &mut mem.data;
4841 let a = _mm_setr_pd(1.0, 2.0);
4842
4843 let mut ofs = 0;
4844 let mut p = vals.as_mut_ptr();
4845
4846 if (p as usize) & 0xf == 0 {
4848 ofs = 1;
4849 p = p.add(1);
4850 }
4851
4852 _mm_storeu_pd(p, *black_box(&a));
4853
4854 if ofs > 0 {
4855 assert_eq!(vals[ofs - 1], 0.0);
4856 }
4857 assert_eq!(vals[ofs + 0], 1.0);
4858 assert_eq!(vals[ofs + 1], 2.0);
4859 }
4860
4861 #[simd_test(enable = "sse2")]
4862 unsafe fn test_mm_storeu_si16() {
4863 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
4864 let mut r = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
4865 _mm_storeu_si16(ptr::addr_of_mut!(r).cast(), a);
4866 let e = _mm_setr_epi16(1, 10, 11, 12, 13, 14, 15, 16);
4867 assert_eq_m128i(r, e);
4868 }
4869
4870 #[simd_test(enable = "sse2")]
4871 unsafe fn test_mm_storeu_si32() {
4872 let a = _mm_setr_epi32(1, 2, 3, 4);
4873 let mut r = _mm_setr_epi32(5, 6, 7, 8);
4874 _mm_storeu_si32(ptr::addr_of_mut!(r).cast(), a);
4875 let e = _mm_setr_epi32(1, 6, 7, 8);
4876 assert_eq_m128i(r, e);
4877 }
4878
4879 #[simd_test(enable = "sse2")]
4880 unsafe fn test_mm_storeu_si64() {
4881 let a = _mm_setr_epi64x(1, 2);
4882 let mut r = _mm_setr_epi64x(3, 4);
4883 _mm_storeu_si64(ptr::addr_of_mut!(r).cast(), a);
4884 let e = _mm_setr_epi64x(1, 4);
4885 assert_eq_m128i(r, e);
4886 }
4887
4888 #[simd_test(enable = "sse2")]
4889 unsafe fn test_mm_store1_pd() {
4890 let mut mem = Memory { data: [0.0f64; 4] };
4891 let vals = &mut mem.data;
4892 let a = _mm_setr_pd(1.0, 2.0);
4893 let d = vals.as_mut_ptr();
4894
4895 _mm_store1_pd(d, *black_box(&a));
4896 assert_eq!(vals[0], 1.0);
4897 assert_eq!(vals[1], 1.0);
4898 }
4899
4900 #[simd_test(enable = "sse2")]
4901 unsafe fn test_mm_store_pd1() {
4902 let mut mem = Memory { data: [0.0f64; 4] };
4903 let vals = &mut mem.data;
4904 let a = _mm_setr_pd(1.0, 2.0);
4905 let d = vals.as_mut_ptr();
4906
4907 _mm_store_pd1(d, *black_box(&a));
4908 assert_eq!(vals[0], 1.0);
4909 assert_eq!(vals[1], 1.0);
4910 }
4911
4912 #[simd_test(enable = "sse2")]
4913 unsafe fn test_mm_storer_pd() {
4914 let mut mem = Memory { data: [0.0f64; 4] };
4915 let vals = &mut mem.data;
4916 let a = _mm_setr_pd(1.0, 2.0);
4917 let d = vals.as_mut_ptr();
4918
4919 _mm_storer_pd(d, *black_box(&a));
4920 assert_eq!(vals[0], 2.0);
4921 assert_eq!(vals[1], 1.0);
4922 }
4923
4924 #[simd_test(enable = "sse2")]
4925 unsafe fn test_mm_storeh_pd() {
4926 let mut dest = 0.;
4927 let a = _mm_setr_pd(1., 2.);
4928 _mm_storeh_pd(&mut dest, a);
4929 assert_eq!(dest, get_m128d(a, 1));
4930 }
4931
4932 #[simd_test(enable = "sse2")]
4933 unsafe fn test_mm_storel_pd() {
4934 let mut dest = 0.;
4935 let a = _mm_setr_pd(1., 2.);
4936 _mm_storel_pd(&mut dest, a);
4937 assert_eq!(dest, _mm_cvtsd_f64(a));
4938 }
4939
4940 #[simd_test(enable = "sse2")]
4941 unsafe fn test_mm_loadr_pd() {
4942 let mut mem = Memory {
4943 data: [1.0f64, 2.0, 3.0, 4.0],
4944 };
4945 let vals = &mut mem.data;
4946 let d = vals.as_ptr();
4947
4948 let r = _mm_loadr_pd(d);
4949 assert_eq_m128d(r, _mm_setr_pd(2.0, 1.0));
4950 }
4951
4952 #[simd_test(enable = "sse2")]
4953 unsafe fn test_mm_loadu_pd() {
4954 let mut mem = Memory {
4955 data: [1.0f64, 2.0, 3.0, 4.0],
4956 };
4957 let vals = &mut mem.data;
4958 let mut d = vals.as_ptr();
4959
4960 let mut offset = 0;
4962 if (d as usize) & 0xf == 0 {
4963 offset = 1;
4964 d = d.add(offset);
4965 }
4966
4967 let r = _mm_loadu_pd(d);
4968 let e = _mm_add_pd(_mm_setr_pd(1.0, 2.0), _mm_set1_pd(offset as f64));
4969 assert_eq_m128d(r, e);
4970 }
4971
4972 #[simd_test(enable = "sse2")]
4973 unsafe fn test_mm_loadu_si16() {
4974 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
4975 let r = _mm_loadu_si16(ptr::addr_of!(a) as *const _);
4976 assert_eq_m128i(r, _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0));
4977 }
4978
4979 #[simd_test(enable = "sse2")]
4980 unsafe fn test_mm_loadu_si32() {
4981 let a = _mm_setr_epi32(1, 2, 3, 4);
4982 let r = _mm_loadu_si32(ptr::addr_of!(a) as *const _);
4983 assert_eq_m128i(r, _mm_setr_epi32(1, 0, 0, 0));
4984 }
4985
4986 #[simd_test(enable = "sse2")]
4987 unsafe fn test_mm_loadu_si64() {
4988 let a = _mm_setr_epi64x(5, 6);
4989 let r = _mm_loadu_si64(ptr::addr_of!(a) as *const _);
4990 assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
4991 }
4992
4993 #[simd_test(enable = "sse2")]
4994 unsafe fn test_mm_cvtpd_ps() {
4995 let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, 5.0));
4996 assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, 0.0));
4997
4998 let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, -5.0));
4999 assert_eq_m128(r, _mm_setr_ps(-1.0, -5.0, 0.0, 0.0));
5000
5001 let r = _mm_cvtpd_ps(_mm_setr_pd(f64::MAX, f64::MIN));
5002 assert_eq_m128(r, _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0));
5003
5004 let r = _mm_cvtpd_ps(_mm_setr_pd(f32::MAX as f64, f32::MIN as f64));
5005 assert_eq_m128(r, _mm_setr_ps(f32::MAX, f32::MIN, 0.0, 0.0));
5006 }
5007
5008 #[simd_test(enable = "sse2")]
5009 unsafe fn test_mm_cvtps_pd() {
5010 let r = _mm_cvtps_pd(_mm_setr_ps(-1.0, 2.0, -3.0, 5.0));
5011 assert_eq_m128d(r, _mm_setr_pd(-1.0, 2.0));
5012
5013 let r = _mm_cvtps_pd(_mm_setr_ps(
5014 f32::MAX,
5015 f32::INFINITY,
5016 f32::NEG_INFINITY,
5017 f32::MIN,
5018 ));
5019 assert_eq_m128d(r, _mm_setr_pd(f32::MAX as f64, f64::INFINITY));
5020 }
5021
5022 #[simd_test(enable = "sse2")]
5023 unsafe fn test_mm_cvtpd_epi32() {
5024 let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, 5.0));
5025 assert_eq_m128i(r, _mm_setr_epi32(-1, 5, 0, 0));
5026
5027 let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, -5.0));
5028 assert_eq_m128i(r, _mm_setr_epi32(-1, -5, 0, 0));
5029
5030 let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::MAX, f64::MIN));
5031 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5032
5033 let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::INFINITY, f64::NEG_INFINITY));
5034 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5035
5036 let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::NAN, f64::NAN));
5037 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5038 }
5039
5040 #[simd_test(enable = "sse2")]
5041 unsafe fn test_mm_cvtsd_si32() {
5042 let r = _mm_cvtsd_si32(_mm_setr_pd(-2.0, 5.0));
5043 assert_eq!(r, -2);
5044
5045 let r = _mm_cvtsd_si32(_mm_setr_pd(f64::MAX, f64::MIN));
5046 assert_eq!(r, i32::MIN);
5047
5048 let r = _mm_cvtsd_si32(_mm_setr_pd(f64::NAN, f64::NAN));
5049 assert_eq!(r, i32::MIN);
5050 }
5051
5052 #[simd_test(enable = "sse2")]
5053 unsafe fn test_mm_cvtsd_ss() {
5054 let a = _mm_setr_ps(-1.1, -2.2, 3.3, 4.4);
5055 let b = _mm_setr_pd(2.0, -5.0);
5056
5057 let r = _mm_cvtsd_ss(a, b);
5058
5059 assert_eq_m128(r, _mm_setr_ps(2.0, -2.2, 3.3, 4.4));
5060
5061 let a = _mm_setr_ps(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY);
5062 let b = _mm_setr_pd(f64::INFINITY, -5.0);
5063
5064 let r = _mm_cvtsd_ss(a, b);
5065
5066 assert_eq_m128(
5067 r,
5068 _mm_setr_ps(
5069 f32::INFINITY,
5070 f32::NEG_INFINITY,
5071 f32::MAX,
5072 f32::NEG_INFINITY,
5073 ),
5074 );
5075 }
5076
5077 #[simd_test(enable = "sse2")]
5078 unsafe fn test_mm_cvtsd_f64() {
5079 let r = _mm_cvtsd_f64(_mm_setr_pd(-1.1, 2.2));
5080 assert_eq!(r, -1.1);
5081 }
5082
5083 #[simd_test(enable = "sse2")]
5084 unsafe fn test_mm_cvtss_sd() {
5085 let a = _mm_setr_pd(-1.1, 2.2);
5086 let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
5087
5088 let r = _mm_cvtss_sd(a, b);
5089 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.2));
5090
5091 let a = _mm_setr_pd(-1.1, f64::INFINITY);
5092 let b = _mm_setr_ps(f32::NEG_INFINITY, 2.0, 3.0, 4.0);
5093
5094 let r = _mm_cvtss_sd(a, b);
5095 assert_eq_m128d(r, _mm_setr_pd(f64::NEG_INFINITY, f64::INFINITY));
5096 }
5097
5098 #[simd_test(enable = "sse2")]
5099 unsafe fn test_mm_cvttpd_epi32() {
5100 let a = _mm_setr_pd(-1.1, 2.2);
5101 let r = _mm_cvttpd_epi32(a);
5102 assert_eq_m128i(r, _mm_setr_epi32(-1, 2, 0, 0));
5103
5104 let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
5105 let r = _mm_cvttpd_epi32(a);
5106 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5107 }
5108
5109 #[simd_test(enable = "sse2")]
5110 unsafe fn test_mm_cvttsd_si32() {
5111 let a = _mm_setr_pd(-1.1, 2.2);
5112 let r = _mm_cvttsd_si32(a);
5113 assert_eq!(r, -1);
5114
5115 let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
5116 let r = _mm_cvttsd_si32(a);
5117 assert_eq!(r, i32::MIN);
5118 }
5119
5120 #[simd_test(enable = "sse2")]
5121 unsafe fn test_mm_cvttps_epi32() {
5122 let a = _mm_setr_ps(-1.1, 2.2, -3.3, 6.6);
5123 let r = _mm_cvttps_epi32(a);
5124 assert_eq_m128i(r, _mm_setr_epi32(-1, 2, -3, 6));
5125
5126 let a = _mm_setr_ps(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX);
5127 let r = _mm_cvttps_epi32(a);
5128 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, i32::MIN, i32::MIN));
5129 }
5130
5131 #[simd_test(enable = "sse2")]
5132 unsafe fn test_mm_set_sd() {
5133 let r = _mm_set_sd(-1.0_f64);
5134 assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, 0_f64));
5135 }
5136
5137 #[simd_test(enable = "sse2")]
5138 unsafe fn test_mm_set1_pd() {
5139 let r = _mm_set1_pd(-1.0_f64);
5140 assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, -1.0_f64));
5141 }
5142
5143 #[simd_test(enable = "sse2")]
5144 unsafe fn test_mm_set_pd1() {
5145 let r = _mm_set_pd1(-2.0_f64);
5146 assert_eq_m128d(r, _mm_setr_pd(-2.0_f64, -2.0_f64));
5147 }
5148
5149 #[simd_test(enable = "sse2")]
5150 unsafe fn test_mm_set_pd() {
5151 let r = _mm_set_pd(1.0_f64, 5.0_f64);
5152 assert_eq_m128d(r, _mm_setr_pd(5.0_f64, 1.0_f64));
5153 }
5154
5155 #[simd_test(enable = "sse2")]
5156 unsafe fn test_mm_setr_pd() {
5157 let r = _mm_setr_pd(1.0_f64, -5.0_f64);
5158 assert_eq_m128d(r, _mm_setr_pd(1.0_f64, -5.0_f64));
5159 }
5160
5161 #[simd_test(enable = "sse2")]
5162 unsafe fn test_mm_setzero_pd() {
5163 let r = _mm_setzero_pd();
5164 assert_eq_m128d(r, _mm_setr_pd(0_f64, 0_f64));
5165 }
5166
5167 #[simd_test(enable = "sse2")]
5168 unsafe fn test_mm_load1_pd() {
5169 let d = -5.0;
5170 let r = _mm_load1_pd(&d);
5171 assert_eq_m128d(r, _mm_setr_pd(d, d));
5172 }
5173
5174 #[simd_test(enable = "sse2")]
5175 unsafe fn test_mm_load_pd1() {
5176 let d = -5.0;
5177 let r = _mm_load_pd1(&d);
5178 assert_eq_m128d(r, _mm_setr_pd(d, d));
5179 }
5180
5181 #[simd_test(enable = "sse2")]
5182 unsafe fn test_mm_unpackhi_pd() {
5183 let a = _mm_setr_pd(1.0, 2.0);
5184 let b = _mm_setr_pd(3.0, 4.0);
5185 let r = _mm_unpackhi_pd(a, b);
5186 assert_eq_m128d(r, _mm_setr_pd(2.0, 4.0));
5187 }
5188
5189 #[simd_test(enable = "sse2")]
5190 unsafe fn test_mm_unpacklo_pd() {
5191 let a = _mm_setr_pd(1.0, 2.0);
5192 let b = _mm_setr_pd(3.0, 4.0);
5193 let r = _mm_unpacklo_pd(a, b);
5194 assert_eq_m128d(r, _mm_setr_pd(1.0, 3.0));
5195 }
5196
5197 #[simd_test(enable = "sse2")]
5198 unsafe fn test_mm_shuffle_pd() {
5199 let a = _mm_setr_pd(1., 2.);
5200 let b = _mm_setr_pd(3., 4.);
5201 let expected = _mm_setr_pd(1., 3.);
5202 let r = _mm_shuffle_pd::<0b00_00_00_00>(a, b);
5203 assert_eq_m128d(r, expected);
5204 }
5205
5206 #[simd_test(enable = "sse2")]
5207 unsafe fn test_mm_move_sd() {
5208 let a = _mm_setr_pd(1., 2.);
5209 let b = _mm_setr_pd(3., 4.);
5210 let expected = _mm_setr_pd(3., 2.);
5211 let r = _mm_move_sd(a, b);
5212 assert_eq_m128d(r, expected);
5213 }
5214
5215 #[simd_test(enable = "sse2")]
5216 unsafe fn test_mm_castpd_ps() {
5217 let a = _mm_set1_pd(0.);
5218 let expected = _mm_set1_ps(0.);
5219 let r = _mm_castpd_ps(a);
5220 assert_eq_m128(r, expected);
5221 }
5222
5223 #[simd_test(enable = "sse2")]
5224 unsafe fn test_mm_castpd_si128() {
5225 let a = _mm_set1_pd(0.);
5226 let expected = _mm_set1_epi64x(0);
5227 let r = _mm_castpd_si128(a);
5228 assert_eq_m128i(r, expected);
5229 }
5230
5231 #[simd_test(enable = "sse2")]
5232 unsafe fn test_mm_castps_pd() {
5233 let a = _mm_set1_ps(0.);
5234 let expected = _mm_set1_pd(0.);
5235 let r = _mm_castps_pd(a);
5236 assert_eq_m128d(r, expected);
5237 }
5238
5239 #[simd_test(enable = "sse2")]
5240 unsafe fn test_mm_castps_si128() {
5241 let a = _mm_set1_ps(0.);
5242 let expected = _mm_set1_epi32(0);
5243 let r = _mm_castps_si128(a);
5244 assert_eq_m128i(r, expected);
5245 }
5246
5247 #[simd_test(enable = "sse2")]
5248 unsafe fn test_mm_castsi128_pd() {
5249 let a = _mm_set1_epi64x(0);
5250 let expected = _mm_set1_pd(0.);
5251 let r = _mm_castsi128_pd(a);
5252 assert_eq_m128d(r, expected);
5253 }
5254
5255 #[simd_test(enable = "sse2")]
5256 unsafe fn test_mm_castsi128_ps() {
5257 let a = _mm_set1_epi32(0);
5258 let expected = _mm_set1_ps(0.);
5259 let r = _mm_castsi128_ps(a);
5260 assert_eq_m128(r, expected);
5261 }
5262}