1#[cfg(test)]
4use stdarch_test::assert_instr;
5
6use crate::{
7 core_arch::{simd::*, x86::*},
8 intrinsics::simd::*,
9 intrinsics::sqrtf64,
10 mem, ptr,
11};
12
/// Invokes the `pause` intrinsic.
///
/// Unlike its neighbouring intrinsics, this function carries no
/// `#[target_feature]` attribute, and the `pause` instruction check is only
/// applied in test builds that already have `sse2` enabled.
#[inline]
#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(pause))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_pause() {
    // NOTE(review): presumably `pause` is harmless on CPUs without SSE2, which
    // would explain the missing `#[target_feature]` — confirm against the ISA manual.
    unsafe { pause() }
}
27
/// Invokes the `clflush` intrinsic with the address `p`.
///
/// # Safety
///
/// Declared `unsafe` and takes a raw pointer. NOTE(review): the precise
/// requirements on `p` are not visible from this wrapper — confirm against
/// the `clflush` instruction documentation.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(clflush))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_clflush(p: *const u8) {
    clflush(p)
}
39
/// Invokes the `lfence` intrinsic.
///
/// Test builds assert that an `lfence` instruction is emitted.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(lfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_lfence() {
    unsafe { lfence() }
}
55
/// Invokes the `mfence` intrinsic.
///
/// Test builds assert that an `mfence` instruction is emitted.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(mfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_mfence() {
    unsafe { mfence() }
}
71
72#[inline]
76#[target_feature(enable = "sse2")]
77#[cfg_attr(test, assert_instr(paddb))]
78#[stable(feature = "simd_x86", since = "1.27.0")]
79pub fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i {
80 unsafe { transmute(simd_add(a.as_i8x16(), b.as_i8x16())) }
81}
82
83#[inline]
87#[target_feature(enable = "sse2")]
88#[cfg_attr(test, assert_instr(paddw))]
89#[stable(feature = "simd_x86", since = "1.27.0")]
90pub fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i {
91 unsafe { transmute(simd_add(a.as_i16x8(), b.as_i16x8())) }
92}
93
94#[inline]
98#[target_feature(enable = "sse2")]
99#[cfg_attr(test, assert_instr(paddd))]
100#[stable(feature = "simd_x86", since = "1.27.0")]
101pub fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i {
102 unsafe { transmute(simd_add(a.as_i32x4(), b.as_i32x4())) }
103}
104
105#[inline]
109#[target_feature(enable = "sse2")]
110#[cfg_attr(test, assert_instr(paddq))]
111#[stable(feature = "simd_x86", since = "1.27.0")]
112pub fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i {
113 unsafe { transmute(simd_add(a.as_i64x2(), b.as_i64x2())) }
114}
115
116#[inline]
120#[target_feature(enable = "sse2")]
121#[cfg_attr(test, assert_instr(paddsb))]
122#[stable(feature = "simd_x86", since = "1.27.0")]
123pub fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i {
124 unsafe { transmute(simd_saturating_add(a.as_i8x16(), b.as_i8x16())) }
125}
126
127#[inline]
131#[target_feature(enable = "sse2")]
132#[cfg_attr(test, assert_instr(paddsw))]
133#[stable(feature = "simd_x86", since = "1.27.0")]
134pub fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i {
135 unsafe { transmute(simd_saturating_add(a.as_i16x8(), b.as_i16x8())) }
136}
137
138#[inline]
142#[target_feature(enable = "sse2")]
143#[cfg_attr(test, assert_instr(paddusb))]
144#[stable(feature = "simd_x86", since = "1.27.0")]
145pub fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i {
146 unsafe { transmute(simd_saturating_add(a.as_u8x16(), b.as_u8x16())) }
147}
148
149#[inline]
153#[target_feature(enable = "sse2")]
154#[cfg_attr(test, assert_instr(paddusw))]
155#[stable(feature = "simd_x86", since = "1.27.0")]
156pub fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i {
157 unsafe { transmute(simd_saturating_add(a.as_u16x8(), b.as_u16x8())) }
158}
159
160#[inline]
164#[target_feature(enable = "sse2")]
165#[cfg_attr(test, assert_instr(pavgb))]
166#[stable(feature = "simd_x86", since = "1.27.0")]
167pub fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i {
168 unsafe {
169 let a = simd_cast::<_, u16x16>(a.as_u8x16());
170 let b = simd_cast::<_, u16x16>(b.as_u8x16());
171 let r = simd_shr(simd_add(simd_add(a, b), u16x16::splat(1)), u16x16::splat(1));
172 transmute(simd_cast::<_, u8x16>(r))
173 }
174}
175
176#[inline]
180#[target_feature(enable = "sse2")]
181#[cfg_attr(test, assert_instr(pavgw))]
182#[stable(feature = "simd_x86", since = "1.27.0")]
183pub fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i {
184 unsafe {
185 let a = simd_cast::<_, u32x8>(a.as_u16x8());
186 let b = simd_cast::<_, u32x8>(b.as_u16x8());
187 let r = simd_shr(simd_add(simd_add(a, b), u32x8::splat(1)), u32x8::splat(1));
188 transmute(simd_cast::<_, u16x8>(r))
189 }
190}
191
192#[inline]
200#[target_feature(enable = "sse2")]
201#[cfg_attr(test, assert_instr(pmaddwd))]
202#[stable(feature = "simd_x86", since = "1.27.0")]
203pub fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i {
204 unsafe {
205 let r: i32x8 = simd_mul(simd_cast(a.as_i16x8()), simd_cast(b.as_i16x8()));
206 let even: i32x4 = simd_shuffle!(r, r, [0, 2, 4, 6]);
207 let odd: i32x4 = simd_shuffle!(r, r, [1, 3, 5, 7]);
208 simd_add(even, odd).as_m128i()
209 }
210}
211
212#[inline]
217#[target_feature(enable = "sse2")]
218#[cfg_attr(test, assert_instr(pmaxsw))]
219#[stable(feature = "simd_x86", since = "1.27.0")]
220pub fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i {
221 unsafe {
222 let a = a.as_i16x8();
223 let b = b.as_i16x8();
224 transmute(simd_select::<i16x8, _>(simd_gt(a, b), a, b))
225 }
226}
227
228#[inline]
233#[target_feature(enable = "sse2")]
234#[cfg_attr(test, assert_instr(pmaxub))]
235#[stable(feature = "simd_x86", since = "1.27.0")]
236pub fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i {
237 unsafe {
238 let a = a.as_u8x16();
239 let b = b.as_u8x16();
240 transmute(simd_select::<i8x16, _>(simd_gt(a, b), a, b))
241 }
242}
243
244#[inline]
249#[target_feature(enable = "sse2")]
250#[cfg_attr(test, assert_instr(pminsw))]
251#[stable(feature = "simd_x86", since = "1.27.0")]
252pub fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i {
253 unsafe {
254 let a = a.as_i16x8();
255 let b = b.as_i16x8();
256 transmute(simd_select::<i16x8, _>(simd_lt(a, b), a, b))
257 }
258}
259
260#[inline]
265#[target_feature(enable = "sse2")]
266#[cfg_attr(test, assert_instr(pminub))]
267#[stable(feature = "simd_x86", since = "1.27.0")]
268pub fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i {
269 unsafe {
270 let a = a.as_u8x16();
271 let b = b.as_u8x16();
272 transmute(simd_select::<i8x16, _>(simd_lt(a, b), a, b))
273 }
274}
275
276#[inline]
283#[target_feature(enable = "sse2")]
284#[cfg_attr(test, assert_instr(pmulhw))]
285#[stable(feature = "simd_x86", since = "1.27.0")]
286pub fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i {
287 unsafe {
288 let a = simd_cast::<_, i32x8>(a.as_i16x8());
289 let b = simd_cast::<_, i32x8>(b.as_i16x8());
290 let r = simd_shr(simd_mul(a, b), i32x8::splat(16));
291 transmute(simd_cast::<i32x8, i16x8>(r))
292 }
293}
294
295#[inline]
302#[target_feature(enable = "sse2")]
303#[cfg_attr(test, assert_instr(pmulhuw))]
304#[stable(feature = "simd_x86", since = "1.27.0")]
305pub fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i {
306 unsafe {
307 let a = simd_cast::<_, u32x8>(a.as_u16x8());
308 let b = simd_cast::<_, u32x8>(b.as_u16x8());
309 let r = simd_shr(simd_mul(a, b), u32x8::splat(16));
310 transmute(simd_cast::<u32x8, u16x8>(r))
311 }
312}
313
314#[inline]
321#[target_feature(enable = "sse2")]
322#[cfg_attr(test, assert_instr(pmullw))]
323#[stable(feature = "simd_x86", since = "1.27.0")]
324pub fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i {
325 unsafe { transmute(simd_mul(a.as_i16x8(), b.as_i16x8())) }
326}
327
328#[inline]
335#[target_feature(enable = "sse2")]
336#[cfg_attr(test, assert_instr(pmuludq))]
337#[stable(feature = "simd_x86", since = "1.27.0")]
338pub fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i {
339 unsafe {
340 let a = a.as_u64x2();
341 let b = b.as_u64x2();
342 let mask = u64x2::splat(u32::MAX.into());
343 transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
344 }
345}
346
347#[inline]
356#[target_feature(enable = "sse2")]
357#[cfg_attr(test, assert_instr(psadbw))]
358#[stable(feature = "simd_x86", since = "1.27.0")]
359pub fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i {
360 unsafe { transmute(psadbw(a.as_u8x16(), b.as_u8x16())) }
361}
362
363#[inline]
367#[target_feature(enable = "sse2")]
368#[cfg_attr(test, assert_instr(psubb))]
369#[stable(feature = "simd_x86", since = "1.27.0")]
370pub fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i {
371 unsafe { transmute(simd_sub(a.as_i8x16(), b.as_i8x16())) }
372}
373
374#[inline]
378#[target_feature(enable = "sse2")]
379#[cfg_attr(test, assert_instr(psubw))]
380#[stable(feature = "simd_x86", since = "1.27.0")]
381pub fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i {
382 unsafe { transmute(simd_sub(a.as_i16x8(), b.as_i16x8())) }
383}
384
385#[inline]
389#[target_feature(enable = "sse2")]
390#[cfg_attr(test, assert_instr(psubd))]
391#[stable(feature = "simd_x86", since = "1.27.0")]
392pub fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i {
393 unsafe { transmute(simd_sub(a.as_i32x4(), b.as_i32x4())) }
394}
395
396#[inline]
400#[target_feature(enable = "sse2")]
401#[cfg_attr(test, assert_instr(psubq))]
402#[stable(feature = "simd_x86", since = "1.27.0")]
403pub fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i {
404 unsafe { transmute(simd_sub(a.as_i64x2(), b.as_i64x2())) }
405}
406
407#[inline]
412#[target_feature(enable = "sse2")]
413#[cfg_attr(test, assert_instr(psubsb))]
414#[stable(feature = "simd_x86", since = "1.27.0")]
415pub fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i {
416 unsafe { transmute(simd_saturating_sub(a.as_i8x16(), b.as_i8x16())) }
417}
418
419#[inline]
424#[target_feature(enable = "sse2")]
425#[cfg_attr(test, assert_instr(psubsw))]
426#[stable(feature = "simd_x86", since = "1.27.0")]
427pub fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i {
428 unsafe { transmute(simd_saturating_sub(a.as_i16x8(), b.as_i16x8())) }
429}
430
431#[inline]
436#[target_feature(enable = "sse2")]
437#[cfg_attr(test, assert_instr(psubusb))]
438#[stable(feature = "simd_x86", since = "1.27.0")]
439pub fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i {
440 unsafe { transmute(simd_saturating_sub(a.as_u8x16(), b.as_u8x16())) }
441}
442
443#[inline]
448#[target_feature(enable = "sse2")]
449#[cfg_attr(test, assert_instr(psubusw))]
450#[stable(feature = "simd_x86", since = "1.27.0")]
451pub fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i {
452 unsafe { transmute(simd_saturating_sub(a.as_u16x8(), b.as_u16x8())) }
453}
454
/// Forwards `a` to `_mm_slli_si128_impl::<IMM8>` after the compile-time
/// `static_assert_uimm_bits!(IMM8, 8)` check on the immediate.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_slli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { _mm_slli_si128_impl::<IMM8>(a) }
}
467
468#[inline]
471#[target_feature(enable = "sse2")]
472unsafe fn _mm_slli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
473 const fn mask(shift: i32, i: u32) -> u32 {
474 let shift = shift as u32 & 0xff;
475 if shift > 15 { i } else { 16 - shift + i }
476 }
477 transmute::<i8x16, _>(simd_shuffle!(
478 i8x16::ZERO,
479 a.as_i8x16(),
480 [
481 mask(IMM8, 0),
482 mask(IMM8, 1),
483 mask(IMM8, 2),
484 mask(IMM8, 3),
485 mask(IMM8, 4),
486 mask(IMM8, 5),
487 mask(IMM8, 6),
488 mask(IMM8, 7),
489 mask(IMM8, 8),
490 mask(IMM8, 9),
491 mask(IMM8, 10),
492 mask(IMM8, 11),
493 mask(IMM8, 12),
494 mask(IMM8, 13),
495 mask(IMM8, 14),
496 mask(IMM8, 15),
497 ],
498 ))
499}
500
501#[inline]
505#[target_feature(enable = "sse2")]
506#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
507#[rustc_legacy_const_generics(1)]
508#[stable(feature = "simd_x86", since = "1.27.0")]
509pub fn _mm_bslli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
510 unsafe {
511 static_assert_uimm_bits!(IMM8, 8);
512 _mm_slli_si128_impl::<IMM8>(a)
513 }
514}
515
516#[inline]
520#[target_feature(enable = "sse2")]
521#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
522#[rustc_legacy_const_generics(1)]
523#[stable(feature = "simd_x86", since = "1.27.0")]
524pub fn _mm_bsrli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
525 unsafe {
526 static_assert_uimm_bits!(IMM8, 8);
527 _mm_srli_si128_impl::<IMM8>(a)
528 }
529}
530
531#[inline]
535#[target_feature(enable = "sse2")]
536#[cfg_attr(test, assert_instr(psllw, IMM8 = 7))]
537#[rustc_legacy_const_generics(1)]
538#[stable(feature = "simd_x86", since = "1.27.0")]
539pub fn _mm_slli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
540 static_assert_uimm_bits!(IMM8, 8);
541 unsafe {
542 if IMM8 >= 16 {
543 _mm_setzero_si128()
544 } else {
545 transmute(simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
546 }
547 }
548}
549
550#[inline]
555#[target_feature(enable = "sse2")]
556#[cfg_attr(test, assert_instr(psllw))]
557#[stable(feature = "simd_x86", since = "1.27.0")]
558pub fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i {
559 unsafe { transmute(psllw(a.as_i16x8(), count.as_i16x8())) }
560}
561
562#[inline]
566#[target_feature(enable = "sse2")]
567#[cfg_attr(test, assert_instr(pslld, IMM8 = 7))]
568#[rustc_legacy_const_generics(1)]
569#[stable(feature = "simd_x86", since = "1.27.0")]
570pub fn _mm_slli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
571 static_assert_uimm_bits!(IMM8, 8);
572 unsafe {
573 if IMM8 >= 32 {
574 _mm_setzero_si128()
575 } else {
576 transmute(simd_shl(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
577 }
578 }
579}
580
581#[inline]
586#[target_feature(enable = "sse2")]
587#[cfg_attr(test, assert_instr(pslld))]
588#[stable(feature = "simd_x86", since = "1.27.0")]
589pub fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i {
590 unsafe { transmute(pslld(a.as_i32x4(), count.as_i32x4())) }
591}
592
593#[inline]
597#[target_feature(enable = "sse2")]
598#[cfg_attr(test, assert_instr(psllq, IMM8 = 7))]
599#[rustc_legacy_const_generics(1)]
600#[stable(feature = "simd_x86", since = "1.27.0")]
601pub fn _mm_slli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
602 static_assert_uimm_bits!(IMM8, 8);
603 unsafe {
604 if IMM8 >= 64 {
605 _mm_setzero_si128()
606 } else {
607 transmute(simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
608 }
609 }
610}
611
612#[inline]
617#[target_feature(enable = "sse2")]
618#[cfg_attr(test, assert_instr(psllq))]
619#[stable(feature = "simd_x86", since = "1.27.0")]
620pub fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i {
621 unsafe { transmute(psllq(a.as_i64x2(), count.as_i64x2())) }
622}
623
624#[inline]
629#[target_feature(enable = "sse2")]
630#[cfg_attr(test, assert_instr(psraw, IMM8 = 1))]
631#[rustc_legacy_const_generics(1)]
632#[stable(feature = "simd_x86", since = "1.27.0")]
633pub fn _mm_srai_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
634 static_assert_uimm_bits!(IMM8, 8);
635 unsafe { transmute(simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16))) }
636}
637
638#[inline]
643#[target_feature(enable = "sse2")]
644#[cfg_attr(test, assert_instr(psraw))]
645#[stable(feature = "simd_x86", since = "1.27.0")]
646pub fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i {
647 unsafe { transmute(psraw(a.as_i16x8(), count.as_i16x8())) }
648}
649
650#[inline]
655#[target_feature(enable = "sse2")]
656#[cfg_attr(test, assert_instr(psrad, IMM8 = 1))]
657#[rustc_legacy_const_generics(1)]
658#[stable(feature = "simd_x86", since = "1.27.0")]
659pub fn _mm_srai_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
660 static_assert_uimm_bits!(IMM8, 8);
661 unsafe { transmute(simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31)))) }
662}
663
664#[inline]
669#[target_feature(enable = "sse2")]
670#[cfg_attr(test, assert_instr(psrad))]
671#[stable(feature = "simd_x86", since = "1.27.0")]
672pub fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i {
673 unsafe { transmute(psrad(a.as_i32x4(), count.as_i32x4())) }
674}
675
/// Forwards `a` to `_mm_srli_si128_impl::<IMM8>` after the compile-time
/// `static_assert_uimm_bits!(IMM8, 8)` check on the immediate.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { _mm_srli_si128_impl::<IMM8>(a) }
}
688
689#[inline]
692#[target_feature(enable = "sse2")]
693unsafe fn _mm_srli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
694 const fn mask(shift: i32, i: u32) -> u32 {
695 if (shift as u32) > 15 {
696 i + 16
697 } else {
698 i + (shift as u32)
699 }
700 }
701 let x: i8x16 = simd_shuffle!(
702 a.as_i8x16(),
703 i8x16::ZERO,
704 [
705 mask(IMM8, 0),
706 mask(IMM8, 1),
707 mask(IMM8, 2),
708 mask(IMM8, 3),
709 mask(IMM8, 4),
710 mask(IMM8, 5),
711 mask(IMM8, 6),
712 mask(IMM8, 7),
713 mask(IMM8, 8),
714 mask(IMM8, 9),
715 mask(IMM8, 10),
716 mask(IMM8, 11),
717 mask(IMM8, 12),
718 mask(IMM8, 13),
719 mask(IMM8, 14),
720 mask(IMM8, 15),
721 ],
722 );
723 transmute(x)
724}
725
726#[inline]
731#[target_feature(enable = "sse2")]
732#[cfg_attr(test, assert_instr(psrlw, IMM8 = 1))]
733#[rustc_legacy_const_generics(1)]
734#[stable(feature = "simd_x86", since = "1.27.0")]
735pub fn _mm_srli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
736 static_assert_uimm_bits!(IMM8, 8);
737 unsafe {
738 if IMM8 >= 16 {
739 _mm_setzero_si128()
740 } else {
741 transmute(simd_shr(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
742 }
743 }
744}
745
746#[inline]
751#[target_feature(enable = "sse2")]
752#[cfg_attr(test, assert_instr(psrlw))]
753#[stable(feature = "simd_x86", since = "1.27.0")]
754pub fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i {
755 unsafe { transmute(psrlw(a.as_i16x8(), count.as_i16x8())) }
756}
757
758#[inline]
763#[target_feature(enable = "sse2")]
764#[cfg_attr(test, assert_instr(psrld, IMM8 = 8))]
765#[rustc_legacy_const_generics(1)]
766#[stable(feature = "simd_x86", since = "1.27.0")]
767pub fn _mm_srli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
768 static_assert_uimm_bits!(IMM8, 8);
769 unsafe {
770 if IMM8 >= 32 {
771 _mm_setzero_si128()
772 } else {
773 transmute(simd_shr(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
774 }
775 }
776}
777
778#[inline]
783#[target_feature(enable = "sse2")]
784#[cfg_attr(test, assert_instr(psrld))]
785#[stable(feature = "simd_x86", since = "1.27.0")]
786pub fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i {
787 unsafe { transmute(psrld(a.as_i32x4(), count.as_i32x4())) }
788}
789
790#[inline]
795#[target_feature(enable = "sse2")]
796#[cfg_attr(test, assert_instr(psrlq, IMM8 = 1))]
797#[rustc_legacy_const_generics(1)]
798#[stable(feature = "simd_x86", since = "1.27.0")]
799pub fn _mm_srli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
800 static_assert_uimm_bits!(IMM8, 8);
801 unsafe {
802 if IMM8 >= 64 {
803 _mm_setzero_si128()
804 } else {
805 transmute(simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
806 }
807 }
808}
809
810#[inline]
815#[target_feature(enable = "sse2")]
816#[cfg_attr(test, assert_instr(psrlq))]
817#[stable(feature = "simd_x86", since = "1.27.0")]
818pub fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i {
819 unsafe { transmute(psrlq(a.as_i64x2(), count.as_i64x2())) }
820}
821
/// Bitwise AND of `a` and `b`, computed with `simd_and` directly on the
/// `__m128i` operands.
///
/// Test builds assert that this lowers to `andps`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i {
    unsafe { simd_and(a, b) }
}
833
834#[inline]
839#[target_feature(enable = "sse2")]
840#[cfg_attr(test, assert_instr(andnps))]
841#[stable(feature = "simd_x86", since = "1.27.0")]
842pub fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i {
843 unsafe { simd_and(simd_xor(_mm_set1_epi8(-1), a), b) }
844}
845
/// Bitwise OR of `a` and `b`, computed with `simd_or` directly on the
/// `__m128i` operands.
///
/// Test builds assert that this lowers to `orps`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(orps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i {
    unsafe { simd_or(a, b) }
}
857
/// Bitwise XOR of `a` and `b`, computed with `simd_xor` directly on the
/// `__m128i` operands.
///
/// Test builds assert that this lowers to `xorps`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i {
    unsafe { simd_xor(a, b) }
}
869
870#[inline]
874#[target_feature(enable = "sse2")]
875#[cfg_attr(test, assert_instr(pcmpeqb))]
876#[stable(feature = "simd_x86", since = "1.27.0")]
877pub fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i {
878 unsafe { transmute::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16())) }
879}
880
881#[inline]
885#[target_feature(enable = "sse2")]
886#[cfg_attr(test, assert_instr(pcmpeqw))]
887#[stable(feature = "simd_x86", since = "1.27.0")]
888pub fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i {
889 unsafe { transmute::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8())) }
890}
891
892#[inline]
896#[target_feature(enable = "sse2")]
897#[cfg_attr(test, assert_instr(pcmpeqd))]
898#[stable(feature = "simd_x86", since = "1.27.0")]
899pub fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i {
900 unsafe { transmute::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4())) }
901}
902
903#[inline]
907#[target_feature(enable = "sse2")]
908#[cfg_attr(test, assert_instr(pcmpgtb))]
909#[stable(feature = "simd_x86", since = "1.27.0")]
910pub fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i {
911 unsafe { transmute::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16())) }
912}
913
914#[inline]
918#[target_feature(enable = "sse2")]
919#[cfg_attr(test, assert_instr(pcmpgtw))]
920#[stable(feature = "simd_x86", since = "1.27.0")]
921pub fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i {
922 unsafe { transmute::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8())) }
923}
924
925#[inline]
929#[target_feature(enable = "sse2")]
930#[cfg_attr(test, assert_instr(pcmpgtd))]
931#[stable(feature = "simd_x86", since = "1.27.0")]
932pub fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i {
933 unsafe { transmute::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4())) }
934}
935
936#[inline]
940#[target_feature(enable = "sse2")]
941#[cfg_attr(test, assert_instr(pcmpgtb))]
942#[stable(feature = "simd_x86", since = "1.27.0")]
943pub fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i {
944 unsafe { transmute::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16())) }
945}
946
947#[inline]
951#[target_feature(enable = "sse2")]
952#[cfg_attr(test, assert_instr(pcmpgtw))]
953#[stable(feature = "simd_x86", since = "1.27.0")]
954pub fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i {
955 unsafe { transmute::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8())) }
956}
957
958#[inline]
962#[target_feature(enable = "sse2")]
963#[cfg_attr(test, assert_instr(pcmpgtd))]
964#[stable(feature = "simd_x86", since = "1.27.0")]
965pub fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i {
966 unsafe { transmute::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4())) }
967}
968
969#[inline]
974#[target_feature(enable = "sse2")]
975#[cfg_attr(test, assert_instr(cvtdq2pd))]
976#[stable(feature = "simd_x86", since = "1.27.0")]
977pub fn _mm_cvtepi32_pd(a: __m128i) -> __m128d {
978 unsafe {
979 let a = a.as_i32x4();
980 simd_cast::<i32x2, __m128d>(simd_shuffle!(a, a, [0, 1]))
981 }
982}
983
984#[inline]
989#[target_feature(enable = "sse2")]
990#[cfg_attr(test, assert_instr(cvtsi2sd))]
991#[stable(feature = "simd_x86", since = "1.27.0")]
992pub fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d {
993 unsafe { simd_insert!(a, 0, b as f64) }
994}
995
996#[inline]
1001#[target_feature(enable = "sse2")]
1002#[cfg_attr(test, assert_instr(cvtdq2ps))]
1003#[stable(feature = "simd_x86", since = "1.27.0")]
1004pub fn _mm_cvtepi32_ps(a: __m128i) -> __m128 {
1005 unsafe { transmute(simd_cast::<_, f32x4>(a.as_i32x4())) }
1006}
1007
1008#[inline]
1013#[target_feature(enable = "sse2")]
1014#[cfg_attr(test, assert_instr(cvtps2dq))]
1015#[stable(feature = "simd_x86", since = "1.27.0")]
1016pub fn _mm_cvtps_epi32(a: __m128) -> __m128i {
1017 unsafe { transmute(cvtps2dq(a)) }
1018}
1019
1020#[inline]
1025#[target_feature(enable = "sse2")]
1026#[stable(feature = "simd_x86", since = "1.27.0")]
1027pub fn _mm_cvtsi32_si128(a: i32) -> __m128i {
1028 unsafe { transmute(i32x4::new(a, 0, 0, 0)) }
1029}
1030
1031#[inline]
1035#[target_feature(enable = "sse2")]
1036#[stable(feature = "simd_x86", since = "1.27.0")]
1037pub fn _mm_cvtsi128_si32(a: __m128i) -> i32 {
1038 unsafe { simd_extract!(a.as_i32x4(), 0) }
1039}
1040
1041#[inline]
1046#[target_feature(enable = "sse2")]
1047#[stable(feature = "simd_x86", since = "1.27.0")]
1049pub fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i {
1050 unsafe { transmute(i64x2::new(e0, e1)) }
1051}
1052
1053#[inline]
1057#[target_feature(enable = "sse2")]
1058#[stable(feature = "simd_x86", since = "1.27.0")]
1060pub fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
1061 unsafe { transmute(i32x4::new(e0, e1, e2, e3)) }
1062}
1063
1064#[inline]
1068#[target_feature(enable = "sse2")]
1069#[stable(feature = "simd_x86", since = "1.27.0")]
1071pub fn _mm_set_epi16(
1072 e7: i16,
1073 e6: i16,
1074 e5: i16,
1075 e4: i16,
1076 e3: i16,
1077 e2: i16,
1078 e1: i16,
1079 e0: i16,
1080) -> __m128i {
1081 unsafe { transmute(i16x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) }
1082}
1083
1084#[inline]
1088#[target_feature(enable = "sse2")]
1089#[stable(feature = "simd_x86", since = "1.27.0")]
1091pub fn _mm_set_epi8(
1092 e15: i8,
1093 e14: i8,
1094 e13: i8,
1095 e12: i8,
1096 e11: i8,
1097 e10: i8,
1098 e9: i8,
1099 e8: i8,
1100 e7: i8,
1101 e6: i8,
1102 e5: i8,
1103 e4: i8,
1104 e3: i8,
1105 e2: i8,
1106 e1: i8,
1107 e0: i8,
1108) -> __m128i {
1109 unsafe {
1110 #[rustfmt::skip]
1111 transmute(i8x16::new(
1112 e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
1113 ))
1114 }
1115}
1116
/// Builds a vector with `a` in both 64-bit positions by passing it twice to
/// `_mm_set_epi64x`.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set1_epi64x(a: i64) -> __m128i {
    _mm_set_epi64x(a, a)
}
1127
/// Builds a vector with `a` in all four 32-bit positions by passing it four
/// times to `_mm_set_epi32`.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set1_epi32(a: i32) -> __m128i {
    _mm_set_epi32(a, a, a, a)
}
1138
/// Builds a vector with `a` in all eight 16-bit positions by passing it
/// eight times to `_mm_set_epi16`.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set1_epi16(a: i16) -> __m128i {
    _mm_set_epi16(a, a, a, a, a, a, a, a)
}
1149
/// Builds a vector with `a` in all sixteen 8-bit positions by passing it
/// sixteen times to `_mm_set_epi8`.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set1_epi8(a: i8) -> __m128i {
    _mm_set_epi8(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
}
1160
/// Builds a vector from four `i32` values supplied in the reverse of the
/// order `_mm_set_epi32` takes them.
///
/// The arguments are passed on to `_mm_set_epi32` back to front, so the
/// first argument here becomes that function's last.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
    _mm_set_epi32(e0, e1, e2, e3)
}
1171
/// Builds a vector from eight `i16` values supplied in the reverse of the
/// order `_mm_set_epi16` takes them.
///
/// The arguments are passed on to `_mm_set_epi16` back to front, so the
/// first argument here becomes that function's last.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_setr_epi16(
    e7: i16,
    e6: i16,
    e5: i16,
    e4: i16,
    e3: i16,
    e2: i16,
    e1: i16,
    e0: i16,
) -> __m128i {
    _mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7)
}
1191
/// Builds a vector from sixteen `i8` values supplied in the reverse of the
/// order `_mm_set_epi8` takes them.
///
/// The arguments are passed on to `_mm_set_epi8` back to front, so the
/// first argument here becomes that function's last.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_setr_epi8(
    e15: i8,
    e14: i8,
    e13: i8,
    e12: i8,
    e11: i8,
    e10: i8,
    e9: i8,
    e8: i8,
    e7: i8,
    e6: i8,
    e5: i8,
    e4: i8,
    e3: i8,
    e2: i8,
    e1: i8,
    e0: i8,
) -> __m128i {
    #[rustfmt::skip]
    _mm_set_epi8(
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
    )
}
1222
/// Returns a `__m128i` with every bit cleared.
///
/// The value comes from `mem::zeroed()` evaluated inside a `const` block.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_setzero_si128() -> __m128i {
    const { unsafe { mem::zeroed() } }
}
1233
1234#[inline]
1238#[target_feature(enable = "sse2")]
1239#[stable(feature = "simd_x86", since = "1.27.0")]
1240pub unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i {
1241 _mm_set_epi64x(0, ptr::read_unaligned(mem_addr as *const i64))
1242}
1243
/// Loads a `__m128i` by dereferencing `mem_addr`.
///
/// Test builds assert a `movaps` instruction, except on 32-bit x86 MSVC
/// targets where the check is skipped.
///
/// # Safety
///
/// The pointer is dereferenced directly, so it must satisfy the usual
/// requirements of a `*const __m128i` dereference (valid and suitably aligned).
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(movaps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i {
    *mem_addr
}
1259
1260#[inline]
1266#[target_feature(enable = "sse2")]
1267#[cfg_attr(test, assert_instr(movups))]
1268#[stable(feature = "simd_x86", since = "1.27.0")]
1269pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
1270 let mut dst: __m128i = _mm_undefined_si128();
1271 ptr::copy_nonoverlapping(
1272 mem_addr as *const u8,
1273 ptr::addr_of_mut!(dst) as *mut u8,
1274 mem::size_of::<__m128i>(),
1275 );
1276 dst
1277}
1278
/// Invokes the `maskmovdqu` intrinsic with `a` and `mask` (both as `i8x16`)
/// and the destination pointer `mem_addr`.
///
/// # Safety
///
/// Declared `unsafe` and writes through a raw pointer. NOTE(review): which
/// bytes are written, and the resulting validity requirements on `mem_addr`,
/// are defined by the intrinsic and not visible here — confirm against the
/// instruction documentation.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(maskmovdqu))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8) {
    maskmovdqu(a.as_i8x16(), mask.as_i8x16(), mem_addr)
}
1305
/// Stores `a` by assigning through `mem_addr`.
///
/// Test builds assert a `movaps` instruction, except on 32-bit x86 MSVC
/// targets where the check is skipped.
///
/// # Safety
///
/// The pointer is written through directly, so it must satisfy the usual
/// requirements of a `*mut __m128i` dereference (valid and suitably aligned).
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(movaps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) {
    *mem_addr = a;
}
1321
1322#[inline]
1328#[target_feature(enable = "sse2")]
1329#[cfg_attr(test, assert_instr(movups))] #[stable(feature = "simd_x86", since = "1.27.0")]
1331pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) {
1332 mem_addr.write_unaligned(a);
1333}
1334
1335#[inline]
1341#[target_feature(enable = "sse2")]
1342#[stable(feature = "simd_x86", since = "1.27.0")]
1343pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) {
1344 ptr::copy_nonoverlapping(ptr::addr_of!(a) as *const u8, mem_addr as *mut u8, 8);
1345}
1346
/// Stores `a` to `mem_addr` by executing `movntdq` through inline assembly.
///
/// # Safety
///
/// Writes through the raw pointer `mem_addr`. NOTE(review): alignment and
/// memory-ordering requirements come from the `movntdq` instruction and are
/// not visible here — confirm against the instruction documentation.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movntdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) {
    crate::arch::asm!(
        // `vps!` builds the instruction string; presumably it supplies the
        // `{p}` memory operand ahead of the `,{a}` suffix — verify against
        // the macro definition.
        vps!("movntdq", ",{a}"),
        // Destination address in a general-purpose register.
        p = in(reg) mem_addr,
        // Value to store in an xmm register.
        a = in(xmm_reg) a,
        options(nostack, preserves_flags),
    );
}
1374
/// Stores the 32-bit value `a` to `mem_addr` by executing `movnti` through
/// inline assembly.
///
/// # Safety
///
/// Writes through the raw pointer `mem_addr`. NOTE(review): memory-ordering
/// requirements come from the `movnti` instruction and are not visible here —
/// confirm against the instruction documentation.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movnti))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) {
    crate::arch::asm!(
        // `vps!` builds the instruction string; presumably it supplies the
        // `{p}` memory operand ahead of the suffix — verify against the macro
        // definition. The `:e` modifier selects the 32-bit name of register `a`.
        vps!("movnti", ",{a:e}"),
        // Destination address in a general-purpose register.
        p = in(reg) mem_addr,
        a = in(reg) a,
        options(nostack, preserves_flags),
    );
}
1402
1403#[inline]
1408#[target_feature(enable = "sse2")]
1409#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movq))]
1411#[stable(feature = "simd_x86", since = "1.27.0")]
1412pub fn _mm_move_epi64(a: __m128i) -> __m128i {
1413 unsafe {
1414 let r: i64x2 = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 2]);
1415 transmute(r)
1416 }
1417}
1418
1419#[inline]
1424#[target_feature(enable = "sse2")]
1425#[cfg_attr(test, assert_instr(packsswb))]
1426#[stable(feature = "simd_x86", since = "1.27.0")]
1427pub fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i {
1428 unsafe { transmute(packsswb(a.as_i16x8(), b.as_i16x8())) }
1429}
1430
1431#[inline]
1436#[target_feature(enable = "sse2")]
1437#[cfg_attr(test, assert_instr(packssdw))]
1438#[stable(feature = "simd_x86", since = "1.27.0")]
1439pub fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i {
1440 unsafe { transmute(packssdw(a.as_i32x4(), b.as_i32x4())) }
1441}
1442
1443#[inline]
1448#[target_feature(enable = "sse2")]
1449#[cfg_attr(test, assert_instr(packuswb))]
1450#[stable(feature = "simd_x86", since = "1.27.0")]
1451pub fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i {
1452 unsafe { transmute(packuswb(a.as_i16x8(), b.as_i16x8())) }
1453}
1454
1455#[inline]
1459#[target_feature(enable = "sse2")]
1460#[cfg_attr(test, assert_instr(pextrw, IMM8 = 7))]
1461#[rustc_legacy_const_generics(1)]
1462#[stable(feature = "simd_x86", since = "1.27.0")]
1463pub fn _mm_extract_epi16<const IMM8: i32>(a: __m128i) -> i32 {
1464 static_assert_uimm_bits!(IMM8, 3);
1465 unsafe { simd_extract!(a.as_u16x8(), IMM8 as u32, u16) as i32 }
1466}
1467
1468#[inline]
1472#[target_feature(enable = "sse2")]
1473#[cfg_attr(test, assert_instr(pinsrw, IMM8 = 7))]
1474#[rustc_legacy_const_generics(2)]
1475#[stable(feature = "simd_x86", since = "1.27.0")]
1476pub fn _mm_insert_epi16<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
1477 static_assert_uimm_bits!(IMM8, 3);
1478 unsafe { transmute(simd_insert!(a.as_i16x8(), IMM8 as u32, i as i16)) }
1479}
1480
1481#[inline]
1485#[target_feature(enable = "sse2")]
1486#[cfg_attr(test, assert_instr(pmovmskb))]
1487#[stable(feature = "simd_x86", since = "1.27.0")]
1488pub fn _mm_movemask_epi8(a: __m128i) -> i32 {
1489 unsafe {
1490 let z = i8x16::ZERO;
1491 let m: i8x16 = simd_lt(a.as_i8x16(), z);
1492 simd_bitmask::<_, u16>(m) as u32 as i32
1493 }
1494}
1495
1496#[inline]
1500#[target_feature(enable = "sse2")]
1501#[cfg_attr(test, assert_instr(pshufd, IMM8 = 9))]
1502#[rustc_legacy_const_generics(1)]
1503#[stable(feature = "simd_x86", since = "1.27.0")]
1504pub fn _mm_shuffle_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
1505 static_assert_uimm_bits!(IMM8, 8);
1506 unsafe {
1507 let a = a.as_i32x4();
1508 let x: i32x4 = simd_shuffle!(
1509 a,
1510 a,
1511 [
1512 IMM8 as u32 & 0b11,
1513 (IMM8 as u32 >> 2) & 0b11,
1514 (IMM8 as u32 >> 4) & 0b11,
1515 (IMM8 as u32 >> 6) & 0b11,
1516 ],
1517 );
1518 transmute(x)
1519 }
1520}
1521
1522#[inline]
1530#[target_feature(enable = "sse2")]
1531#[cfg_attr(test, assert_instr(pshufhw, IMM8 = 9))]
1532#[rustc_legacy_const_generics(1)]
1533#[stable(feature = "simd_x86", since = "1.27.0")]
1534pub fn _mm_shufflehi_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
1535 static_assert_uimm_bits!(IMM8, 8);
1536 unsafe {
1537 let a = a.as_i16x8();
1538 let x: i16x8 = simd_shuffle!(
1539 a,
1540 a,
1541 [
1542 0,
1543 1,
1544 2,
1545 3,
1546 (IMM8 as u32 & 0b11) + 4,
1547 ((IMM8 as u32 >> 2) & 0b11) + 4,
1548 ((IMM8 as u32 >> 4) & 0b11) + 4,
1549 ((IMM8 as u32 >> 6) & 0b11) + 4,
1550 ],
1551 );
1552 transmute(x)
1553 }
1554}
1555
1556#[inline]
1564#[target_feature(enable = "sse2")]
1565#[cfg_attr(test, assert_instr(pshuflw, IMM8 = 9))]
1566#[rustc_legacy_const_generics(1)]
1567#[stable(feature = "simd_x86", since = "1.27.0")]
1568pub fn _mm_shufflelo_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
1569 static_assert_uimm_bits!(IMM8, 8);
1570 unsafe {
1571 let a = a.as_i16x8();
1572 let x: i16x8 = simd_shuffle!(
1573 a,
1574 a,
1575 [
1576 IMM8 as u32 & 0b11,
1577 (IMM8 as u32 >> 2) & 0b11,
1578 (IMM8 as u32 >> 4) & 0b11,
1579 (IMM8 as u32 >> 6) & 0b11,
1580 4,
1581 5,
1582 6,
1583 7,
1584 ],
1585 );
1586 transmute(x)
1587 }
1588}
1589
1590#[inline]
1594#[target_feature(enable = "sse2")]
1595#[cfg_attr(test, assert_instr(punpckhbw))]
1596#[stable(feature = "simd_x86", since = "1.27.0")]
1597pub fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i {
1598 unsafe {
1599 transmute::<i8x16, _>(simd_shuffle!(
1600 a.as_i8x16(),
1601 b.as_i8x16(),
1602 [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31],
1603 ))
1604 }
1605}
1606
1607#[inline]
1611#[target_feature(enable = "sse2")]
1612#[cfg_attr(test, assert_instr(punpckhwd))]
1613#[stable(feature = "simd_x86", since = "1.27.0")]
1614pub fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i {
1615 unsafe {
1616 let x = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]);
1617 transmute::<i16x8, _>(x)
1618 }
1619}
1620
1621#[inline]
1625#[target_feature(enable = "sse2")]
1626#[cfg_attr(test, assert_instr(unpckhps))]
1627#[stable(feature = "simd_x86", since = "1.27.0")]
1628pub fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i {
1629 unsafe { transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7])) }
1630}
1631
1632#[inline]
1636#[target_feature(enable = "sse2")]
1637#[cfg_attr(test, assert_instr(unpckhpd))]
1638#[stable(feature = "simd_x86", since = "1.27.0")]
1639pub fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i {
1640 unsafe { transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [1, 3])) }
1641}
1642
1643#[inline]
1647#[target_feature(enable = "sse2")]
1648#[cfg_attr(test, assert_instr(punpcklbw))]
1649#[stable(feature = "simd_x86", since = "1.27.0")]
1650pub fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i {
1651 unsafe {
1652 transmute::<i8x16, _>(simd_shuffle!(
1653 a.as_i8x16(),
1654 b.as_i8x16(),
1655 [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23],
1656 ))
1657 }
1658}
1659
1660#[inline]
1664#[target_feature(enable = "sse2")]
1665#[cfg_attr(test, assert_instr(punpcklwd))]
1666#[stable(feature = "simd_x86", since = "1.27.0")]
1667pub fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i {
1668 unsafe {
1669 let x = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]);
1670 transmute::<i16x8, _>(x)
1671 }
1672}
1673
1674#[inline]
1678#[target_feature(enable = "sse2")]
1679#[cfg_attr(test, assert_instr(unpcklps))]
1680#[stable(feature = "simd_x86", since = "1.27.0")]
1681pub fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i {
1682 unsafe { transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5])) }
1683}
1684
1685#[inline]
1689#[target_feature(enable = "sse2")]
1690#[cfg_attr(test, assert_instr(movlhps))]
1691#[stable(feature = "simd_x86", since = "1.27.0")]
1692pub fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i {
1693 unsafe { transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [0, 2])) }
1694}
1695
1696#[inline]
1701#[target_feature(enable = "sse2")]
1702#[cfg_attr(test, assert_instr(addsd))]
1703#[stable(feature = "simd_x86", since = "1.27.0")]
1704pub fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d {
1705 unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) + _mm_cvtsd_f64(b)) }
1706}
1707
/// Adds `a` and `b` lane-wise via `simd_add`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(addpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { simd_add(a, b) }
}
1719
1720#[inline]
1725#[target_feature(enable = "sse2")]
1726#[cfg_attr(test, assert_instr(divsd))]
1727#[stable(feature = "simd_x86", since = "1.27.0")]
1728pub fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d {
1729 unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) / _mm_cvtsd_f64(b)) }
1730}
1731
/// Divides `a` by `b` lane-wise via `simd_div`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(divpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { simd_div(a, b) }
}
1743
/// Forwards `a` and `b` to the `maxsd` intrinsic and returns its result.
///
/// Per Intel's documentation for this intrinsic, the low lane is the maximum
/// of the two low lanes and the high lane is copied from `a`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(maxsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { maxsd(a, b) }
}
1755
/// Forwards `a` and `b` to the `maxpd` intrinsic and returns its result.
///
/// Per Intel's documentation for this intrinsic, each result lane is the
/// maximum of the corresponding lanes of `a` and `b`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(maxpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { maxpd(a, b) }
}
1767
/// Forwards `a` and `b` to the `minsd` intrinsic and returns its result.
///
/// Per Intel's documentation for this intrinsic, the low lane is the minimum
/// of the two low lanes and the high lane is copied from `a`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(minsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { minsd(a, b) }
}
1779
/// Forwards `a` and `b` to the `minpd` intrinsic and returns its result.
///
/// Per Intel's documentation for this intrinsic, each result lane is the
/// minimum of the corresponding lanes of `a` and `b`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(minpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { minpd(a, b) }
}
1791
1792#[inline]
1797#[target_feature(enable = "sse2")]
1798#[cfg_attr(test, assert_instr(mulsd))]
1799#[stable(feature = "simd_x86", since = "1.27.0")]
1800pub fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d {
1801 unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) * _mm_cvtsd_f64(b)) }
1802}
1803
/// Multiplies `a` and `b` lane-wise via `simd_mul`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(mulpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { simd_mul(a, b) }
}
1815
1816#[inline]
1821#[target_feature(enable = "sse2")]
1822#[cfg_attr(test, assert_instr(sqrtsd))]
1823#[stable(feature = "simd_x86", since = "1.27.0")]
1824pub fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d {
1825 unsafe { simd_insert!(a, 0, sqrtf64(_mm_cvtsd_f64(b))) }
1826}
1827
/// Computes the square root of each lane of `a` via `simd_fsqrt`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(sqrtpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sqrt_pd(a: __m128d) -> __m128d {
    unsafe { simd_fsqrt(a) }
}
1838
1839#[inline]
1844#[target_feature(enable = "sse2")]
1845#[cfg_attr(test, assert_instr(subsd))]
1846#[stable(feature = "simd_x86", since = "1.27.0")]
1847pub fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d {
1848 unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) - _mm_cvtsd_f64(b)) }
1849}
1850
/// Subtracts `b` from `a` lane-wise via `simd_sub`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(subpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { simd_sub(a, b) }
}
1862
1863#[inline]
1868#[target_feature(enable = "sse2")]
1869#[cfg_attr(test, assert_instr(andps))]
1870#[stable(feature = "simd_x86", since = "1.27.0")]
1871pub fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d {
1872 unsafe {
1873 let a: __m128i = transmute(a);
1874 let b: __m128i = transmute(b);
1875 transmute(_mm_and_si128(a, b))
1876 }
1877}
1878
1879#[inline]
1883#[target_feature(enable = "sse2")]
1884#[cfg_attr(test, assert_instr(andnps))]
1885#[stable(feature = "simd_x86", since = "1.27.0")]
1886pub fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d {
1887 unsafe {
1888 let a: __m128i = transmute(a);
1889 let b: __m128i = transmute(b);
1890 transmute(_mm_andnot_si128(a, b))
1891 }
1892}
1893
1894#[inline]
1898#[target_feature(enable = "sse2")]
1899#[cfg_attr(test, assert_instr(orps))]
1900#[stable(feature = "simd_x86", since = "1.27.0")]
1901pub fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d {
1902 unsafe {
1903 let a: __m128i = transmute(a);
1904 let b: __m128i = transmute(b);
1905 transmute(_mm_or_si128(a, b))
1906 }
1907}
1908
1909#[inline]
1913#[target_feature(enable = "sse2")]
1914#[cfg_attr(test, assert_instr(xorps))]
1915#[stable(feature = "simd_x86", since = "1.27.0")]
1916pub fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d {
1917 unsafe {
1918 let a: __m128i = transmute(a);
1919 let b: __m128i = transmute(b);
1920 transmute(_mm_xor_si128(a, b))
1921 }
1922}
1923
/// Scalar equality comparison: calls `cmpsd` with predicate immediate `0`
/// (expected to lower to `cmpeqsd`).
///
/// Per Intel's documentation, the low lane becomes an all-ones or all-zeros
/// mask and the high lane is copied from `a`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpeqsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmpsd(a, b, 0) }
}
1935
/// Scalar less-than comparison: calls `cmpsd` with predicate immediate `1`
/// (expected to lower to `cmpltsd`).
///
/// Per Intel's documentation, the low lane becomes an all-ones or all-zeros
/// mask and the high lane is copied from `a`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpltsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmpsd(a, b, 1) }
}
1947
/// Scalar less-than-or-equal comparison: calls `cmpsd` with predicate
/// immediate `2` (expected to lower to `cmplesd`).
///
/// Per Intel's documentation, the low lane becomes an all-ones or all-zeros
/// mask and the high lane is copied from `a`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmplesd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmpsd(a, b, 2) }
}
1959
1960#[inline]
1965#[target_feature(enable = "sse2")]
1966#[cfg_attr(test, assert_instr(cmpltsd))]
1967#[stable(feature = "simd_x86", since = "1.27.0")]
1968pub fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d {
1969 unsafe { simd_insert!(_mm_cmplt_sd(b, a), 1, simd_extract!(a, 1, f64)) }
1970}
1971
1972#[inline]
1977#[target_feature(enable = "sse2")]
1978#[cfg_attr(test, assert_instr(cmplesd))]
1979#[stable(feature = "simd_x86", since = "1.27.0")]
1980pub fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d {
1981 unsafe { simd_insert!(_mm_cmple_sd(b, a), 1, simd_extract!(a, 1, f64)) }
1982}
1983
/// Scalar "ordered" comparison: calls `cmpsd` with predicate immediate `7`
/// (expected to lower to `cmpordsd`).
///
/// Per Intel's documentation, the low-lane mask is set when neither low lane
/// is NaN, and the high lane is copied from `a`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpordsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmpsd(a, b, 7) }
}
1997
/// Scalar "unordered" comparison: calls `cmpsd` with predicate immediate `3`
/// (expected to lower to `cmpunordsd`).
///
/// Per Intel's documentation, the low-lane mask is set when either low lane
/// is NaN, and the high lane is copied from `a`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpunordsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmpsd(a, b, 3) }
}
2010
/// Scalar not-equal comparison: calls `cmpsd` with predicate immediate `4`
/// (expected to lower to `cmpneqsd`).
///
/// Per Intel's documentation, the low lane becomes an all-ones or all-zeros
/// mask and the high lane is copied from `a`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpneqsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmpsd(a, b, 4) }
}
2022
/// Scalar not-less-than comparison: calls `cmpsd` with predicate immediate
/// `5` (expected to lower to `cmpnltsd`).
///
/// Per Intel's documentation, the low lane becomes an all-ones or all-zeros
/// mask and the high lane is copied from `a`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnltsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmpsd(a, b, 5) }
}
2034
/// Scalar not-less-than-or-equal comparison: calls `cmpsd` with predicate
/// immediate `6` (expected to lower to `cmpnlesd`).
///
/// Per Intel's documentation, the low lane becomes an all-ones or all-zeros
/// mask and the high lane is copied from `a`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnlesd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmpsd(a, b, 6) }
}
2046
2047#[inline]
2052#[target_feature(enable = "sse2")]
2053#[cfg_attr(test, assert_instr(cmpnltsd))]
2054#[stable(feature = "simd_x86", since = "1.27.0")]
2055pub fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d {
2056 unsafe { simd_insert!(_mm_cmpnlt_sd(b, a), 1, simd_extract!(a, 1, f64)) }
2057}
2058
2059#[inline]
2064#[target_feature(enable = "sse2")]
2065#[cfg_attr(test, assert_instr(cmpnlesd))]
2066#[stable(feature = "simd_x86", since = "1.27.0")]
2067pub fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d {
2068 unsafe { simd_insert!(_mm_cmpnle_sd(b, a), 1, simd_extract!(a, 1, f64)) }
2069}
2070
/// Packed equality comparison: calls `cmppd` with predicate immediate `0`
/// (expected to lower to `cmpeqpd`).
///
/// Per Intel's documentation, each result lane is an all-ones or all-zeros
/// mask.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpeqpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmppd(a, b, 0) }
}
2081
/// Packed less-than comparison: calls `cmppd` with predicate immediate `1`
/// (expected to lower to `cmpltpd`).
///
/// Per Intel's documentation, each result lane is an all-ones or all-zeros
/// mask.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpltpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmppd(a, b, 1) }
}
2092
/// Packed less-than-or-equal comparison: calls `cmppd` with predicate
/// immediate `2` (expected to lower to `cmplepd`).
///
/// Per Intel's documentation, each result lane is an all-ones or all-zeros
/// mask.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmplepd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmppd(a, b, 2) }
}
2103
/// Packed greater-than comparison, implemented as `_mm_cmplt_pd` with the
/// operands swapped (`a > b` is evaluated as `b < a`).
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpltpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d {
    _mm_cmplt_pd(b, a)
}
2114
/// Packed greater-than-or-equal comparison, implemented as `_mm_cmple_pd`
/// with the operands swapped (`a >= b` is evaluated as `b <= a`).
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmplepd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d {
    _mm_cmple_pd(b, a)
}
2125
/// Packed "ordered" comparison: calls `cmppd` with predicate immediate `7`
/// (expected to lower to `cmpordpd`).
///
/// Per Intel's documentation, a lane's mask is set when neither input lane
/// is NaN.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpordpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmppd(a, b, 7) }
}
2136
/// Packed "unordered" comparison: calls `cmppd` with predicate immediate `3`
/// (expected to lower to `cmpunordpd`).
///
/// Per Intel's documentation, a lane's mask is set when either input lane is
/// NaN.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpunordpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmppd(a, b, 3) }
}
2147
/// Packed not-equal comparison: calls `cmppd` with predicate immediate `4`
/// (expected to lower to `cmpneqpd`).
///
/// Per Intel's documentation, each result lane is an all-ones or all-zeros
/// mask.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpneqpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmppd(a, b, 4) }
}
2158
/// Packed not-less-than comparison: calls `cmppd` with predicate immediate
/// `5` (expected to lower to `cmpnltpd`).
///
/// Per Intel's documentation, each result lane is an all-ones or all-zeros
/// mask.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnltpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmppd(a, b, 5) }
}
2169
/// Packed not-less-than-or-equal comparison: calls `cmppd` with predicate
/// immediate `6` (expected to lower to `cmpnlepd`).
///
/// Per Intel's documentation, each result lane is an all-ones or all-zeros
/// mask.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnlepd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmppd(a, b, 6) }
}
2180
/// Packed not-greater-than comparison, implemented as `_mm_cmpnlt_pd` with
/// the operands swapped.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnltpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d {
    _mm_cmpnlt_pd(b, a)
}
2191
/// Packed not-greater-than-or-equal comparison, implemented as
/// `_mm_cmpnle_pd` with the operands swapped.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnlepd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d {
    _mm_cmpnle_pd(b, a)
}
2203
/// Forwards to the `comieqsd` intrinsic and returns its `i32` result; the
/// tests expect a `comisd` instruction.
///
/// Per Intel's documentation, the result reports whether the low lanes of
/// `a` and `b` are equal.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { comieqsd(a, b) }
}
2214
/// Forwards to the `comiltsd` intrinsic and returns its `i32` result; the
/// tests expect a `comisd` instruction.
///
/// Per Intel's documentation, the result reports whether the low lane of `a`
/// is less than the low lane of `b`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { comiltsd(a, b) }
}
2225
/// Forwards to the `comilesd` intrinsic and returns its `i32` result; the
/// tests expect a `comisd` instruction.
///
/// Per Intel's documentation, the result reports whether the low lane of `a`
/// is less than or equal to the low lane of `b`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { comilesd(a, b) }
}
2236
/// Forwards to the `comigtsd` intrinsic and returns its `i32` result; the
/// tests expect a `comisd` instruction.
///
/// Per Intel's documentation, the result reports whether the low lane of `a`
/// is greater than the low lane of `b`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { comigtsd(a, b) }
}
2247
/// Forwards to the `comigesd` intrinsic and returns its `i32` result; the
/// tests expect a `comisd` instruction.
///
/// Per Intel's documentation, the result reports whether the low lane of `a`
/// is greater than or equal to the low lane of `b`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { comigesd(a, b) }
}
2258
/// Forwards to the `comineqsd` intrinsic and returns its `i32` result; the
/// tests expect a `comisd` instruction.
///
/// Per Intel's documentation, the result reports whether the low lanes of
/// `a` and `b` are not equal.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { comineqsd(a, b) }
}
2269
/// Forwards to the `ucomieqsd` intrinsic and returns its `i32` result; the
/// tests expect a `ucomisd` instruction.
///
/// Per Intel's documentation, the result reports whether the low lanes of
/// `a` and `b` are equal.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { ucomieqsd(a, b) }
}
2280
/// Forwards to the `ucomiltsd` intrinsic and returns its `i32` result; the
/// tests expect a `ucomisd` instruction.
///
/// Per Intel's documentation, the result reports whether the low lane of `a`
/// is less than the low lane of `b`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { ucomiltsd(a, b) }
}
2291
/// Forwards to the `ucomilesd` intrinsic and returns its `i32` result; the
/// tests expect a `ucomisd` instruction.
///
/// Per Intel's documentation, the result reports whether the low lane of `a`
/// is less than or equal to the low lane of `b`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { ucomilesd(a, b) }
}
2302
/// Forwards to the `ucomigtsd` intrinsic and returns its `i32` result; the
/// tests expect a `ucomisd` instruction.
///
/// Per Intel's documentation, the result reports whether the low lane of `a`
/// is greater than the low lane of `b`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { ucomigtsd(a, b) }
}
2313
/// Forwards to the `ucomigesd` intrinsic and returns its `i32` result; the
/// tests expect a `ucomisd` instruction.
///
/// Per Intel's documentation, the result reports whether the low lane of `a`
/// is greater than or equal to the low lane of `b`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { ucomigesd(a, b) }
}
2324
/// Forwards to the `ucomineqsd` intrinsic and returns its `i32` result; the
/// tests expect a `ucomisd` instruction.
///
/// Per Intel's documentation, the result reports whether the low lanes of
/// `a` and `b` are not equal.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { ucomineqsd(a, b) }
}
2335
2336#[inline]
2341#[target_feature(enable = "sse2")]
2342#[cfg_attr(test, assert_instr(cvtpd2ps))]
2343#[stable(feature = "simd_x86", since = "1.27.0")]
2344pub fn _mm_cvtpd_ps(a: __m128d) -> __m128 {
2345 unsafe {
2346 let r = simd_cast::<_, f32x2>(a.as_f64x2());
2347 let zero = f32x2::ZERO;
2348 transmute::<f32x4, _>(simd_shuffle!(r, zero, [0, 1, 2, 3]))
2349 }
2350}
2351
2352#[inline]
2358#[target_feature(enable = "sse2")]
2359#[cfg_attr(test, assert_instr(cvtps2pd))]
2360#[stable(feature = "simd_x86", since = "1.27.0")]
2361pub fn _mm_cvtps_pd(a: __m128) -> __m128d {
2362 unsafe {
2363 let a = a.as_f32x4();
2364 transmute(simd_cast::<f32x2, f64x2>(simd_shuffle!(a, a, [0, 1])))
2365 }
2366}
2367
2368#[inline]
2373#[target_feature(enable = "sse2")]
2374#[cfg_attr(test, assert_instr(cvtpd2dq))]
2375#[stable(feature = "simd_x86", since = "1.27.0")]
2376pub fn _mm_cvtpd_epi32(a: __m128d) -> __m128i {
2377 unsafe { transmute(cvtpd2dq(a)) }
2378}
2379
/// Forwards `a` to the `cvtsd2si` intrinsic and returns its `i32` result.
///
/// Per Intel's documentation, this converts the low `f64` lane of `a` to a
/// 32-bit integer.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsd2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtsd_si32(a: __m128d) -> i32 {
    unsafe { cvtsd2si(a) }
}
2391
/// Forwards `a` and `b` to the `cvtsd2ss` intrinsic and returns its result.
///
/// Per Intel's documentation, the low `f64` lane of `b` is converted to `f32`
/// and placed in the low lane of the result, with the remaining lanes copied
/// from `a`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsd2ss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 {
    unsafe { cvtsd2ss(a, b) }
}
2405
/// Returns lane 0 (the low `f64` lane) of `a`.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtsd_f64(a: __m128d) -> f64 {
    unsafe { simd_extract!(a, 0) }
}
2415
2416#[inline]
2423#[target_feature(enable = "sse2")]
2424#[cfg_attr(test, assert_instr(cvtss2sd))]
2425#[stable(feature = "simd_x86", since = "1.27.0")]
2426pub fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d {
2427 unsafe {
2428 let elt: f32 = simd_extract!(b, 0);
2429 simd_insert!(a, 0, elt as f64)
2430 }
2431}
2432
2433#[inline]
2438#[target_feature(enable = "sse2")]
2439#[cfg_attr(test, assert_instr(cvttpd2dq))]
2440#[stable(feature = "simd_x86", since = "1.27.0")]
2441pub fn _mm_cvttpd_epi32(a: __m128d) -> __m128i {
2442 unsafe { transmute(cvttpd2dq(a)) }
2443}
2444
/// Forwards `a` to the `cvttsd2si` intrinsic and returns its `i32` result.
///
/// Per Intel's documentation, this converts the low `f64` lane of `a` to a
/// 32-bit integer with truncation.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvttsd2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvttsd_si32(a: __m128d) -> i32 {
    unsafe { cvttsd2si(a) }
}
2456
2457#[inline]
2462#[target_feature(enable = "sse2")]
2463#[cfg_attr(test, assert_instr(cvttps2dq))]
2464#[stable(feature = "simd_x86", since = "1.27.0")]
2465pub fn _mm_cvttps_epi32(a: __m128) -> __m128i {
2466 unsafe { transmute(cvttps2dq(a)) }
2467}
2468
2469#[inline]
2474#[target_feature(enable = "sse2")]
2475#[stable(feature = "simd_x86", since = "1.27.0")]
2476pub fn _mm_set_sd(a: f64) -> __m128d {
2477 _mm_set_pd(0.0, a)
2478}
2479
/// Builds a vector with both lanes set to `a`.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set1_pd(a: f64) -> __m128d {
    _mm_set_pd(a, a)
}
2490
2491#[inline]
2496#[target_feature(enable = "sse2")]
2497#[stable(feature = "simd_x86", since = "1.27.0")]
2498pub fn _mm_set_pd1(a: f64) -> __m128d {
2499 _mm_set_pd(a, a)
2500}
2501
/// Builds a vector from two values given high lane first.
///
/// `a` becomes lane 1 (high) and `b` becomes lane 0 (low).
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set_pd(a: f64, b: f64) -> __m128d {
    // The backing array is in lane order, hence the swapped arguments.
    __m128d([b, a])
}
2512
2513#[inline]
2518#[target_feature(enable = "sse2")]
2519#[stable(feature = "simd_x86", since = "1.27.0")]
2520pub fn _mm_setr_pd(a: f64, b: f64) -> __m128d {
2521 _mm_set_pd(b, a)
2522}
2523
/// Returns a `__m128d` with every bit zero.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorp))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_setzero_pd() -> __m128d {
    // The zeroed value is produced inside a `const` block.
    const { unsafe { mem::zeroed() } }
}
2535
2536#[inline]
2543#[target_feature(enable = "sse2")]
2544#[cfg_attr(test, assert_instr(movmskpd))]
2545#[stable(feature = "simd_x86", since = "1.27.0")]
2546pub fn _mm_movemask_pd(a: __m128d) -> i32 {
2547 unsafe {
2550 let mask: i64x2 = simd_lt(transmute(a), i64x2::ZERO);
2551 simd_bitmask::<i64x2, u8>(mask).into()
2552 }
2553}
2554
2555#[inline]
2562#[target_feature(enable = "sse2")]
2563#[cfg_attr(
2564 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
2565 assert_instr(movaps)
2566)]
2567#[stable(feature = "simd_x86", since = "1.27.0")]
2568#[allow(clippy::cast_ptr_alignment)]
2569pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d {
2570 *(mem_addr as *const __m128d)
2571}
2572
2573#[inline]
2578#[target_feature(enable = "sse2")]
2579#[cfg_attr(test, assert_instr(movsd))]
2580#[stable(feature = "simd_x86", since = "1.27.0")]
2581pub unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d {
2582 _mm_setr_pd(*mem_addr, 0.)
2583}
2584
2585#[inline]
2591#[target_feature(enable = "sse2")]
2592#[cfg_attr(test, assert_instr(movhps))]
2593#[stable(feature = "simd_x86", since = "1.27.0")]
2594pub unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
2595 _mm_setr_pd(simd_extract!(a, 0), *mem_addr)
2596}
2597
2598#[inline]
2604#[target_feature(enable = "sse2")]
2605#[cfg_attr(test, assert_instr(movlps))]
2606#[stable(feature = "simd_x86", since = "1.27.0")]
2607pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
2608 _mm_setr_pd(*mem_addr, simd_extract!(a, 1))
2609}
2610
/// Stores `a` to `mem_addr` with a `movntpd` instruction issued through
/// inline assembly.
///
/// # Safety
///
/// `mem_addr` is handed to the instruction as the store destination, so it
/// must be valid for writing a whole `__m128d`.
///
/// NOTE(review): `movntpd` is a non-temporal store; its alignment and
/// memory-ordering requirements are not visible here and should be confirmed
/// against Intel's documentation.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movntpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) {
    crate::arch::asm!(
        // `vps!` is defined elsewhere; presumably it builds the instruction
        // template around the `{p}` address operand.
        vps!("movntpd", ",{a}"),
        p = in(reg) mem_addr,
        a = in(xmm_reg) a,
        options(nostack, preserves_flags),
    );
}
2640
2641#[inline]
2646#[target_feature(enable = "sse2")]
2647#[cfg_attr(test, assert_instr(movlps))]
2648#[stable(feature = "simd_x86", since = "1.27.0")]
2649pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) {
2650 *mem_addr = simd_extract!(a, 0)
2651}
2652
2653#[inline]
2659#[target_feature(enable = "sse2")]
2660#[cfg_attr(
2661 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
2662 assert_instr(movaps)
2663)]
2664#[stable(feature = "simd_x86", since = "1.27.0")]
2665#[allow(clippy::cast_ptr_alignment)]
2666pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) {
2667 *(mem_addr as *mut __m128d) = a;
2668}
2669
2670#[inline]
2676#[target_feature(enable = "sse2")]
2677#[cfg_attr(test, assert_instr(movups))] #[stable(feature = "simd_x86", since = "1.27.0")]
2679pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) {
2680 mem_addr.cast::<__m128d>().write_unaligned(a);
2681}
2682
2683#[inline]
2689#[target_feature(enable = "sse2")]
2690#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2691pub unsafe fn _mm_storeu_si16(mem_addr: *mut u8, a: __m128i) {
2692 ptr::write_unaligned(mem_addr as *mut i16, simd_extract(a.as_i16x8(), 0))
2693}
2694
2695#[inline]
2701#[target_feature(enable = "sse2")]
2702#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2703pub unsafe fn _mm_storeu_si32(mem_addr: *mut u8, a: __m128i) {
2704 ptr::write_unaligned(mem_addr as *mut i32, simd_extract(a.as_i32x4(), 0))
2705}
2706
2707#[inline]
2713#[target_feature(enable = "sse2")]
2714#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2715pub unsafe fn _mm_storeu_si64(mem_addr: *mut u8, a: __m128i) {
2716 ptr::write_unaligned(mem_addr as *mut i64, simd_extract(a.as_i64x2(), 0))
2717}
2718
2719#[inline]
2725#[target_feature(enable = "sse2")]
2726#[stable(feature = "simd_x86", since = "1.27.0")]
2727#[allow(clippy::cast_ptr_alignment)]
2728pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) {
2729 let b: __m128d = simd_shuffle!(a, a, [0, 0]);
2730 *(mem_addr as *mut __m128d) = b;
2731}
2732
2733#[inline]
2739#[target_feature(enable = "sse2")]
2740#[stable(feature = "simd_x86", since = "1.27.0")]
2741#[allow(clippy::cast_ptr_alignment)]
2742pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) {
2743 let b: __m128d = simd_shuffle!(a, a, [0, 0]);
2744 *(mem_addr as *mut __m128d) = b;
2745}
2746
2747#[inline]
2754#[target_feature(enable = "sse2")]
2755#[stable(feature = "simd_x86", since = "1.27.0")]
2756#[allow(clippy::cast_ptr_alignment)]
2757pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) {
2758 let b: __m128d = simd_shuffle!(a, a, [1, 0]);
2759 *(mem_addr as *mut __m128d) = b;
2760}
2761
2762#[inline]
2767#[target_feature(enable = "sse2")]
2768#[cfg_attr(test, assert_instr(movhps))]
2769#[stable(feature = "simd_x86", since = "1.27.0")]
2770pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) {
2771 *mem_addr = simd_extract!(a, 1);
2772}
2773
2774#[inline]
2779#[target_feature(enable = "sse2")]
2780#[cfg_attr(test, assert_instr(movlps))]
2781#[stable(feature = "simd_x86", since = "1.27.0")]
2782pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) {
2783 *mem_addr = simd_extract!(a, 0);
2784}
2785
2786#[inline]
2791#[target_feature(enable = "sse2")]
2792#[stable(feature = "simd_x86", since = "1.27.0")]
2794pub unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d {
2795 let d = *mem_addr;
2796 _mm_setr_pd(d, d)
2797}
2798
/// Loads one `f64` from `mem_addr` into both lanes by delegating to
/// `_mm_load1_pd`.
///
/// # Safety
///
/// `mem_addr` is passed unchanged to `_mm_load1_pd`, so that function's
/// pointer requirements apply.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d {
    _mm_load1_pd(mem_addr)
}
2810
2811#[inline]
2817#[target_feature(enable = "sse2")]
2818#[cfg_attr(
2819 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
2820 assert_instr(movaps)
2821)]
2822#[stable(feature = "simd_x86", since = "1.27.0")]
2823pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d {
2824 let a = _mm_load_pd(mem_addr);
2825 simd_shuffle!(a, a, [1, 0])
2826}
2827
2828#[inline]
2834#[target_feature(enable = "sse2")]
2835#[cfg_attr(test, assert_instr(movups))]
2836#[stable(feature = "simd_x86", since = "1.27.0")]
2837pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d {
2838 let mut dst = _mm_undefined_pd();
2839 ptr::copy_nonoverlapping(
2840 mem_addr as *const u8,
2841 ptr::addr_of_mut!(dst) as *mut u8,
2842 mem::size_of::<__m128d>(),
2843 );
2844 dst
2845}
2846
2847#[inline]
2853#[target_feature(enable = "sse2")]
2854#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2855pub unsafe fn _mm_loadu_si16(mem_addr: *const u8) -> __m128i {
2856 transmute(i16x8::new(
2857 ptr::read_unaligned(mem_addr as *const i16),
2858 0,
2859 0,
2860 0,
2861 0,
2862 0,
2863 0,
2864 0,
2865 ))
2866}
2867
2868#[inline]
2874#[target_feature(enable = "sse2")]
2875#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2876pub unsafe fn _mm_loadu_si32(mem_addr: *const u8) -> __m128i {
2877 transmute(i32x4::new(
2878 ptr::read_unaligned(mem_addr as *const i32),
2879 0,
2880 0,
2881 0,
2882 ))
2883}
2884
2885#[inline]
2891#[target_feature(enable = "sse2")]
2892#[stable(feature = "simd_x86_mm_loadu_si64", since = "1.46.0")]
2893pub unsafe fn _mm_loadu_si64(mem_addr: *const u8) -> __m128i {
2894 transmute(i64x2::new(ptr::read_unaligned(mem_addr as *const i64), 0))
2895}
2896
/// Builds a vector from one lane of `a` and one lane of `b`, selected by
/// `MASK`.
///
/// Bit 0 of `MASK` picks which lane of `a` becomes result lane 0; bit 1 picks
/// which lane of `b` becomes result lane 1. `MASK` is checked at compile time
/// to fit in 8 bits, but only these two bits are used.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(shufps, MASK = 2))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_shuffle_pd<const MASK: i32>(a: __m128d, b: __m128d) -> __m128d {
    static_assert_uimm_bits!(MASK, 8);
    // The `+ 2` offsets the second selector into `b`'s lanes (indices 2 and 3).
    unsafe { simd_shuffle!(a, b, [MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 2]) }
}
2911
2912#[inline]
2918#[target_feature(enable = "sse2")]
2919#[cfg_attr(test, assert_instr(movsd))]
2920#[stable(feature = "simd_x86", since = "1.27.0")]
2921pub fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d {
2922 unsafe { _mm_setr_pd(simd_extract!(b, 0), simd_extract!(a, 1)) }
2923}
2924
/// Reinterprets the bits of a `__m128d` as a `__m128`.
///
/// This is a `transmute`: the bit pattern is unchanged and no numeric
/// conversion takes place.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_castpd_ps(a: __m128d) -> __m128 {
    unsafe { transmute(a) }
}
2935
/// Reinterprets the bits of a `__m128d` as a `__m128i`.
///
/// This is a `transmute`: the bit pattern is unchanged and no numeric
/// conversion takes place.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_castpd_si128(a: __m128d) -> __m128i {
    unsafe { transmute(a) }
}
2946
2947#[inline]
2952#[target_feature(enable = "sse2")]
2953#[stable(feature = "simd_x86", since = "1.27.0")]
2954pub fn _mm_castps_pd(a: __m128) -> __m128d {
2955 unsafe { transmute(a) }
2956}
2957
2958#[inline]
2963#[target_feature(enable = "sse2")]
2964#[stable(feature = "simd_x86", since = "1.27.0")]
2965pub fn _mm_castps_si128(a: __m128) -> __m128i {
2966 unsafe { transmute(a) }
2967}
2968
2969#[inline]
2974#[target_feature(enable = "sse2")]
2975#[stable(feature = "simd_x86", since = "1.27.0")]
2976pub fn _mm_castsi128_pd(a: __m128i) -> __m128d {
2977 unsafe { transmute(a) }
2978}
2979
2980#[inline]
2985#[target_feature(enable = "sse2")]
2986#[stable(feature = "simd_x86", since = "1.27.0")]
2987pub fn _mm_castsi128_ps(a: __m128i) -> __m128 {
2988 unsafe { transmute(a) }
2989}
2990
2991#[inline]
2998#[target_feature(enable = "sse2")]
2999#[stable(feature = "simd_x86", since = "1.27.0")]
3000pub fn _mm_undefined_pd() -> __m128d {
3001 const { unsafe { mem::zeroed() } }
3002}
3003
3004#[inline]
3011#[target_feature(enable = "sse2")]
3012#[stable(feature = "simd_x86", since = "1.27.0")]
3013pub fn _mm_undefined_si128() -> __m128i {
3014 const { unsafe { mem::zeroed() } }
3015}
3016
/// Combines the high lanes of both inputs: result lane 0 is lane 1 of `a`
/// and result lane 1 is lane 1 of `b` (shuffle index 3 addresses the second
/// operand's lane 1).
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(unpckhpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { simd_shuffle!(a, b, [1, 3]) }
}
3031
/// Combines the low lanes of both inputs: result lane 0 is lane 0 of `a`
/// and result lane 1 is lane 0 of `b` (shuffle index 2 addresses the second
/// operand's lane 0).
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movlhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { simd_shuffle!(a, b, [0, 2]) }
}
3046
// Bindings to the LLVM SSE2 intrinsics. Each `link_name` is the LLVM intrinsic
// that the declaration directly below it resolves to.
// NOTE(review): `improper_ctypes` is presumably silenced because the SIMD
// vector types in these signatures trip that lint - confirm.
#[allow(improper_ctypes)]
unsafe extern "C" {
    // Spin-wait hint, cache-line flush and fences.
    #[link_name = "llvm.x86.sse2.pause"]
    fn pause();
    #[link_name = "llvm.x86.sse2.clflush"]
    fn clflush(p: *const u8);
    #[link_name = "llvm.x86.sse2.lfence"]
    fn lfence();
    #[link_name = "llvm.x86.sse2.mfence"]
    fn mfence();

    // Sum of absolute differences on unsigned bytes.
    #[link_name = "llvm.x86.sse2.psad.bw"]
    fn psadbw(a: u8x16, b: u8x16) -> u64x2;

    // Packed shifts whose count is supplied in a vector operand.
    #[link_name = "llvm.x86.sse2.psll.w"]
    fn psllw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.psll.d"]
    fn pslld(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.psll.q"]
    fn psllq(a: i64x2, count: i64x2) -> i64x2;
    #[link_name = "llvm.x86.sse2.psra.w"]
    fn psraw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.psra.d"]
    fn psrad(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.psrl.w"]
    fn psrlw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.psrl.d"]
    fn psrld(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.psrl.q"]
    fn psrlq(a: i64x2, count: i64x2) -> i64x2;

    // Packed single-precision to 32-bit integer conversion.
    #[link_name = "llvm.x86.sse2.cvtps2dq"]
    fn cvtps2dq(a: __m128) -> i32x4;

    // Byte-masked store to `mem_addr`.
    #[link_name = "llvm.x86.sse2.maskmov.dqu"]
    fn maskmovdqu(a: i8x16, mask: i8x16, mem_addr: *mut i8);

    // Narrowing packs.
    #[link_name = "llvm.x86.sse2.packsswb.128"]
    fn packsswb(a: i16x8, b: i16x8) -> i8x16;
    #[link_name = "llvm.x86.sse2.packssdw.128"]
    fn packssdw(a: i32x4, b: i32x4) -> i16x8;
    #[link_name = "llvm.x86.sse2.packuswb.128"]
    fn packuswb(a: i16x8, b: i16x8) -> u8x16;

    // Double-precision min/max, scalar (`sd`) and packed (`pd`).
    #[link_name = "llvm.x86.sse2.max.sd"]
    fn maxsd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.max.pd"]
    fn maxpd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.min.sd"]
    fn minsd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.min.pd"]
    fn minpd(a: __m128d, b: __m128d) -> __m128d;

    // Double-precision compares; `imm8` selects the comparison predicate.
    #[link_name = "llvm.x86.sse2.cmp.sd"]
    fn cmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
    #[link_name = "llvm.x86.sse2.cmp.pd"]
    fn cmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;

    // Scalar double compares returning an `i32` result (`comi*` family).
    #[link_name = "llvm.x86.sse2.comieq.sd"]
    fn comieqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comilt.sd"]
    fn comiltsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comile.sd"]
    fn comilesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comigt.sd"]
    fn comigtsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comige.sd"]
    fn comigesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comineq.sd"]
    fn comineqsd(a: __m128d, b: __m128d) -> i32;

    // Scalar double compares returning an `i32` result (`ucomi*` family).
    #[link_name = "llvm.x86.sse2.ucomieq.sd"]
    fn ucomieqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomilt.sd"]
    fn ucomiltsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomile.sd"]
    fn ucomilesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomigt.sd"]
    fn ucomigtsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomige.sd"]
    fn ucomigesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomineq.sd"]
    fn ucomineqsd(a: __m128d, b: __m128d) -> i32;

    // Floating-point conversions.
    #[link_name = "llvm.x86.sse2.cvtpd2dq"]
    fn cvtpd2dq(a: __m128d) -> i32x4;
    #[link_name = "llvm.x86.sse2.cvtsd2si"]
    fn cvtsd2si(a: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.cvtsd2ss"]
    fn cvtsd2ss(a: __m128, b: __m128d) -> __m128;
    #[link_name = "llvm.x86.sse2.cvttpd2dq"]
    fn cvttpd2dq(a: __m128d) -> i32x4;
    #[link_name = "llvm.x86.sse2.cvttsd2si"]
    fn cvttsd2si(a: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.cvttps2dq"]
    fn cvttps2dq(a: __m128) -> i32x4;
}
3134
3135#[cfg(test)]
3136mod tests {
3137 use crate::{
3138 core_arch::{simd::*, x86::*},
3139 hint::black_box,
3140 };
3141 use std::{
3142 boxed, f32, f64,
3143 mem::{self, transmute},
3144 ptr,
3145 };
3146 use stdarch_test::simd_test;
3147
// Local shorthand for `f64::NAN`.
const NAN: f64 = f64::NAN;
3149
#[test]
fn test_mm_pause() {
    // Smoke test: the call only has to return.
    _mm_pause();
}
3154
3155 #[simd_test(enable = "sse2")]
3156 unsafe fn test_mm_clflush() {
3157 let x = 0_u8;
3158 _mm_clflush(ptr::addr_of!(x));
3159 }
3160
#[simd_test(enable = "sse2")]
#[cfg_attr(miri, ignore)]
unsafe fn test_mm_lfence() {
    // Smoke test only; the attribute above skips it under Miri.
    _mm_lfence();
}
3167
#[simd_test(enable = "sse2")]
#[cfg_attr(miri, ignore)]
unsafe fn test_mm_mfence() {
    // Smoke test only; the attribute above skips it under Miri.
    _mm_mfence();
}
3174
3175 #[simd_test(enable = "sse2")]
3176 unsafe fn test_mm_add_epi8() {
3177 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3178 #[rustfmt::skip]
3179 let b = _mm_setr_epi8(
3180 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3181 );
3182 let r = _mm_add_epi8(a, b);
3183 #[rustfmt::skip]
3184 let e = _mm_setr_epi8(
3185 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3186 );
3187 assert_eq_m128i(r, e);
3188 }
3189
3190 #[simd_test(enable = "sse2")]
3191 unsafe fn test_mm_add_epi8_overflow() {
3192 let a = _mm_set1_epi8(0x7F);
3193 let b = _mm_set1_epi8(1);
3194 let r = _mm_add_epi8(a, b);
3195 assert_eq_m128i(r, _mm_set1_epi8(-128));
3196 }
3197
3198 #[simd_test(enable = "sse2")]
3199 unsafe fn test_mm_add_epi16() {
3200 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3201 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3202 let r = _mm_add_epi16(a, b);
3203 let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3204 assert_eq_m128i(r, e);
3205 }
3206
3207 #[simd_test(enable = "sse2")]
3208 unsafe fn test_mm_add_epi32() {
3209 let a = _mm_setr_epi32(0, 1, 2, 3);
3210 let b = _mm_setr_epi32(4, 5, 6, 7);
3211 let r = _mm_add_epi32(a, b);
3212 let e = _mm_setr_epi32(4, 6, 8, 10);
3213 assert_eq_m128i(r, e);
3214 }
3215
3216 #[simd_test(enable = "sse2")]
3217 unsafe fn test_mm_add_epi64() {
3218 let a = _mm_setr_epi64x(0, 1);
3219 let b = _mm_setr_epi64x(2, 3);
3220 let r = _mm_add_epi64(a, b);
3221 let e = _mm_setr_epi64x(2, 4);
3222 assert_eq_m128i(r, e);
3223 }
3224
3225 #[simd_test(enable = "sse2")]
3226 unsafe fn test_mm_adds_epi8() {
3227 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3228 #[rustfmt::skip]
3229 let b = _mm_setr_epi8(
3230 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3231 );
3232 let r = _mm_adds_epi8(a, b);
3233 #[rustfmt::skip]
3234 let e = _mm_setr_epi8(
3235 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3236 );
3237 assert_eq_m128i(r, e);
3238 }
3239
3240 #[simd_test(enable = "sse2")]
3241 unsafe fn test_mm_adds_epi8_saturate_positive() {
3242 let a = _mm_set1_epi8(0x7F);
3243 let b = _mm_set1_epi8(1);
3244 let r = _mm_adds_epi8(a, b);
3245 assert_eq_m128i(r, a);
3246 }
3247
3248 #[simd_test(enable = "sse2")]
3249 unsafe fn test_mm_adds_epi8_saturate_negative() {
3250 let a = _mm_set1_epi8(-0x80);
3251 let b = _mm_set1_epi8(-1);
3252 let r = _mm_adds_epi8(a, b);
3253 assert_eq_m128i(r, a);
3254 }
3255
3256 #[simd_test(enable = "sse2")]
3257 unsafe fn test_mm_adds_epi16() {
3258 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3259 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3260 let r = _mm_adds_epi16(a, b);
3261 let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3262 assert_eq_m128i(r, e);
3263 }
3264
3265 #[simd_test(enable = "sse2")]
3266 unsafe fn test_mm_adds_epi16_saturate_positive() {
3267 let a = _mm_set1_epi16(0x7FFF);
3268 let b = _mm_set1_epi16(1);
3269 let r = _mm_adds_epi16(a, b);
3270 assert_eq_m128i(r, a);
3271 }
3272
3273 #[simd_test(enable = "sse2")]
3274 unsafe fn test_mm_adds_epi16_saturate_negative() {
3275 let a = _mm_set1_epi16(-0x8000);
3276 let b = _mm_set1_epi16(-1);
3277 let r = _mm_adds_epi16(a, b);
3278 assert_eq_m128i(r, a);
3279 }
3280
3281 #[simd_test(enable = "sse2")]
3282 unsafe fn test_mm_adds_epu8() {
3283 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3284 #[rustfmt::skip]
3285 let b = _mm_setr_epi8(
3286 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3287 );
3288 let r = _mm_adds_epu8(a, b);
3289 #[rustfmt::skip]
3290 let e = _mm_setr_epi8(
3291 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3292 );
3293 assert_eq_m128i(r, e);
3294 }
3295
3296 #[simd_test(enable = "sse2")]
3297 unsafe fn test_mm_adds_epu8_saturate() {
3298 let a = _mm_set1_epi8(!0);
3299 let b = _mm_set1_epi8(1);
3300 let r = _mm_adds_epu8(a, b);
3301 assert_eq_m128i(r, a);
3302 }
3303
3304 #[simd_test(enable = "sse2")]
3305 unsafe fn test_mm_adds_epu16() {
3306 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3307 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3308 let r = _mm_adds_epu16(a, b);
3309 let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3310 assert_eq_m128i(r, e);
3311 }
3312
3313 #[simd_test(enable = "sse2")]
3314 unsafe fn test_mm_adds_epu16_saturate() {
3315 let a = _mm_set1_epi16(!0);
3316 let b = _mm_set1_epi16(1);
3317 let r = _mm_adds_epu16(a, b);
3318 assert_eq_m128i(r, a);
3319 }
3320
3321 #[simd_test(enable = "sse2")]
3322 unsafe fn test_mm_avg_epu8() {
3323 let (a, b) = (_mm_set1_epi8(3), _mm_set1_epi8(9));
3324 let r = _mm_avg_epu8(a, b);
3325 assert_eq_m128i(r, _mm_set1_epi8(6));
3326 }
3327
3328 #[simd_test(enable = "sse2")]
3329 unsafe fn test_mm_avg_epu16() {
3330 let (a, b) = (_mm_set1_epi16(3), _mm_set1_epi16(9));
3331 let r = _mm_avg_epu16(a, b);
3332 assert_eq_m128i(r, _mm_set1_epi16(6));
3333 }
3334
3335 #[simd_test(enable = "sse2")]
3336 unsafe fn test_mm_madd_epi16() {
3337 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
3338 let b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
3339 let r = _mm_madd_epi16(a, b);
3340 let e = _mm_setr_epi32(29, 81, 149, 233);
3341 assert_eq_m128i(r, e);
3342
3343 let a = _mm_setr_epi16(
3346 i16::MAX,
3347 i16::MAX,
3348 i16::MIN,
3349 i16::MIN,
3350 i16::MIN,
3351 i16::MAX,
3352 0,
3353 0,
3354 );
3355 let b = _mm_setr_epi16(
3356 i16::MAX,
3357 i16::MAX,
3358 i16::MIN,
3359 i16::MIN,
3360 i16::MAX,
3361 i16::MIN,
3362 0,
3363 0,
3364 );
3365 let r = _mm_madd_epi16(a, b);
3366 let e = _mm_setr_epi32(0x7FFE0002, i32::MIN, -0x7FFF0000, 0);
3367 assert_eq_m128i(r, e);
3368 }
3369
3370 #[simd_test(enable = "sse2")]
3371 unsafe fn test_mm_max_epi16() {
3372 let a = _mm_set1_epi16(1);
3373 let b = _mm_set1_epi16(-1);
3374 let r = _mm_max_epi16(a, b);
3375 assert_eq_m128i(r, a);
3376 }
3377
3378 #[simd_test(enable = "sse2")]
3379 unsafe fn test_mm_max_epu8() {
3380 let a = _mm_set1_epi8(1);
3381 let b = _mm_set1_epi8(!0);
3382 let r = _mm_max_epu8(a, b);
3383 assert_eq_m128i(r, b);
3384 }
3385
3386 #[simd_test(enable = "sse2")]
3387 unsafe fn test_mm_min_epi16() {
3388 let a = _mm_set1_epi16(1);
3389 let b = _mm_set1_epi16(-1);
3390 let r = _mm_min_epi16(a, b);
3391 assert_eq_m128i(r, b);
3392 }
3393
3394 #[simd_test(enable = "sse2")]
3395 unsafe fn test_mm_min_epu8() {
3396 let a = _mm_set1_epi8(1);
3397 let b = _mm_set1_epi8(!0);
3398 let r = _mm_min_epu8(a, b);
3399 assert_eq_m128i(r, a);
3400 }
3401
3402 #[simd_test(enable = "sse2")]
3403 unsafe fn test_mm_mulhi_epi16() {
3404 let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
3405 let r = _mm_mulhi_epi16(a, b);
3406 assert_eq_m128i(r, _mm_set1_epi16(-16));
3407 }
3408
3409 #[simd_test(enable = "sse2")]
3410 unsafe fn test_mm_mulhi_epu16() {
3411 let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(1001));
3412 let r = _mm_mulhi_epu16(a, b);
3413 assert_eq_m128i(r, _mm_set1_epi16(15));
3414 }
3415
3416 #[simd_test(enable = "sse2")]
3417 unsafe fn test_mm_mullo_epi16() {
3418 let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
3419 let r = _mm_mullo_epi16(a, b);
3420 assert_eq_m128i(r, _mm_set1_epi16(-17960));
3421 }
3422
3423 #[simd_test(enable = "sse2")]
3424 unsafe fn test_mm_mul_epu32() {
3425 let a = _mm_setr_epi64x(1_000_000_000, 1 << 34);
3426 let b = _mm_setr_epi64x(1_000_000_000, 1 << 35);
3427 let r = _mm_mul_epu32(a, b);
3428 let e = _mm_setr_epi64x(1_000_000_000 * 1_000_000_000, 0);
3429 assert_eq_m128i(r, e);
3430 }
3431
3432 #[simd_test(enable = "sse2")]
3433 unsafe fn test_mm_sad_epu8() {
3434 #[rustfmt::skip]
3435 let a = _mm_setr_epi8(
3436 255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
3437 1, 2, 3, 4,
3438 155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8,
3439 1, 2, 3, 4,
3440 );
3441 let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2);
3442 let r = _mm_sad_epu8(a, b);
3443 let e = _mm_setr_epi64x(1020, 614);
3444 assert_eq_m128i(r, e);
3445 }
3446
3447 #[simd_test(enable = "sse2")]
3448 unsafe fn test_mm_sub_epi8() {
3449 let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(6));
3450 let r = _mm_sub_epi8(a, b);
3451 assert_eq_m128i(r, _mm_set1_epi8(-1));
3452 }
3453
3454 #[simd_test(enable = "sse2")]
3455 unsafe fn test_mm_sub_epi16() {
3456 let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(6));
3457 let r = _mm_sub_epi16(a, b);
3458 assert_eq_m128i(r, _mm_set1_epi16(-1));
3459 }
3460
3461 #[simd_test(enable = "sse2")]
3462 unsafe fn test_mm_sub_epi32() {
3463 let (a, b) = (_mm_set1_epi32(5), _mm_set1_epi32(6));
3464 let r = _mm_sub_epi32(a, b);
3465 assert_eq_m128i(r, _mm_set1_epi32(-1));
3466 }
3467
3468 #[simd_test(enable = "sse2")]
3469 unsafe fn test_mm_sub_epi64() {
3470 let (a, b) = (_mm_set1_epi64x(5), _mm_set1_epi64x(6));
3471 let r = _mm_sub_epi64(a, b);
3472 assert_eq_m128i(r, _mm_set1_epi64x(-1));
3473 }
3474
3475 #[simd_test(enable = "sse2")]
3476 unsafe fn test_mm_subs_epi8() {
3477 let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
3478 let r = _mm_subs_epi8(a, b);
3479 assert_eq_m128i(r, _mm_set1_epi8(3));
3480 }
3481
3482 #[simd_test(enable = "sse2")]
3483 unsafe fn test_mm_subs_epi8_saturate_positive() {
3484 let a = _mm_set1_epi8(0x7F);
3485 let b = _mm_set1_epi8(-1);
3486 let r = _mm_subs_epi8(a, b);
3487 assert_eq_m128i(r, a);
3488 }
3489
3490 #[simd_test(enable = "sse2")]
3491 unsafe fn test_mm_subs_epi8_saturate_negative() {
3492 let a = _mm_set1_epi8(-0x80);
3493 let b = _mm_set1_epi8(1);
3494 let r = _mm_subs_epi8(a, b);
3495 assert_eq_m128i(r, a);
3496 }
3497
3498 #[simd_test(enable = "sse2")]
3499 unsafe fn test_mm_subs_epi16() {
3500 let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
3501 let r = _mm_subs_epi16(a, b);
3502 assert_eq_m128i(r, _mm_set1_epi16(3));
3503 }
3504
3505 #[simd_test(enable = "sse2")]
3506 unsafe fn test_mm_subs_epi16_saturate_positive() {
3507 let a = _mm_set1_epi16(0x7FFF);
3508 let b = _mm_set1_epi16(-1);
3509 let r = _mm_subs_epi16(a, b);
3510 assert_eq_m128i(r, a);
3511 }
3512
3513 #[simd_test(enable = "sse2")]
3514 unsafe fn test_mm_subs_epi16_saturate_negative() {
3515 let a = _mm_set1_epi16(-0x8000);
3516 let b = _mm_set1_epi16(1);
3517 let r = _mm_subs_epi16(a, b);
3518 assert_eq_m128i(r, a);
3519 }
3520
3521 #[simd_test(enable = "sse2")]
3522 unsafe fn test_mm_subs_epu8() {
3523 let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
3524 let r = _mm_subs_epu8(a, b);
3525 assert_eq_m128i(r, _mm_set1_epi8(3));
3526 }
3527
3528 #[simd_test(enable = "sse2")]
3529 unsafe fn test_mm_subs_epu8_saturate() {
3530 let a = _mm_set1_epi8(0);
3531 let b = _mm_set1_epi8(1);
3532 let r = _mm_subs_epu8(a, b);
3533 assert_eq_m128i(r, a);
3534 }
3535
3536 #[simd_test(enable = "sse2")]
3537 unsafe fn test_mm_subs_epu16() {
3538 let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
3539 let r = _mm_subs_epu16(a, b);
3540 assert_eq_m128i(r, _mm_set1_epi16(3));
3541 }
3542
3543 #[simd_test(enable = "sse2")]
3544 unsafe fn test_mm_subs_epu16_saturate() {
3545 let a = _mm_set1_epi16(0);
3546 let b = _mm_set1_epi16(1);
3547 let r = _mm_subs_epu16(a, b);
3548 assert_eq_m128i(r, a);
3549 }
3550
3551 #[simd_test(enable = "sse2")]
3552 unsafe fn test_mm_slli_si128() {
3553 #[rustfmt::skip]
3554 let a = _mm_setr_epi8(
3555 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3556 );
3557 let r = _mm_slli_si128::<1>(a);
3558 let e = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3559 assert_eq_m128i(r, e);
3560
3561 #[rustfmt::skip]
3562 let a = _mm_setr_epi8(
3563 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3564 );
3565 let r = _mm_slli_si128::<15>(a);
3566 let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
3567 assert_eq_m128i(r, e);
3568
3569 #[rustfmt::skip]
3570 let a = _mm_setr_epi8(
3571 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3572 );
3573 let r = _mm_slli_si128::<16>(a);
3574 assert_eq_m128i(r, _mm_set1_epi8(0));
3575 }
3576
3577 #[simd_test(enable = "sse2")]
3578 unsafe fn test_mm_slli_epi16() {
3579 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3580 let r = _mm_slli_epi16::<4>(a);
3581 assert_eq_m128i(
3582 r,
3583 _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
3584 );
3585 let r = _mm_slli_epi16::<16>(a);
3586 assert_eq_m128i(r, _mm_set1_epi16(0));
3587 }
3588
3589 #[simd_test(enable = "sse2")]
3590 unsafe fn test_mm_sll_epi16() {
3591 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3592 let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 4));
3593 assert_eq_m128i(
3594 r,
3595 _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
3596 );
3597 let r = _mm_sll_epi16(a, _mm_set_epi64x(4, 0));
3598 assert_eq_m128i(r, a);
3599 let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 16));
3600 assert_eq_m128i(r, _mm_set1_epi16(0));
3601 let r = _mm_sll_epi16(a, _mm_set_epi64x(0, i64::MAX));
3602 assert_eq_m128i(r, _mm_set1_epi16(0));
3603 }
3604
3605 #[simd_test(enable = "sse2")]
3606 unsafe fn test_mm_slli_epi32() {
3607 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3608 let r = _mm_slli_epi32::<4>(a);
3609 assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
3610 let r = _mm_slli_epi32::<32>(a);
3611 assert_eq_m128i(r, _mm_set1_epi32(0));
3612 }
3613
3614 #[simd_test(enable = "sse2")]
3615 unsafe fn test_mm_sll_epi32() {
3616 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3617 let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 4));
3618 assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
3619 let r = _mm_sll_epi32(a, _mm_set_epi64x(4, 0));
3620 assert_eq_m128i(r, a);
3621 let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 32));
3622 assert_eq_m128i(r, _mm_set1_epi32(0));
3623 let r = _mm_sll_epi32(a, _mm_set_epi64x(0, i64::MAX));
3624 assert_eq_m128i(r, _mm_set1_epi32(0));
3625 }
3626
3627 #[simd_test(enable = "sse2")]
3628 unsafe fn test_mm_slli_epi64() {
3629 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3630 let r = _mm_slli_epi64::<4>(a);
3631 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
3632 let r = _mm_slli_epi64::<64>(a);
3633 assert_eq_m128i(r, _mm_set1_epi64x(0));
3634 }
3635
3636 #[simd_test(enable = "sse2")]
3637 unsafe fn test_mm_sll_epi64() {
3638 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3639 let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 4));
3640 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
3641 let r = _mm_sll_epi64(a, _mm_set_epi64x(4, 0));
3642 assert_eq_m128i(r, a);
3643 let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 64));
3644 assert_eq_m128i(r, _mm_set1_epi64x(0));
3645 let r = _mm_sll_epi64(a, _mm_set_epi64x(0, i64::MAX));
3646 assert_eq_m128i(r, _mm_set1_epi64x(0));
3647 }
3648
3649 #[simd_test(enable = "sse2")]
3650 unsafe fn test_mm_srai_epi16() {
3651 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3652 let r = _mm_srai_epi16::<4>(a);
3653 assert_eq_m128i(
3654 r,
3655 _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10),
3656 );
3657 let r = _mm_srai_epi16::<16>(a);
3658 assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3659 }
3660
3661 #[simd_test(enable = "sse2")]
3662 unsafe fn test_mm_sra_epi16() {
3663 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3664 let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 4));
3665 assert_eq_m128i(
3666 r,
3667 _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10),
3668 );
3669 let r = _mm_sra_epi16(a, _mm_set_epi64x(4, 0));
3670 assert_eq_m128i(r, a);
3671 let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 16));
3672 assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3673 let r = _mm_sra_epi16(a, _mm_set_epi64x(0, i64::MAX));
3674 assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3675 }
3676
3677 #[simd_test(enable = "sse2")]
3678 unsafe fn test_mm_srai_epi32() {
3679 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3680 let r = _mm_srai_epi32::<4>(a);
3681 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
3682 let r = _mm_srai_epi32::<32>(a);
3683 assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3684 }
3685
3686 #[simd_test(enable = "sse2")]
3687 unsafe fn test_mm_sra_epi32() {
3688 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3689 let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 4));
3690 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
3691 let r = _mm_sra_epi32(a, _mm_set_epi64x(4, 0));
3692 assert_eq_m128i(r, a);
3693 let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 32));
3694 assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3695 let r = _mm_sra_epi32(a, _mm_set_epi64x(0, i64::MAX));
3696 assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3697 }
3698
3699 #[simd_test(enable = "sse2")]
3700 unsafe fn test_mm_srli_si128() {
3701 #[rustfmt::skip]
3702 let a = _mm_setr_epi8(
3703 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3704 );
3705 let r = _mm_srli_si128::<1>(a);
3706 #[rustfmt::skip]
3707 let e = _mm_setr_epi8(
3708 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0,
3709 );
3710 assert_eq_m128i(r, e);
3711
3712 #[rustfmt::skip]
3713 let a = _mm_setr_epi8(
3714 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3715 );
3716 let r = _mm_srli_si128::<15>(a);
3717 let e = _mm_setr_epi8(16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3718 assert_eq_m128i(r, e);
3719
3720 #[rustfmt::skip]
3721 let a = _mm_setr_epi8(
3722 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3723 );
3724 let r = _mm_srli_si128::<16>(a);
3725 assert_eq_m128i(r, _mm_set1_epi8(0));
3726 }
3727
3728 #[simd_test(enable = "sse2")]
3729 unsafe fn test_mm_srli_epi16() {
3730 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3731 let r = _mm_srli_epi16::<4>(a);
3732 assert_eq_m128i(
3733 r,
3734 _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0),
3735 );
3736 let r = _mm_srli_epi16::<16>(a);
3737 assert_eq_m128i(r, _mm_set1_epi16(0));
3738 }
3739
3740 #[simd_test(enable = "sse2")]
3741 unsafe fn test_mm_srl_epi16() {
3742 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3743 let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 4));
3744 assert_eq_m128i(
3745 r,
3746 _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0),
3747 );
3748 let r = _mm_srl_epi16(a, _mm_set_epi64x(4, 0));
3749 assert_eq_m128i(r, a);
3750 let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 16));
3751 assert_eq_m128i(r, _mm_set1_epi16(0));
3752 let r = _mm_srl_epi16(a, _mm_set_epi64x(0, i64::MAX));
3753 assert_eq_m128i(r, _mm_set1_epi16(0));
3754 }
3755
3756 #[simd_test(enable = "sse2")]
3757 unsafe fn test_mm_srli_epi32() {
3758 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3759 let r = _mm_srli_epi32::<4>(a);
3760 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
3761 let r = _mm_srli_epi32::<32>(a);
3762 assert_eq_m128i(r, _mm_set1_epi32(0));
3763 }
3764
3765 #[simd_test(enable = "sse2")]
3766 unsafe fn test_mm_srl_epi32() {
3767 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3768 let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 4));
3769 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
3770 let r = _mm_srl_epi32(a, _mm_set_epi64x(4, 0));
3771 assert_eq_m128i(r, a);
3772 let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 32));
3773 assert_eq_m128i(r, _mm_set1_epi32(0));
3774 let r = _mm_srl_epi32(a, _mm_set_epi64x(0, i64::MAX));
3775 assert_eq_m128i(r, _mm_set1_epi32(0));
3776 }
3777
3778 #[simd_test(enable = "sse2")]
3779 unsafe fn test_mm_srli_epi64() {
3780 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3781 let r = _mm_srli_epi64::<4>(a);
3782 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
3783 let r = _mm_srli_epi64::<64>(a);
3784 assert_eq_m128i(r, _mm_set1_epi64x(0));
3785 }
3786
3787 #[simd_test(enable = "sse2")]
3788 unsafe fn test_mm_srl_epi64() {
3789 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3790 let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 4));
3791 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
3792 let r = _mm_srl_epi64(a, _mm_set_epi64x(4, 0));
3793 assert_eq_m128i(r, a);
3794 let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 64));
3795 assert_eq_m128i(r, _mm_set1_epi64x(0));
3796 let r = _mm_srl_epi64(a, _mm_set_epi64x(0, i64::MAX));
3797 assert_eq_m128i(r, _mm_set1_epi64x(0));
3798 }
3799
3800 #[simd_test(enable = "sse2")]
3801 unsafe fn test_mm_and_si128() {
3802 let a = _mm_set1_epi8(5);
3803 let b = _mm_set1_epi8(3);
3804 let r = _mm_and_si128(a, b);
3805 assert_eq_m128i(r, _mm_set1_epi8(1));
3806 }
3807
3808 #[simd_test(enable = "sse2")]
3809 unsafe fn test_mm_andnot_si128() {
3810 let a = _mm_set1_epi8(5);
3811 let b = _mm_set1_epi8(3);
3812 let r = _mm_andnot_si128(a, b);
3813 assert_eq_m128i(r, _mm_set1_epi8(2));
3814 }
3815
3816 #[simd_test(enable = "sse2")]
3817 unsafe fn test_mm_or_si128() {
3818 let a = _mm_set1_epi8(5);
3819 let b = _mm_set1_epi8(3);
3820 let r = _mm_or_si128(a, b);
3821 assert_eq_m128i(r, _mm_set1_epi8(7));
3822 }
3823
3824 #[simd_test(enable = "sse2")]
3825 unsafe fn test_mm_xor_si128() {
3826 let a = _mm_set1_epi8(5);
3827 let b = _mm_set1_epi8(3);
3828 let r = _mm_xor_si128(a, b);
3829 assert_eq_m128i(r, _mm_set1_epi8(6));
3830 }
3831
3832 #[simd_test(enable = "sse2")]
3833 unsafe fn test_mm_cmpeq_epi8() {
3834 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3835 let b = _mm_setr_epi8(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
3836 let r = _mm_cmpeq_epi8(a, b);
3837 #[rustfmt::skip]
3838 assert_eq_m128i(
3839 r,
3840 _mm_setr_epi8(
3841 0, 0, 0xFFu8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
3842 )
3843 );
3844 }
3845
3846 #[simd_test(enable = "sse2")]
3847 unsafe fn test_mm_cmpeq_epi16() {
3848 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3849 let b = _mm_setr_epi16(7, 6, 2, 4, 3, 2, 1, 0);
3850 let r = _mm_cmpeq_epi16(a, b);
3851 assert_eq_m128i(r, _mm_setr_epi16(0, 0, !0, 0, 0, 0, 0, 0));
3852 }
3853
3854 #[simd_test(enable = "sse2")]
3855 unsafe fn test_mm_cmpeq_epi32() {
3856 let a = _mm_setr_epi32(0, 1, 2, 3);
3857 let b = _mm_setr_epi32(3, 2, 2, 0);
3858 let r = _mm_cmpeq_epi32(a, b);
3859 assert_eq_m128i(r, _mm_setr_epi32(0, 0, !0, 0));
3860 }
3861
3862 #[simd_test(enable = "sse2")]
3863 unsafe fn test_mm_cmpgt_epi8() {
3864 let a = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3865 let b = _mm_set1_epi8(0);
3866 let r = _mm_cmpgt_epi8(a, b);
3867 let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3868 assert_eq_m128i(r, e);
3869 }
3870
3871 #[simd_test(enable = "sse2")]
3872 unsafe fn test_mm_cmpgt_epi16() {
3873 let a = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
3874 let b = _mm_set1_epi16(0);
3875 let r = _mm_cmpgt_epi16(a, b);
3876 let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
3877 assert_eq_m128i(r, e);
3878 }
3879
3880 #[simd_test(enable = "sse2")]
3881 unsafe fn test_mm_cmpgt_epi32() {
3882 let a = _mm_set_epi32(5, 0, 0, 0);
3883 let b = _mm_set1_epi32(0);
3884 let r = _mm_cmpgt_epi32(a, b);
3885 assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
3886 }
3887
3888 #[simd_test(enable = "sse2")]
3889 unsafe fn test_mm_cmplt_epi8() {
3890 let a = _mm_set1_epi8(0);
3891 let b = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3892 let r = _mm_cmplt_epi8(a, b);
3893 let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3894 assert_eq_m128i(r, e);
3895 }
3896
3897 #[simd_test(enable = "sse2")]
3898 unsafe fn test_mm_cmplt_epi16() {
3899 let a = _mm_set1_epi16(0);
3900 let b = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
3901 let r = _mm_cmplt_epi16(a, b);
3902 let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
3903 assert_eq_m128i(r, e);
3904 }
3905
3906 #[simd_test(enable = "sse2")]
3907 unsafe fn test_mm_cmplt_epi32() {
3908 let a = _mm_set1_epi32(0);
3909 let b = _mm_set_epi32(5, 0, 0, 0);
3910 let r = _mm_cmplt_epi32(a, b);
3911 assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
3912 }
3913
3914 #[simd_test(enable = "sse2")]
3915 unsafe fn test_mm_cvtepi32_pd() {
3916 let a = _mm_set_epi32(35, 25, 15, 5);
3917 let r = _mm_cvtepi32_pd(a);
3918 assert_eq_m128d(r, _mm_setr_pd(5.0, 15.0));
3919 }
3920
3921 #[simd_test(enable = "sse2")]
3922 unsafe fn test_mm_cvtsi32_sd() {
3923 let a = _mm_set1_pd(3.5);
3924 let r = _mm_cvtsi32_sd(a, 5);
3925 assert_eq_m128d(r, _mm_setr_pd(5.0, 3.5));
3926 }
3927
3928 #[simd_test(enable = "sse2")]
3929 unsafe fn test_mm_cvtepi32_ps() {
3930 let a = _mm_setr_epi32(1, 2, 3, 4);
3931 let r = _mm_cvtepi32_ps(a);
3932 assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0));
3933 }
3934
3935 #[simd_test(enable = "sse2")]
3936 unsafe fn test_mm_cvtps_epi32() {
3937 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3938 let r = _mm_cvtps_epi32(a);
3939 assert_eq_m128i(r, _mm_setr_epi32(1, 2, 3, 4));
3940 }
3941
3942 #[simd_test(enable = "sse2")]
3943 unsafe fn test_mm_cvtsi32_si128() {
3944 let r = _mm_cvtsi32_si128(5);
3945 assert_eq_m128i(r, _mm_setr_epi32(5, 0, 0, 0));
3946 }
3947
3948 #[simd_test(enable = "sse2")]
3949 unsafe fn test_mm_cvtsi128_si32() {
3950 let r = _mm_cvtsi128_si32(_mm_setr_epi32(5, 0, 0, 0));
3951 assert_eq!(r, 5);
3952 }
3953
3954 #[simd_test(enable = "sse2")]
3955 unsafe fn test_mm_set_epi64x() {
3956 let r = _mm_set_epi64x(0, 1);
3957 assert_eq_m128i(r, _mm_setr_epi64x(1, 0));
3958 }
3959
3960 #[simd_test(enable = "sse2")]
3961 unsafe fn test_mm_set_epi32() {
3962 let r = _mm_set_epi32(0, 1, 2, 3);
3963 assert_eq_m128i(r, _mm_setr_epi32(3, 2, 1, 0));
3964 }
3965
3966 #[simd_test(enable = "sse2")]
3967 unsafe fn test_mm_set_epi16() {
3968 let r = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3969 assert_eq_m128i(r, _mm_setr_epi16(7, 6, 5, 4, 3, 2, 1, 0));
3970 }
3971
3972 #[simd_test(enable = "sse2")]
3973 unsafe fn test_mm_set_epi8() {
3974 #[rustfmt::skip]
3975 let r = _mm_set_epi8(
3976 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
3977 );
3978 #[rustfmt::skip]
3979 let e = _mm_setr_epi8(
3980 15, 14, 13, 12, 11, 10, 9, 8,
3981 7, 6, 5, 4, 3, 2, 1, 0,
3982 );
3983 assert_eq_m128i(r, e);
3984 }
3985
3986 #[simd_test(enable = "sse2")]
3987 unsafe fn test_mm_set1_epi64x() {
3988 let r = _mm_set1_epi64x(1);
3989 assert_eq_m128i(r, _mm_set1_epi64x(1));
3990 }
3991
3992 #[simd_test(enable = "sse2")]
3993 unsafe fn test_mm_set1_epi32() {
3994 let r = _mm_set1_epi32(1);
3995 assert_eq_m128i(r, _mm_set1_epi32(1));
3996 }
3997
3998 #[simd_test(enable = "sse2")]
3999 unsafe fn test_mm_set1_epi16() {
4000 let r = _mm_set1_epi16(1);
4001 assert_eq_m128i(r, _mm_set1_epi16(1));
4002 }
4003
4004 #[simd_test(enable = "sse2")]
4005 unsafe fn test_mm_set1_epi8() {
4006 let r = _mm_set1_epi8(1);
4007 assert_eq_m128i(r, _mm_set1_epi8(1));
4008 }
4009
4010 #[simd_test(enable = "sse2")]
4011 unsafe fn test_mm_setr_epi32() {
4012 let r = _mm_setr_epi32(0, 1, 2, 3);
4013 assert_eq_m128i(r, _mm_setr_epi32(0, 1, 2, 3));
4014 }
4015
4016 #[simd_test(enable = "sse2")]
4017 unsafe fn test_mm_setr_epi16() {
4018 let r = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4019 assert_eq_m128i(r, _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7));
4020 }
4021
4022 #[simd_test(enable = "sse2")]
4023 unsafe fn test_mm_setr_epi8() {
4024 #[rustfmt::skip]
4025 let r = _mm_setr_epi8(
4026 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
4027 );
4028 #[rustfmt::skip]
4029 let e = _mm_setr_epi8(
4030 0, 1, 2, 3, 4, 5, 6, 7,
4031 8, 9, 10, 11, 12, 13, 14, 15,
4032 );
4033 assert_eq_m128i(r, e);
4034 }
4035
4036 #[simd_test(enable = "sse2")]
4037 unsafe fn test_mm_setzero_si128() {
4038 let r = _mm_setzero_si128();
4039 assert_eq_m128i(r, _mm_set1_epi64x(0));
4040 }
4041
4042 #[simd_test(enable = "sse2")]
4043 unsafe fn test_mm_loadl_epi64() {
4044 let a = _mm_setr_epi64x(6, 5);
4045 let r = _mm_loadl_epi64(ptr::addr_of!(a));
4046 assert_eq_m128i(r, _mm_setr_epi64x(6, 0));
4047 }
4048
4049 #[simd_test(enable = "sse2")]
4050 unsafe fn test_mm_load_si128() {
4051 let a = _mm_set_epi64x(5, 6);
4052 let r = _mm_load_si128(ptr::addr_of!(a) as *const _);
4053 assert_eq_m128i(a, r);
4054 }
4055
4056 #[simd_test(enable = "sse2")]
4057 unsafe fn test_mm_loadu_si128() {
4058 let a = _mm_set_epi64x(5, 6);
4059 let r = _mm_loadu_si128(ptr::addr_of!(a) as *const _);
4060 assert_eq_m128i(a, r);
4061 }
4062
4063 #[simd_test(enable = "sse2")]
4064 #[cfg_attr(miri, ignore)]
4067 unsafe fn test_mm_maskmoveu_si128() {
4068 let a = _mm_set1_epi8(9);
4069 #[rustfmt::skip]
4070 let mask = _mm_set_epi8(
4071 0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0,
4072 0, 0, 0, 0, 0, 0, 0, 0,
4073 );
4074 let mut r = _mm_set1_epi8(0);
4075 _mm_maskmoveu_si128(a, mask, ptr::addr_of_mut!(r) as *mut i8);
4076 _mm_sfence();
4077 let e = _mm_set_epi8(0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
4078 assert_eq_m128i(r, e);
4079 }
4080
4081 #[simd_test(enable = "sse2")]
4082 unsafe fn test_mm_store_si128() {
4083 let a = _mm_set1_epi8(9);
4084 let mut r = _mm_set1_epi8(0);
4085 _mm_store_si128(&mut r, a);
4086 assert_eq_m128i(r, a);
4087 }
4088
4089 #[simd_test(enable = "sse2")]
4090 unsafe fn test_mm_storeu_si128() {
4091 let a = _mm_set1_epi8(9);
4092 let mut r = _mm_set1_epi8(0);
4093 _mm_storeu_si128(&mut r, a);
4094 assert_eq_m128i(r, a);
4095 }
4096
4097 #[simd_test(enable = "sse2")]
4098 unsafe fn test_mm_storel_epi64() {
4099 let a = _mm_setr_epi64x(2, 9);
4100 let mut r = _mm_set1_epi8(0);
4101 _mm_storel_epi64(&mut r, a);
4102 assert_eq_m128i(r, _mm_setr_epi64x(2, 0));
4103 }
4104
4105 #[simd_test(enable = "sse2")]
4106 #[cfg_attr(miri, ignore)]
4109 unsafe fn test_mm_stream_si128() {
4110 let a = _mm_setr_epi32(1, 2, 3, 4);
4111 let mut r = _mm_undefined_si128();
4112 _mm_stream_si128(ptr::addr_of_mut!(r), a);
4113 _mm_sfence();
4114 assert_eq_m128i(r, a);
4115 }
4116
4117 #[simd_test(enable = "sse2")]
4118 #[cfg_attr(miri, ignore)]
4121 unsafe fn test_mm_stream_si32() {
4122 let a: i32 = 7;
4123 let mut mem = boxed::Box::<i32>::new(-1);
4124 _mm_stream_si32(ptr::addr_of_mut!(*mem), a);
4125 _mm_sfence();
4126 assert_eq!(a, *mem);
4127 }
4128
4129 #[simd_test(enable = "sse2")]
4130 unsafe fn test_mm_move_epi64() {
4131 let a = _mm_setr_epi64x(5, 6);
4132 let r = _mm_move_epi64(a);
4133 assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
4134 }
4135
4136 #[simd_test(enable = "sse2")]
4137 unsafe fn test_mm_packs_epi16() {
4138 let a = _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0);
4139 let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80);
4140 let r = _mm_packs_epi16(a, b);
4141 #[rustfmt::skip]
4142 assert_eq_m128i(
4143 r,
4144 _mm_setr_epi8(
4145 0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F
4146 )
4147 );
4148 }
4149
4150 #[simd_test(enable = "sse2")]
4151 unsafe fn test_mm_packs_epi32() {
4152 let a = _mm_setr_epi32(0x8000, -0x8001, 0, 0);
4153 let b = _mm_setr_epi32(0, 0, -0x8001, 0x8000);
4154 let r = _mm_packs_epi32(a, b);
4155 assert_eq_m128i(
4156 r,
4157 _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF),
4158 );
4159 }
4160
4161 #[simd_test(enable = "sse2")]
4162 unsafe fn test_mm_packus_epi16() {
4163 let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0);
4164 let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100);
4165 let r = _mm_packus_epi16(a, b);
4166 assert_eq_m128i(
4167 r,
4168 _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0),
4169 );
4170 }
4171
4172 #[simd_test(enable = "sse2")]
4173 unsafe fn test_mm_extract_epi16() {
4174 let a = _mm_setr_epi16(-1, 1, 2, 3, 4, 5, 6, 7);
4175 let r1 = _mm_extract_epi16::<0>(a);
4176 let r2 = _mm_extract_epi16::<3>(a);
4177 assert_eq!(r1, 0xFFFF);
4178 assert_eq!(r2, 3);
4179 }
4180
4181 #[simd_test(enable = "sse2")]
4182 unsafe fn test_mm_insert_epi16() {
4183 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4184 let r = _mm_insert_epi16::<0>(a, 9);
4185 let e = _mm_setr_epi16(9, 1, 2, 3, 4, 5, 6, 7);
4186 assert_eq_m128i(r, e);
4187 }
4188
4189 #[simd_test(enable = "sse2")]
4190 unsafe fn test_mm_movemask_epi8() {
4191 #[rustfmt::skip]
4192 let a = _mm_setr_epi8(
4193 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01,
4194 0b0101, 0b1111_0000u8 as i8, 0, 0,
4195 0, 0b1011_0101u8 as i8, 0b1111_0000u8 as i8, 0b0101,
4196 0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8,
4197 );
4198 let r = _mm_movemask_epi8(a);
4199 assert_eq!(r, 0b10100110_00100101);
4200 }
4201
4202 #[simd_test(enable = "sse2")]
4203 unsafe fn test_mm_shuffle_epi32() {
4204 let a = _mm_setr_epi32(5, 10, 15, 20);
4205 let r = _mm_shuffle_epi32::<0b00_01_01_11>(a);
4206 let e = _mm_setr_epi32(20, 10, 10, 5);
4207 assert_eq_m128i(r, e);
4208 }
4209
4210 #[simd_test(enable = "sse2")]
4211 unsafe fn test_mm_shufflehi_epi16() {
4212 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 10, 15, 20);
4213 let r = _mm_shufflehi_epi16::<0b00_01_01_11>(a);
4214 let e = _mm_setr_epi16(1, 2, 3, 4, 20, 10, 10, 5);
4215 assert_eq_m128i(r, e);
4216 }
4217
4218 #[simd_test(enable = "sse2")]
4219 unsafe fn test_mm_shufflelo_epi16() {
4220 let a = _mm_setr_epi16(5, 10, 15, 20, 1, 2, 3, 4);
4221 let r = _mm_shufflelo_epi16::<0b00_01_01_11>(a);
4222 let e = _mm_setr_epi16(20, 10, 10, 5, 1, 2, 3, 4);
4223 assert_eq_m128i(r, e);
4224 }
4225
4226 #[simd_test(enable = "sse2")]
4227 unsafe fn test_mm_unpackhi_epi8() {
4228 #[rustfmt::skip]
4229 let a = _mm_setr_epi8(
4230 0, 1, 2, 3, 4, 5, 6, 7,
4231 8, 9, 10, 11, 12, 13, 14, 15,
4232 );
4233 #[rustfmt::skip]
4234 let b = _mm_setr_epi8(
4235 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
4236 );
4237 let r = _mm_unpackhi_epi8(a, b);
4238 #[rustfmt::skip]
4239 let e = _mm_setr_epi8(
4240 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31,
4241 );
4242 assert_eq_m128i(r, e);
4243 }
4244
4245 #[simd_test(enable = "sse2")]
4246 unsafe fn test_mm_unpackhi_epi16() {
4247 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4248 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
4249 let r = _mm_unpackhi_epi16(a, b);
4250 let e = _mm_setr_epi16(4, 12, 5, 13, 6, 14, 7, 15);
4251 assert_eq_m128i(r, e);
4252 }
4253
4254 #[simd_test(enable = "sse2")]
4255 unsafe fn test_mm_unpackhi_epi32() {
4256 let a = _mm_setr_epi32(0, 1, 2, 3);
4257 let b = _mm_setr_epi32(4, 5, 6, 7);
4258 let r = _mm_unpackhi_epi32(a, b);
4259 let e = _mm_setr_epi32(2, 6, 3, 7);
4260 assert_eq_m128i(r, e);
4261 }
4262
4263 #[simd_test(enable = "sse2")]
4264 unsafe fn test_mm_unpackhi_epi64() {
4265 let a = _mm_setr_epi64x(0, 1);
4266 let b = _mm_setr_epi64x(2, 3);
4267 let r = _mm_unpackhi_epi64(a, b);
4268 let e = _mm_setr_epi64x(1, 3);
4269 assert_eq_m128i(r, e);
4270 }
4271
4272 #[simd_test(enable = "sse2")]
4273 unsafe fn test_mm_unpacklo_epi8() {
4274 #[rustfmt::skip]
4275 let a = _mm_setr_epi8(
4276 0, 1, 2, 3, 4, 5, 6, 7,
4277 8, 9, 10, 11, 12, 13, 14, 15,
4278 );
4279 #[rustfmt::skip]
4280 let b = _mm_setr_epi8(
4281 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
4282 );
4283 let r = _mm_unpacklo_epi8(a, b);
4284 #[rustfmt::skip]
4285 let e = _mm_setr_epi8(
4286 0, 16, 1, 17, 2, 18, 3, 19,
4287 4, 20, 5, 21, 6, 22, 7, 23,
4288 );
4289 assert_eq_m128i(r, e);
4290 }
4291
4292 #[simd_test(enable = "sse2")]
4293 unsafe fn test_mm_unpacklo_epi16() {
4294 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4295 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
4296 let r = _mm_unpacklo_epi16(a, b);
4297 let e = _mm_setr_epi16(0, 8, 1, 9, 2, 10, 3, 11);
4298 assert_eq_m128i(r, e);
4299 }
4300
4301 #[simd_test(enable = "sse2")]
4302 unsafe fn test_mm_unpacklo_epi32() {
4303 let a = _mm_setr_epi32(0, 1, 2, 3);
4304 let b = _mm_setr_epi32(4, 5, 6, 7);
4305 let r = _mm_unpacklo_epi32(a, b);
4306 let e = _mm_setr_epi32(0, 4, 1, 5);
4307 assert_eq_m128i(r, e);
4308 }
4309
4310 #[simd_test(enable = "sse2")]
4311 unsafe fn test_mm_unpacklo_epi64() {
4312 let a = _mm_setr_epi64x(0, 1);
4313 let b = _mm_setr_epi64x(2, 3);
4314 let r = _mm_unpacklo_epi64(a, b);
4315 let e = _mm_setr_epi64x(0, 2);
4316 assert_eq_m128i(r, e);
4317 }
4318
4319 #[simd_test(enable = "sse2")]
4320 unsafe fn test_mm_add_sd() {
4321 let a = _mm_setr_pd(1.0, 2.0);
4322 let b = _mm_setr_pd(5.0, 10.0);
4323 let r = _mm_add_sd(a, b);
4324 assert_eq_m128d(r, _mm_setr_pd(6.0, 2.0));
4325 }
4326
4327 #[simd_test(enable = "sse2")]
4328 unsafe fn test_mm_add_pd() {
4329 let a = _mm_setr_pd(1.0, 2.0);
4330 let b = _mm_setr_pd(5.0, 10.0);
4331 let r = _mm_add_pd(a, b);
4332 assert_eq_m128d(r, _mm_setr_pd(6.0, 12.0));
4333 }
4334
4335 #[simd_test(enable = "sse2")]
4336 unsafe fn test_mm_div_sd() {
4337 let a = _mm_setr_pd(1.0, 2.0);
4338 let b = _mm_setr_pd(5.0, 10.0);
4339 let r = _mm_div_sd(a, b);
4340 assert_eq_m128d(r, _mm_setr_pd(0.2, 2.0));
4341 }
4342
4343 #[simd_test(enable = "sse2")]
4344 unsafe fn test_mm_div_pd() {
4345 let a = _mm_setr_pd(1.0, 2.0);
4346 let b = _mm_setr_pd(5.0, 10.0);
4347 let r = _mm_div_pd(a, b);
4348 assert_eq_m128d(r, _mm_setr_pd(0.2, 0.2));
4349 }
4350
4351 #[simd_test(enable = "sse2")]
4352 unsafe fn test_mm_max_sd() {
4353 let a = _mm_setr_pd(1.0, 2.0);
4354 let b = _mm_setr_pd(5.0, 10.0);
4355 let r = _mm_max_sd(a, b);
4356 assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
4357 }
4358
4359 #[simd_test(enable = "sse2")]
4360 unsafe fn test_mm_max_pd() {
4361 let a = _mm_setr_pd(1.0, 2.0);
4362 let b = _mm_setr_pd(5.0, 10.0);
4363 let r = _mm_max_pd(a, b);
4364 assert_eq_m128d(r, _mm_setr_pd(5.0, 10.0));
4365
4366 let a = _mm_setr_pd(-0.0, 0.0);
4368 let b = _mm_setr_pd(0.0, 0.0);
4369 let r1: [u8; 16] = transmute(_mm_max_pd(a, b));
4370 let r2: [u8; 16] = transmute(_mm_max_pd(b, a));
4371 let a: [u8; 16] = transmute(a);
4372 let b: [u8; 16] = transmute(b);
4373 assert_eq!(r1, b);
4374 assert_eq!(r2, a);
4375 assert_ne!(a, b); }
4377
4378 #[simd_test(enable = "sse2")]
4379 unsafe fn test_mm_min_sd() {
4380 let a = _mm_setr_pd(1.0, 2.0);
4381 let b = _mm_setr_pd(5.0, 10.0);
4382 let r = _mm_min_sd(a, b);
4383 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4384 }
4385
4386 #[simd_test(enable = "sse2")]
4387 unsafe fn test_mm_min_pd() {
4388 let a = _mm_setr_pd(1.0, 2.0);
4389 let b = _mm_setr_pd(5.0, 10.0);
4390 let r = _mm_min_pd(a, b);
4391 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4392
4393 let a = _mm_setr_pd(-0.0, 0.0);
4395 let b = _mm_setr_pd(0.0, 0.0);
4396 let r1: [u8; 16] = transmute(_mm_min_pd(a, b));
4397 let r2: [u8; 16] = transmute(_mm_min_pd(b, a));
4398 let a: [u8; 16] = transmute(a);
4399 let b: [u8; 16] = transmute(b);
4400 assert_eq!(r1, b);
4401 assert_eq!(r2, a);
4402 assert_ne!(a, b); }
4404
4405 #[simd_test(enable = "sse2")]
4406 unsafe fn test_mm_mul_sd() {
4407 let a = _mm_setr_pd(1.0, 2.0);
4408 let b = _mm_setr_pd(5.0, 10.0);
4409 let r = _mm_mul_sd(a, b);
4410 assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
4411 }
4412
4413 #[simd_test(enable = "sse2")]
4414 unsafe fn test_mm_mul_pd() {
4415 let a = _mm_setr_pd(1.0, 2.0);
4416 let b = _mm_setr_pd(5.0, 10.0);
4417 let r = _mm_mul_pd(a, b);
4418 assert_eq_m128d(r, _mm_setr_pd(5.0, 20.0));
4419 }
4420
4421 #[simd_test(enable = "sse2")]
4422 unsafe fn test_mm_sqrt_sd() {
4423 let a = _mm_setr_pd(1.0, 2.0);
4424 let b = _mm_setr_pd(5.0, 10.0);
4425 let r = _mm_sqrt_sd(a, b);
4426 assert_eq_m128d(r, _mm_setr_pd(5.0f64.sqrt(), 2.0));
4427 }
4428
4429 #[simd_test(enable = "sse2")]
4430 unsafe fn test_mm_sqrt_pd() {
4431 let r = _mm_sqrt_pd(_mm_setr_pd(1.0, 2.0));
4432 assert_eq_m128d(r, _mm_setr_pd(1.0f64.sqrt(), 2.0f64.sqrt()));
4433 }
4434
4435 #[simd_test(enable = "sse2")]
4436 unsafe fn test_mm_sub_sd() {
4437 let a = _mm_setr_pd(1.0, 2.0);
4438 let b = _mm_setr_pd(5.0, 10.0);
4439 let r = _mm_sub_sd(a, b);
4440 assert_eq_m128d(r, _mm_setr_pd(-4.0, 2.0));
4441 }
4442
4443 #[simd_test(enable = "sse2")]
4444 unsafe fn test_mm_sub_pd() {
4445 let a = _mm_setr_pd(1.0, 2.0);
4446 let b = _mm_setr_pd(5.0, 10.0);
4447 let r = _mm_sub_pd(a, b);
4448 assert_eq_m128d(r, _mm_setr_pd(-4.0, -8.0));
4449 }
4450
4451 #[simd_test(enable = "sse2")]
4452 unsafe fn test_mm_and_pd() {
4453 let a = transmute(u64x2::splat(5));
4454 let b = transmute(u64x2::splat(3));
4455 let r = _mm_and_pd(a, b);
4456 let e = transmute(u64x2::splat(1));
4457 assert_eq_m128d(r, e);
4458 }
4459
4460 #[simd_test(enable = "sse2")]
4461 unsafe fn test_mm_andnot_pd() {
4462 let a = transmute(u64x2::splat(5));
4463 let b = transmute(u64x2::splat(3));
4464 let r = _mm_andnot_pd(a, b);
4465 let e = transmute(u64x2::splat(2));
4466 assert_eq_m128d(r, e);
4467 }
4468
4469 #[simd_test(enable = "sse2")]
4470 unsafe fn test_mm_or_pd() {
4471 let a = transmute(u64x2::splat(5));
4472 let b = transmute(u64x2::splat(3));
4473 let r = _mm_or_pd(a, b);
4474 let e = transmute(u64x2::splat(7));
4475 assert_eq_m128d(r, e);
4476 }
4477
4478 #[simd_test(enable = "sse2")]
4479 unsafe fn test_mm_xor_pd() {
4480 let a = transmute(u64x2::splat(5));
4481 let b = transmute(u64x2::splat(3));
4482 let r = _mm_xor_pd(a, b);
4483 let e = transmute(u64x2::splat(6));
4484 assert_eq_m128d(r, e);
4485 }
4486
4487 #[simd_test(enable = "sse2")]
4488 unsafe fn test_mm_cmpeq_sd() {
4489 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4490 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4491 let r = transmute::<_, __m128i>(_mm_cmpeq_sd(a, b));
4492 assert_eq_m128i(r, e);
4493 }
4494
4495 #[simd_test(enable = "sse2")]
4496 unsafe fn test_mm_cmplt_sd() {
4497 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4498 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4499 let r = transmute::<_, __m128i>(_mm_cmplt_sd(a, b));
4500 assert_eq_m128i(r, e);
4501 }
4502
4503 #[simd_test(enable = "sse2")]
4504 unsafe fn test_mm_cmple_sd() {
4505 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4506 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4507 let r = transmute::<_, __m128i>(_mm_cmple_sd(a, b));
4508 assert_eq_m128i(r, e);
4509 }
4510
4511 #[simd_test(enable = "sse2")]
4512 unsafe fn test_mm_cmpgt_sd() {
4513 let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4514 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4515 let r = transmute::<_, __m128i>(_mm_cmpgt_sd(a, b));
4516 assert_eq_m128i(r, e);
4517 }
4518
4519 #[simd_test(enable = "sse2")]
4520 unsafe fn test_mm_cmpge_sd() {
4521 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4522 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4523 let r = transmute::<_, __m128i>(_mm_cmpge_sd(a, b));
4524 assert_eq_m128i(r, e);
4525 }
4526
4527 #[simd_test(enable = "sse2")]
4528 unsafe fn test_mm_cmpord_sd() {
4529 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4530 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4531 let r = transmute::<_, __m128i>(_mm_cmpord_sd(a, b));
4532 assert_eq_m128i(r, e);
4533 }
4534
4535 #[simd_test(enable = "sse2")]
4536 unsafe fn test_mm_cmpunord_sd() {
4537 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4538 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4539 let r = transmute::<_, __m128i>(_mm_cmpunord_sd(a, b));
4540 assert_eq_m128i(r, e);
4541 }
4542
4543 #[simd_test(enable = "sse2")]
4544 unsafe fn test_mm_cmpneq_sd() {
4545 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4546 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4547 let r = transmute::<_, __m128i>(_mm_cmpneq_sd(a, b));
4548 assert_eq_m128i(r, e);
4549 }
4550
4551 #[simd_test(enable = "sse2")]
4552 unsafe fn test_mm_cmpnlt_sd() {
4553 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4554 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4555 let r = transmute::<_, __m128i>(_mm_cmpnlt_sd(a, b));
4556 assert_eq_m128i(r, e);
4557 }
4558
4559 #[simd_test(enable = "sse2")]
4560 unsafe fn test_mm_cmpnle_sd() {
4561 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4562 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4563 let r = transmute::<_, __m128i>(_mm_cmpnle_sd(a, b));
4564 assert_eq_m128i(r, e);
4565 }
4566
4567 #[simd_test(enable = "sse2")]
4568 unsafe fn test_mm_cmpngt_sd() {
4569 let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4570 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4571 let r = transmute::<_, __m128i>(_mm_cmpngt_sd(a, b));
4572 assert_eq_m128i(r, e);
4573 }
4574
4575 #[simd_test(enable = "sse2")]
4576 unsafe fn test_mm_cmpnge_sd() {
4577 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4578 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4579 let r = transmute::<_, __m128i>(_mm_cmpnge_sd(a, b));
4580 assert_eq_m128i(r, e);
4581 }
4582
4583 #[simd_test(enable = "sse2")]
4584 unsafe fn test_mm_cmpeq_pd() {
4585 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4586 let e = _mm_setr_epi64x(!0, 0);
4587 let r = transmute::<_, __m128i>(_mm_cmpeq_pd(a, b));
4588 assert_eq_m128i(r, e);
4589 }
4590
4591 #[simd_test(enable = "sse2")]
4592 unsafe fn test_mm_cmplt_pd() {
4593 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4594 let e = _mm_setr_epi64x(0, !0);
4595 let r = transmute::<_, __m128i>(_mm_cmplt_pd(a, b));
4596 assert_eq_m128i(r, e);
4597 }
4598
4599 #[simd_test(enable = "sse2")]
4600 unsafe fn test_mm_cmple_pd() {
4601 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4602 let e = _mm_setr_epi64x(!0, !0);
4603 let r = transmute::<_, __m128i>(_mm_cmple_pd(a, b));
4604 assert_eq_m128i(r, e);
4605 }
4606
4607 #[simd_test(enable = "sse2")]
4608 unsafe fn test_mm_cmpgt_pd() {
4609 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4610 let e = _mm_setr_epi64x(0, 0);
4611 let r = transmute::<_, __m128i>(_mm_cmpgt_pd(a, b));
4612 assert_eq_m128i(r, e);
4613 }
4614
4615 #[simd_test(enable = "sse2")]
4616 unsafe fn test_mm_cmpge_pd() {
4617 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4618 let e = _mm_setr_epi64x(!0, 0);
4619 let r = transmute::<_, __m128i>(_mm_cmpge_pd(a, b));
4620 assert_eq_m128i(r, e);
4621 }
4622
4623 #[simd_test(enable = "sse2")]
4624 unsafe fn test_mm_cmpord_pd() {
4625 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4626 let e = _mm_setr_epi64x(0, !0);
4627 let r = transmute::<_, __m128i>(_mm_cmpord_pd(a, b));
4628 assert_eq_m128i(r, e);
4629 }
4630
4631 #[simd_test(enable = "sse2")]
4632 unsafe fn test_mm_cmpunord_pd() {
4633 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4634 let e = _mm_setr_epi64x(!0, 0);
4635 let r = transmute::<_, __m128i>(_mm_cmpunord_pd(a, b));
4636 assert_eq_m128i(r, e);
4637 }
4638
4639 #[simd_test(enable = "sse2")]
4640 unsafe fn test_mm_cmpneq_pd() {
4641 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4642 let e = _mm_setr_epi64x(!0, !0);
4643 let r = transmute::<_, __m128i>(_mm_cmpneq_pd(a, b));
4644 assert_eq_m128i(r, e);
4645 }
4646
4647 #[simd_test(enable = "sse2")]
4648 unsafe fn test_mm_cmpnlt_pd() {
4649 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4650 let e = _mm_setr_epi64x(0, 0);
4651 let r = transmute::<_, __m128i>(_mm_cmpnlt_pd(a, b));
4652 assert_eq_m128i(r, e);
4653 }
4654
4655 #[simd_test(enable = "sse2")]
4656 unsafe fn test_mm_cmpnle_pd() {
4657 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4658 let e = _mm_setr_epi64x(0, 0);
4659 let r = transmute::<_, __m128i>(_mm_cmpnle_pd(a, b));
4660 assert_eq_m128i(r, e);
4661 }
4662
4663 #[simd_test(enable = "sse2")]
4664 unsafe fn test_mm_cmpngt_pd() {
4665 let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4666 let e = _mm_setr_epi64x(0, !0);
4667 let r = transmute::<_, __m128i>(_mm_cmpngt_pd(a, b));
4668 assert_eq_m128i(r, e);
4669 }
4670
4671 #[simd_test(enable = "sse2")]
4672 unsafe fn test_mm_cmpnge_pd() {
4673 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4674 let e = _mm_setr_epi64x(0, !0);
4675 let r = transmute::<_, __m128i>(_mm_cmpnge_pd(a, b));
4676 assert_eq_m128i(r, e);
4677 }
4678
4679 #[simd_test(enable = "sse2")]
4680 unsafe fn test_mm_comieq_sd() {
4681 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4682 assert!(_mm_comieq_sd(a, b) != 0);
4683
4684 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(1.0, 3.0));
4685 assert!(_mm_comieq_sd(a, b) == 0);
4686 }
4687
4688 #[simd_test(enable = "sse2")]
4689 unsafe fn test_mm_comilt_sd() {
4690 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4691 assert!(_mm_comilt_sd(a, b) == 0);
4692 }
4693
4694 #[simd_test(enable = "sse2")]
4695 unsafe fn test_mm_comile_sd() {
4696 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4697 assert!(_mm_comile_sd(a, b) != 0);
4698 }
4699
4700 #[simd_test(enable = "sse2")]
4701 unsafe fn test_mm_comigt_sd() {
4702 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4703 assert!(_mm_comigt_sd(a, b) == 0);
4704 }
4705
4706 #[simd_test(enable = "sse2")]
4707 unsafe fn test_mm_comige_sd() {
4708 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4709 assert!(_mm_comige_sd(a, b) != 0);
4710 }
4711
4712 #[simd_test(enable = "sse2")]
4713 unsafe fn test_mm_comineq_sd() {
4714 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4715 assert!(_mm_comineq_sd(a, b) == 0);
4716 }
4717
4718 #[simd_test(enable = "sse2")]
4719 unsafe fn test_mm_ucomieq_sd() {
4720 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4721 assert!(_mm_ucomieq_sd(a, b) != 0);
4722
4723 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(NAN, 3.0));
4724 assert!(_mm_ucomieq_sd(a, b) == 0);
4725 }
4726
4727 #[simd_test(enable = "sse2")]
4728 unsafe fn test_mm_ucomilt_sd() {
4729 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4730 assert!(_mm_ucomilt_sd(a, b) == 0);
4731 }
4732
4733 #[simd_test(enable = "sse2")]
4734 unsafe fn test_mm_ucomile_sd() {
4735 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4736 assert!(_mm_ucomile_sd(a, b) != 0);
4737 }
4738
4739 #[simd_test(enable = "sse2")]
4740 unsafe fn test_mm_ucomigt_sd() {
4741 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4742 assert!(_mm_ucomigt_sd(a, b) == 0);
4743 }
4744
4745 #[simd_test(enable = "sse2")]
4746 unsafe fn test_mm_ucomige_sd() {
4747 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4748 assert!(_mm_ucomige_sd(a, b) != 0);
4749 }
4750
4751 #[simd_test(enable = "sse2")]
4752 unsafe fn test_mm_ucomineq_sd() {
4753 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4754 assert!(_mm_ucomineq_sd(a, b) == 0);
4755 }
4756
4757 #[simd_test(enable = "sse2")]
4758 unsafe fn test_mm_movemask_pd() {
4759 let r = _mm_movemask_pd(_mm_setr_pd(-1.0, 5.0));
4760 assert_eq!(r, 0b01);
4761
4762 let r = _mm_movemask_pd(_mm_setr_pd(-1.0, -5.0));
4763 assert_eq!(r, 0b11);
4764 }
4765
// 16-byte-aligned buffer of four `f64`s used as the backing store by the
// `_pd` load/store tests that follow.
#[repr(align(16))]
struct Memory {
    data: [f64; 4],
}
4770
4771 #[simd_test(enable = "sse2")]
4772 unsafe fn test_mm_load_pd() {
4773 let mem = Memory {
4774 data: [1.0f64, 2.0, 3.0, 4.0],
4775 };
4776 let vals = &mem.data;
4777 let d = vals.as_ptr();
4778
4779 let r = _mm_load_pd(d);
4780 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4781 }
4782
4783 #[simd_test(enable = "sse2")]
4784 unsafe fn test_mm_load_sd() {
4785 let a = 1.;
4786 let expected = _mm_setr_pd(a, 0.);
4787 let r = _mm_load_sd(&a);
4788 assert_eq_m128d(r, expected);
4789 }
4790
4791 #[simd_test(enable = "sse2")]
4792 unsafe fn test_mm_loadh_pd() {
4793 let a = _mm_setr_pd(1., 2.);
4794 let b = 3.;
4795 let expected = _mm_setr_pd(_mm_cvtsd_f64(a), 3.);
4796 let r = _mm_loadh_pd(a, &b);
4797 assert_eq_m128d(r, expected);
4798 }
4799
4800 #[simd_test(enable = "sse2")]
4801 unsafe fn test_mm_loadl_pd() {
4802 let a = _mm_setr_pd(1., 2.);
4803 let b = 3.;
4804 let expected = _mm_setr_pd(3., get_m128d(a, 1));
4805 let r = _mm_loadl_pd(a, &b);
4806 assert_eq_m128d(r, expected);
4807 }
4808
4809 #[simd_test(enable = "sse2")]
4810 #[cfg_attr(miri, ignore)]
4813 unsafe fn test_mm_stream_pd() {
4814 #[repr(align(128))]
4815 struct Memory {
4816 pub data: [f64; 2],
4817 }
4818 let a = _mm_set1_pd(7.0);
4819 let mut mem = Memory { data: [-1.0; 2] };
4820
4821 _mm_stream_pd(ptr::addr_of_mut!(mem.data[0]), a);
4822 _mm_sfence();
4823 for i in 0..2 {
4824 assert_eq!(mem.data[i], get_m128d(a, i));
4825 }
4826 }
4827
4828 #[simd_test(enable = "sse2")]
4829 unsafe fn test_mm_store_sd() {
4830 let mut dest = 0.;
4831 let a = _mm_setr_pd(1., 2.);
4832 _mm_store_sd(&mut dest, a);
4833 assert_eq!(dest, _mm_cvtsd_f64(a));
4834 }
4835
4836 #[simd_test(enable = "sse2")]
4837 unsafe fn test_mm_store_pd() {
4838 let mut mem = Memory { data: [0.0f64; 4] };
4839 let vals = &mut mem.data;
4840 let a = _mm_setr_pd(1.0, 2.0);
4841 let d = vals.as_mut_ptr();
4842
4843 _mm_store_pd(d, *black_box(&a));
4844 assert_eq!(vals[0], 1.0);
4845 assert_eq!(vals[1], 2.0);
4846 }
4847
4848 #[simd_test(enable = "sse2")]
4849 unsafe fn test_mm_storeu_pd() {
4850 let mut mem = Memory { data: [0.0f64; 4] };
4851 let vals = &mut mem.data;
4852 let a = _mm_setr_pd(1.0, 2.0);
4853
4854 let mut ofs = 0;
4855 let mut p = vals.as_mut_ptr();
4856
4857 if (p as usize) & 0xf == 0 {
4859 ofs = 1;
4860 p = p.add(1);
4861 }
4862
4863 _mm_storeu_pd(p, *black_box(&a));
4864
4865 if ofs > 0 {
4866 assert_eq!(vals[ofs - 1], 0.0);
4867 }
4868 assert_eq!(vals[ofs + 0], 1.0);
4869 assert_eq!(vals[ofs + 1], 2.0);
4870 }
4871
4872 #[simd_test(enable = "sse2")]
4873 unsafe fn test_mm_storeu_si16() {
4874 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
4875 let mut r = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
4876 _mm_storeu_si16(ptr::addr_of_mut!(r).cast(), a);
4877 let e = _mm_setr_epi16(1, 10, 11, 12, 13, 14, 15, 16);
4878 assert_eq_m128i(r, e);
4879 }
4880
4881 #[simd_test(enable = "sse2")]
4882 unsafe fn test_mm_storeu_si32() {
4883 let a = _mm_setr_epi32(1, 2, 3, 4);
4884 let mut r = _mm_setr_epi32(5, 6, 7, 8);
4885 _mm_storeu_si32(ptr::addr_of_mut!(r).cast(), a);
4886 let e = _mm_setr_epi32(1, 6, 7, 8);
4887 assert_eq_m128i(r, e);
4888 }
4889
4890 #[simd_test(enable = "sse2")]
4891 unsafe fn test_mm_storeu_si64() {
4892 let a = _mm_setr_epi64x(1, 2);
4893 let mut r = _mm_setr_epi64x(3, 4);
4894 _mm_storeu_si64(ptr::addr_of_mut!(r).cast(), a);
4895 let e = _mm_setr_epi64x(1, 4);
4896 assert_eq_m128i(r, e);
4897 }
4898
4899 #[simd_test(enable = "sse2")]
4900 unsafe fn test_mm_store1_pd() {
4901 let mut mem = Memory { data: [0.0f64; 4] };
4902 let vals = &mut mem.data;
4903 let a = _mm_setr_pd(1.0, 2.0);
4904 let d = vals.as_mut_ptr();
4905
4906 _mm_store1_pd(d, *black_box(&a));
4907 assert_eq!(vals[0], 1.0);
4908 assert_eq!(vals[1], 1.0);
4909 }
4910
4911 #[simd_test(enable = "sse2")]
4912 unsafe fn test_mm_store_pd1() {
4913 let mut mem = Memory { data: [0.0f64; 4] };
4914 let vals = &mut mem.data;
4915 let a = _mm_setr_pd(1.0, 2.0);
4916 let d = vals.as_mut_ptr();
4917
4918 _mm_store_pd1(d, *black_box(&a));
4919 assert_eq!(vals[0], 1.0);
4920 assert_eq!(vals[1], 1.0);
4921 }
4922
4923 #[simd_test(enable = "sse2")]
4924 unsafe fn test_mm_storer_pd() {
4925 let mut mem = Memory { data: [0.0f64; 4] };
4926 let vals = &mut mem.data;
4927 let a = _mm_setr_pd(1.0, 2.0);
4928 let d = vals.as_mut_ptr();
4929
4930 _mm_storer_pd(d, *black_box(&a));
4931 assert_eq!(vals[0], 2.0);
4932 assert_eq!(vals[1], 1.0);
4933 }
4934
4935 #[simd_test(enable = "sse2")]
4936 unsafe fn test_mm_storeh_pd() {
4937 let mut dest = 0.;
4938 let a = _mm_setr_pd(1., 2.);
4939 _mm_storeh_pd(&mut dest, a);
4940 assert_eq!(dest, get_m128d(a, 1));
4941 }
4942
4943 #[simd_test(enable = "sse2")]
4944 unsafe fn test_mm_storel_pd() {
4945 let mut dest = 0.;
4946 let a = _mm_setr_pd(1., 2.);
4947 _mm_storel_pd(&mut dest, a);
4948 assert_eq!(dest, _mm_cvtsd_f64(a));
4949 }
4950
4951 #[simd_test(enable = "sse2")]
4952 unsafe fn test_mm_loadr_pd() {
4953 let mut mem = Memory {
4954 data: [1.0f64, 2.0, 3.0, 4.0],
4955 };
4956 let vals = &mut mem.data;
4957 let d = vals.as_ptr();
4958
4959 let r = _mm_loadr_pd(d);
4960 assert_eq_m128d(r, _mm_setr_pd(2.0, 1.0));
4961 }
4962
4963 #[simd_test(enable = "sse2")]
4964 unsafe fn test_mm_loadu_pd() {
4965 let mut mem = Memory {
4966 data: [1.0f64, 2.0, 3.0, 4.0],
4967 };
4968 let vals = &mut mem.data;
4969 let mut d = vals.as_ptr();
4970
4971 let mut offset = 0;
4973 if (d as usize) & 0xf == 0 {
4974 offset = 1;
4975 d = d.add(offset);
4976 }
4977
4978 let r = _mm_loadu_pd(d);
4979 let e = _mm_add_pd(_mm_setr_pd(1.0, 2.0), _mm_set1_pd(offset as f64));
4980 assert_eq_m128d(r, e);
4981 }
4982
4983 #[simd_test(enable = "sse2")]
4984 unsafe fn test_mm_loadu_si16() {
4985 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
4986 let r = _mm_loadu_si16(ptr::addr_of!(a) as *const _);
4987 assert_eq_m128i(r, _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0));
4988 }
4989
4990 #[simd_test(enable = "sse2")]
4991 unsafe fn test_mm_loadu_si32() {
4992 let a = _mm_setr_epi32(1, 2, 3, 4);
4993 let r = _mm_loadu_si32(ptr::addr_of!(a) as *const _);
4994 assert_eq_m128i(r, _mm_setr_epi32(1, 0, 0, 0));
4995 }
4996
4997 #[simd_test(enable = "sse2")]
4998 unsafe fn test_mm_loadu_si64() {
4999 let a = _mm_setr_epi64x(5, 6);
5000 let r = _mm_loadu_si64(ptr::addr_of!(a) as *const _);
5001 assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
5002 }
5003
5004 #[simd_test(enable = "sse2")]
5005 unsafe fn test_mm_cvtpd_ps() {
5006 let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, 5.0));
5007 assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, 0.0));
5008
5009 let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, -5.0));
5010 assert_eq_m128(r, _mm_setr_ps(-1.0, -5.0, 0.0, 0.0));
5011
5012 let r = _mm_cvtpd_ps(_mm_setr_pd(f64::MAX, f64::MIN));
5013 assert_eq_m128(r, _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0));
5014
5015 let r = _mm_cvtpd_ps(_mm_setr_pd(f32::MAX as f64, f32::MIN as f64));
5016 assert_eq_m128(r, _mm_setr_ps(f32::MAX, f32::MIN, 0.0, 0.0));
5017 }
5018
5019 #[simd_test(enable = "sse2")]
5020 unsafe fn test_mm_cvtps_pd() {
5021 let r = _mm_cvtps_pd(_mm_setr_ps(-1.0, 2.0, -3.0, 5.0));
5022 assert_eq_m128d(r, _mm_setr_pd(-1.0, 2.0));
5023
5024 let r = _mm_cvtps_pd(_mm_setr_ps(
5025 f32::MAX,
5026 f32::INFINITY,
5027 f32::NEG_INFINITY,
5028 f32::MIN,
5029 ));
5030 assert_eq_m128d(r, _mm_setr_pd(f32::MAX as f64, f64::INFINITY));
5031 }
5032
5033 #[simd_test(enable = "sse2")]
5034 unsafe fn test_mm_cvtpd_epi32() {
5035 let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, 5.0));
5036 assert_eq_m128i(r, _mm_setr_epi32(-1, 5, 0, 0));
5037
5038 let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, -5.0));
5039 assert_eq_m128i(r, _mm_setr_epi32(-1, -5, 0, 0));
5040
5041 let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::MAX, f64::MIN));
5042 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5043
5044 let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::INFINITY, f64::NEG_INFINITY));
5045 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5046
5047 let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::NAN, f64::NAN));
5048 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5049 }
5050
5051 #[simd_test(enable = "sse2")]
5052 unsafe fn test_mm_cvtsd_si32() {
5053 let r = _mm_cvtsd_si32(_mm_setr_pd(-2.0, 5.0));
5054 assert_eq!(r, -2);
5055
5056 let r = _mm_cvtsd_si32(_mm_setr_pd(f64::MAX, f64::MIN));
5057 assert_eq!(r, i32::MIN);
5058
5059 let r = _mm_cvtsd_si32(_mm_setr_pd(f64::NAN, f64::NAN));
5060 assert_eq!(r, i32::MIN);
5061 }
5062
5063 #[simd_test(enable = "sse2")]
5064 unsafe fn test_mm_cvtsd_ss() {
5065 let a = _mm_setr_ps(-1.1, -2.2, 3.3, 4.4);
5066 let b = _mm_setr_pd(2.0, -5.0);
5067
5068 let r = _mm_cvtsd_ss(a, b);
5069
5070 assert_eq_m128(r, _mm_setr_ps(2.0, -2.2, 3.3, 4.4));
5071
5072 let a = _mm_setr_ps(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY);
5073 let b = _mm_setr_pd(f64::INFINITY, -5.0);
5074
5075 let r = _mm_cvtsd_ss(a, b);
5076
5077 assert_eq_m128(
5078 r,
5079 _mm_setr_ps(
5080 f32::INFINITY,
5081 f32::NEG_INFINITY,
5082 f32::MAX,
5083 f32::NEG_INFINITY,
5084 ),
5085 );
5086 }
5087
5088 #[simd_test(enable = "sse2")]
5089 unsafe fn test_mm_cvtsd_f64() {
5090 let r = _mm_cvtsd_f64(_mm_setr_pd(-1.1, 2.2));
5091 assert_eq!(r, -1.1);
5092 }
5093
5094 #[simd_test(enable = "sse2")]
5095 unsafe fn test_mm_cvtss_sd() {
5096 let a = _mm_setr_pd(-1.1, 2.2);
5097 let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
5098
5099 let r = _mm_cvtss_sd(a, b);
5100 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.2));
5101
5102 let a = _mm_setr_pd(-1.1, f64::INFINITY);
5103 let b = _mm_setr_ps(f32::NEG_INFINITY, 2.0, 3.0, 4.0);
5104
5105 let r = _mm_cvtss_sd(a, b);
5106 assert_eq_m128d(r, _mm_setr_pd(f64::NEG_INFINITY, f64::INFINITY));
5107 }
5108
5109 #[simd_test(enable = "sse2")]
5110 unsafe fn test_mm_cvttpd_epi32() {
5111 let a = _mm_setr_pd(-1.1, 2.2);
5112 let r = _mm_cvttpd_epi32(a);
5113 assert_eq_m128i(r, _mm_setr_epi32(-1, 2, 0, 0));
5114
5115 let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
5116 let r = _mm_cvttpd_epi32(a);
5117 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5118 }
5119
5120 #[simd_test(enable = "sse2")]
5121 unsafe fn test_mm_cvttsd_si32() {
5122 let a = _mm_setr_pd(-1.1, 2.2);
5123 let r = _mm_cvttsd_si32(a);
5124 assert_eq!(r, -1);
5125
5126 let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
5127 let r = _mm_cvttsd_si32(a);
5128 assert_eq!(r, i32::MIN);
5129 }
5130
5131 #[simd_test(enable = "sse2")]
5132 unsafe fn test_mm_cvttps_epi32() {
5133 let a = _mm_setr_ps(-1.1, 2.2, -3.3, 6.6);
5134 let r = _mm_cvttps_epi32(a);
5135 assert_eq_m128i(r, _mm_setr_epi32(-1, 2, -3, 6));
5136
5137 let a = _mm_setr_ps(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX);
5138 let r = _mm_cvttps_epi32(a);
5139 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, i32::MIN, i32::MIN));
5140 }
5141
5142 #[simd_test(enable = "sse2")]
5143 unsafe fn test_mm_set_sd() {
5144 let r = _mm_set_sd(-1.0_f64);
5145 assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, 0_f64));
5146 }
5147
5148 #[simd_test(enable = "sse2")]
5149 unsafe fn test_mm_set1_pd() {
5150 let r = _mm_set1_pd(-1.0_f64);
5151 assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, -1.0_f64));
5152 }
5153
5154 #[simd_test(enable = "sse2")]
5155 unsafe fn test_mm_set_pd1() {
5156 let r = _mm_set_pd1(-2.0_f64);
5157 assert_eq_m128d(r, _mm_setr_pd(-2.0_f64, -2.0_f64));
5158 }
5159
5160 #[simd_test(enable = "sse2")]
5161 unsafe fn test_mm_set_pd() {
5162 let r = _mm_set_pd(1.0_f64, 5.0_f64);
5163 assert_eq_m128d(r, _mm_setr_pd(5.0_f64, 1.0_f64));
5164 }
5165
5166 #[simd_test(enable = "sse2")]
5167 unsafe fn test_mm_setr_pd() {
5168 let r = _mm_setr_pd(1.0_f64, -5.0_f64);
5169 assert_eq_m128d(r, _mm_setr_pd(1.0_f64, -5.0_f64));
5170 }
5171
5172 #[simd_test(enable = "sse2")]
5173 unsafe fn test_mm_setzero_pd() {
5174 let r = _mm_setzero_pd();
5175 assert_eq_m128d(r, _mm_setr_pd(0_f64, 0_f64));
5176 }
5177
5178 #[simd_test(enable = "sse2")]
5179 unsafe fn test_mm_load1_pd() {
5180 let d = -5.0;
5181 let r = _mm_load1_pd(&d);
5182 assert_eq_m128d(r, _mm_setr_pd(d, d));
5183 }
5184
5185 #[simd_test(enable = "sse2")]
5186 unsafe fn test_mm_load_pd1() {
5187 let d = -5.0;
5188 let r = _mm_load_pd1(&d);
5189 assert_eq_m128d(r, _mm_setr_pd(d, d));
5190 }
5191
5192 #[simd_test(enable = "sse2")]
5193 unsafe fn test_mm_unpackhi_pd() {
5194 let a = _mm_setr_pd(1.0, 2.0);
5195 let b = _mm_setr_pd(3.0, 4.0);
5196 let r = _mm_unpackhi_pd(a, b);
5197 assert_eq_m128d(r, _mm_setr_pd(2.0, 4.0));
5198 }
5199
5200 #[simd_test(enable = "sse2")]
5201 unsafe fn test_mm_unpacklo_pd() {
5202 let a = _mm_setr_pd(1.0, 2.0);
5203 let b = _mm_setr_pd(3.0, 4.0);
5204 let r = _mm_unpacklo_pd(a, b);
5205 assert_eq_m128d(r, _mm_setr_pd(1.0, 3.0));
5206 }
5207
5208 #[simd_test(enable = "sse2")]
5209 unsafe fn test_mm_shuffle_pd() {
5210 let a = _mm_setr_pd(1., 2.);
5211 let b = _mm_setr_pd(3., 4.);
5212 let expected = _mm_setr_pd(1., 3.);
5213 let r = _mm_shuffle_pd::<0b00_00_00_00>(a, b);
5214 assert_eq_m128d(r, expected);
5215 }
5216
5217 #[simd_test(enable = "sse2")]
5218 unsafe fn test_mm_move_sd() {
5219 let a = _mm_setr_pd(1., 2.);
5220 let b = _mm_setr_pd(3., 4.);
5221 let expected = _mm_setr_pd(3., 2.);
5222 let r = _mm_move_sd(a, b);
5223 assert_eq_m128d(r, expected);
5224 }
5225
5226 #[simd_test(enable = "sse2")]
5227 unsafe fn test_mm_castpd_ps() {
5228 let a = _mm_set1_pd(0.);
5229 let expected = _mm_set1_ps(0.);
5230 let r = _mm_castpd_ps(a);
5231 assert_eq_m128(r, expected);
5232 }
5233
5234 #[simd_test(enable = "sse2")]
5235 unsafe fn test_mm_castpd_si128() {
5236 let a = _mm_set1_pd(0.);
5237 let expected = _mm_set1_epi64x(0);
5238 let r = _mm_castpd_si128(a);
5239 assert_eq_m128i(r, expected);
5240 }
5241
5242 #[simd_test(enable = "sse2")]
5243 unsafe fn test_mm_castps_pd() {
5244 let a = _mm_set1_ps(0.);
5245 let expected = _mm_set1_pd(0.);
5246 let r = _mm_castps_pd(a);
5247 assert_eq_m128d(r, expected);
5248 }
5249
5250 #[simd_test(enable = "sse2")]
5251 unsafe fn test_mm_castps_si128() {
5252 let a = _mm_set1_ps(0.);
5253 let expected = _mm_set1_epi32(0);
5254 let r = _mm_castps_si128(a);
5255 assert_eq_m128i(r, expected);
5256 }
5257
5258 #[simd_test(enable = "sse2")]
5259 unsafe fn test_mm_castsi128_pd() {
5260 let a = _mm_set1_epi64x(0);
5261 let expected = _mm_set1_pd(0.);
5262 let r = _mm_castsi128_pd(a);
5263 assert_eq_m128d(r, expected);
5264 }
5265
5266 #[simd_test(enable = "sse2")]
5267 unsafe fn test_mm_castsi128_ps() {
5268 let a = _mm_set1_epi32(0);
5269 let expected = _mm_set1_ps(0.);
5270 let r = _mm_castsi128_ps(a);
5271 assert_eq_m128(r, expected);
5272 }
5273}