1#[cfg(test)]
4use stdarch_test::assert_instr;
5
6use crate::{
7    core_arch::{simd::*, x86::*},
8    intrinsics::simd::*,
9    intrinsics::sqrtf64,
10    mem, ptr,
11};
12
/// Provides a hint to the processor that the code sequence is a spin-wait
/// loop (Intel's description of the `pause` instruction).
///
/// Forwards directly to the `pause` intrinsic. This function carries no
/// `#[target_feature]` attribute, and the `assert_instr(pause)` check is only
/// enabled when the test build itself targets SSE2.
#[inline]
#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(pause))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_pause() {
    // NOTE(review): `pause` is declared outside this view; presumably the
    // compiler binding for the instruction of the same name — confirm.
    pause()
}
27
/// Flushes the cache line containing `p` (Intel's description of the
/// `clflush` instruction) by forwarding `p` to the `clflush` intrinsic.
///
/// # Safety
///
/// NOTE(review): `p` is passed to the intrinsic unchanged; the precise
/// validity requirements on the pointer are not visible here — confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(clflush))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_clflush(p: *const u8) {
    clflush(p)
}
39
/// Issues a load fence by calling the `lfence` intrinsic.
///
/// Per Intel's documentation, `lfence` serializes load-from-memory
/// instructions issued before it with respect to those issued after it.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(lfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_lfence() {
    lfence()
}
55
/// Issues a full memory fence by calling the `mfence` intrinsic.
///
/// Per Intel's documentation, `mfence` serializes both loads and stores
/// issued before it with respect to those issued after it.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(mfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mfence() {
    mfence()
}
71
72#[inline]
76#[target_feature(enable = "sse2")]
77#[cfg_attr(test, assert_instr(paddb))]
78#[stable(feature = "simd_x86", since = "1.27.0")]
79pub fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i {
80    unsafe { transmute(simd_add(a.as_i8x16(), b.as_i8x16())) }
81}
82
83#[inline]
87#[target_feature(enable = "sse2")]
88#[cfg_attr(test, assert_instr(paddw))]
89#[stable(feature = "simd_x86", since = "1.27.0")]
90pub fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i {
91    unsafe { transmute(simd_add(a.as_i16x8(), b.as_i16x8())) }
92}
93
94#[inline]
98#[target_feature(enable = "sse2")]
99#[cfg_attr(test, assert_instr(paddd))]
100#[stable(feature = "simd_x86", since = "1.27.0")]
101pub fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i {
102    unsafe { transmute(simd_add(a.as_i32x4(), b.as_i32x4())) }
103}
104
105#[inline]
109#[target_feature(enable = "sse2")]
110#[cfg_attr(test, assert_instr(paddq))]
111#[stable(feature = "simd_x86", since = "1.27.0")]
112pub fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i {
113    unsafe { transmute(simd_add(a.as_i64x2(), b.as_i64x2())) }
114}
115
116#[inline]
120#[target_feature(enable = "sse2")]
121#[cfg_attr(test, assert_instr(paddsb))]
122#[stable(feature = "simd_x86", since = "1.27.0")]
123pub fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i {
124    unsafe { transmute(simd_saturating_add(a.as_i8x16(), b.as_i8x16())) }
125}
126
127#[inline]
131#[target_feature(enable = "sse2")]
132#[cfg_attr(test, assert_instr(paddsw))]
133#[stable(feature = "simd_x86", since = "1.27.0")]
134pub fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i {
135    unsafe { transmute(simd_saturating_add(a.as_i16x8(), b.as_i16x8())) }
136}
137
138#[inline]
142#[target_feature(enable = "sse2")]
143#[cfg_attr(test, assert_instr(paddusb))]
144#[stable(feature = "simd_x86", since = "1.27.0")]
145pub fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i {
146    unsafe { transmute(simd_saturating_add(a.as_u8x16(), b.as_u8x16())) }
147}
148
149#[inline]
153#[target_feature(enable = "sse2")]
154#[cfg_attr(test, assert_instr(paddusw))]
155#[stable(feature = "simd_x86", since = "1.27.0")]
156pub fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i {
157    unsafe { transmute(simd_saturating_add(a.as_u16x8(), b.as_u16x8())) }
158}
159
160#[inline]
164#[target_feature(enable = "sse2")]
165#[cfg_attr(test, assert_instr(pavgb))]
166#[stable(feature = "simd_x86", since = "1.27.0")]
167pub fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i {
168    unsafe {
169        let a = simd_cast::<_, u16x16>(a.as_u8x16());
170        let b = simd_cast::<_, u16x16>(b.as_u8x16());
171        let r = simd_shr(simd_add(simd_add(a, b), u16x16::splat(1)), u16x16::splat(1));
172        transmute(simd_cast::<_, u8x16>(r))
173    }
174}
175
176#[inline]
180#[target_feature(enable = "sse2")]
181#[cfg_attr(test, assert_instr(pavgw))]
182#[stable(feature = "simd_x86", since = "1.27.0")]
183pub fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i {
184    unsafe {
185        let a = simd_cast::<_, u32x8>(a.as_u16x8());
186        let b = simd_cast::<_, u32x8>(b.as_u16x8());
187        let r = simd_shr(simd_add(simd_add(a, b), u32x8::splat(1)), u32x8::splat(1));
188        transmute(simd_cast::<_, u16x8>(r))
189    }
190}
191
192#[inline]
200#[target_feature(enable = "sse2")]
201#[cfg_attr(test, assert_instr(pmaddwd))]
202#[stable(feature = "simd_x86", since = "1.27.0")]
203pub fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i {
204    unsafe { transmute(pmaddwd(a.as_i16x8(), b.as_i16x8())) }
205}
206
207#[inline]
212#[target_feature(enable = "sse2")]
213#[cfg_attr(test, assert_instr(pmaxsw))]
214#[stable(feature = "simd_x86", since = "1.27.0")]
215pub fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i {
216    unsafe {
217        let a = a.as_i16x8();
218        let b = b.as_i16x8();
219        transmute(simd_select::<i16x8, _>(simd_gt(a, b), a, b))
220    }
221}
222
223#[inline]
228#[target_feature(enable = "sse2")]
229#[cfg_attr(test, assert_instr(pmaxub))]
230#[stable(feature = "simd_x86", since = "1.27.0")]
231pub fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i {
232    unsafe {
233        let a = a.as_u8x16();
234        let b = b.as_u8x16();
235        transmute(simd_select::<i8x16, _>(simd_gt(a, b), a, b))
236    }
237}
238
239#[inline]
244#[target_feature(enable = "sse2")]
245#[cfg_attr(test, assert_instr(pminsw))]
246#[stable(feature = "simd_x86", since = "1.27.0")]
247pub fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i {
248    unsafe {
249        let a = a.as_i16x8();
250        let b = b.as_i16x8();
251        transmute(simd_select::<i16x8, _>(simd_lt(a, b), a, b))
252    }
253}
254
255#[inline]
260#[target_feature(enable = "sse2")]
261#[cfg_attr(test, assert_instr(pminub))]
262#[stable(feature = "simd_x86", since = "1.27.0")]
263pub fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i {
264    unsafe {
265        let a = a.as_u8x16();
266        let b = b.as_u8x16();
267        transmute(simd_select::<i8x16, _>(simd_lt(a, b), a, b))
268    }
269}
270
271#[inline]
278#[target_feature(enable = "sse2")]
279#[cfg_attr(test, assert_instr(pmulhw))]
280#[stable(feature = "simd_x86", since = "1.27.0")]
281pub fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i {
282    unsafe {
283        let a = simd_cast::<_, i32x8>(a.as_i16x8());
284        let b = simd_cast::<_, i32x8>(b.as_i16x8());
285        let r = simd_shr(simd_mul(a, b), i32x8::splat(16));
286        transmute(simd_cast::<i32x8, i16x8>(r))
287    }
288}
289
290#[inline]
297#[target_feature(enable = "sse2")]
298#[cfg_attr(test, assert_instr(pmulhuw))]
299#[stable(feature = "simd_x86", since = "1.27.0")]
300pub fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i {
301    unsafe {
302        let a = simd_cast::<_, u32x8>(a.as_u16x8());
303        let b = simd_cast::<_, u32x8>(b.as_u16x8());
304        let r = simd_shr(simd_mul(a, b), u32x8::splat(16));
305        transmute(simd_cast::<u32x8, u16x8>(r))
306    }
307}
308
309#[inline]
316#[target_feature(enable = "sse2")]
317#[cfg_attr(test, assert_instr(pmullw))]
318#[stable(feature = "simd_x86", since = "1.27.0")]
319pub fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i {
320    unsafe { transmute(simd_mul(a.as_i16x8(), b.as_i16x8())) }
321}
322
323#[inline]
330#[target_feature(enable = "sse2")]
331#[cfg_attr(test, assert_instr(pmuludq))]
332#[stable(feature = "simd_x86", since = "1.27.0")]
333pub fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i {
334    unsafe {
335        let a = a.as_u64x2();
336        let b = b.as_u64x2();
337        let mask = u64x2::splat(u32::MAX.into());
338        transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
339    }
340}
341
342#[inline]
351#[target_feature(enable = "sse2")]
352#[cfg_attr(test, assert_instr(psadbw))]
353#[stable(feature = "simd_x86", since = "1.27.0")]
354pub fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i {
355    unsafe { transmute(psadbw(a.as_u8x16(), b.as_u8x16())) }
356}
357
358#[inline]
362#[target_feature(enable = "sse2")]
363#[cfg_attr(test, assert_instr(psubb))]
364#[stable(feature = "simd_x86", since = "1.27.0")]
365pub fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i {
366    unsafe { transmute(simd_sub(a.as_i8x16(), b.as_i8x16())) }
367}
368
369#[inline]
373#[target_feature(enable = "sse2")]
374#[cfg_attr(test, assert_instr(psubw))]
375#[stable(feature = "simd_x86", since = "1.27.0")]
376pub fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i {
377    unsafe { transmute(simd_sub(a.as_i16x8(), b.as_i16x8())) }
378}
379
380#[inline]
384#[target_feature(enable = "sse2")]
385#[cfg_attr(test, assert_instr(psubd))]
386#[stable(feature = "simd_x86", since = "1.27.0")]
387pub fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i {
388    unsafe { transmute(simd_sub(a.as_i32x4(), b.as_i32x4())) }
389}
390
391#[inline]
395#[target_feature(enable = "sse2")]
396#[cfg_attr(test, assert_instr(psubq))]
397#[stable(feature = "simd_x86", since = "1.27.0")]
398pub fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i {
399    unsafe { transmute(simd_sub(a.as_i64x2(), b.as_i64x2())) }
400}
401
402#[inline]
407#[target_feature(enable = "sse2")]
408#[cfg_attr(test, assert_instr(psubsb))]
409#[stable(feature = "simd_x86", since = "1.27.0")]
410pub fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i {
411    unsafe { transmute(simd_saturating_sub(a.as_i8x16(), b.as_i8x16())) }
412}
413
414#[inline]
419#[target_feature(enable = "sse2")]
420#[cfg_attr(test, assert_instr(psubsw))]
421#[stable(feature = "simd_x86", since = "1.27.0")]
422pub fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i {
423    unsafe { transmute(simd_saturating_sub(a.as_i16x8(), b.as_i16x8())) }
424}
425
426#[inline]
431#[target_feature(enable = "sse2")]
432#[cfg_attr(test, assert_instr(psubusb))]
433#[stable(feature = "simd_x86", since = "1.27.0")]
434pub fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i {
435    unsafe { transmute(simd_saturating_sub(a.as_u8x16(), b.as_u8x16())) }
436}
437
438#[inline]
443#[target_feature(enable = "sse2")]
444#[cfg_attr(test, assert_instr(psubusw))]
445#[stable(feature = "simd_x86", since = "1.27.0")]
446pub fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i {
447    unsafe { transmute(simd_saturating_sub(a.as_u16x8(), b.as_u16x8())) }
448}
449
/// Shifts `a` left by `IMM8` bytes while shifting in zeros.
///
/// `IMM8` is checked at compile time to fit in 8 unsigned bits; the byte
/// shuffle itself lives in `_mm_slli_si128_impl`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_slli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { _mm_slli_si128_impl::<IMM8>(a) }
}
462
/// Implementation detail: shifts `a` left by `IMM8` bytes, shifting in
/// zeros, expressed as a 16-lane byte shuffle.
#[inline]
#[target_feature(enable = "sse2")]
unsafe fn _mm_slli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
    // Computes the shuffle index for output byte `i`. The shuffle's first
    // operand is the zero vector (indices 0..16) and the second is `a`
    // (indices 16..32).
    const fn mask(shift: i32, i: u32) -> u32 {
        let shift = shift as u32 & 0xff;
        // shift > 15: every index stays below 16, so all bytes come from zero.
        // Otherwise `16 - shift + i` selects a zero byte when `i < shift`
        // and byte `i - shift` of `a` when `i >= shift`.
        if shift > 15 { i } else { 16 - shift + i }
    }
    transmute::<i8x16, _>(simd_shuffle!(
        i8x16::ZERO,
        a.as_i8x16(),
        [
            mask(IMM8, 0),
            mask(IMM8, 1),
            mask(IMM8, 2),
            mask(IMM8, 3),
            mask(IMM8, 4),
            mask(IMM8, 5),
            mask(IMM8, 6),
            mask(IMM8, 7),
            mask(IMM8, 8),
            mask(IMM8, 9),
            mask(IMM8, 10),
            mask(IMM8, 11),
            mask(IMM8, 12),
            mask(IMM8, 13),
            mask(IMM8, 14),
            mask(IMM8, 15),
        ],
    ))
}
495
496#[inline]
500#[target_feature(enable = "sse2")]
501#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
502#[rustc_legacy_const_generics(1)]
503#[stable(feature = "simd_x86", since = "1.27.0")]
504pub fn _mm_bslli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
505    unsafe {
506        static_assert_uimm_bits!(IMM8, 8);
507        _mm_slli_si128_impl::<IMM8>(a)
508    }
509}
510
511#[inline]
515#[target_feature(enable = "sse2")]
516#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
517#[rustc_legacy_const_generics(1)]
518#[stable(feature = "simd_x86", since = "1.27.0")]
519pub fn _mm_bsrli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
520    unsafe {
521        static_assert_uimm_bits!(IMM8, 8);
522        _mm_srli_si128_impl::<IMM8>(a)
523    }
524}
525
526#[inline]
530#[target_feature(enable = "sse2")]
531#[cfg_attr(test, assert_instr(psllw, IMM8 = 7))]
532#[rustc_legacy_const_generics(1)]
533#[stable(feature = "simd_x86", since = "1.27.0")]
534pub fn _mm_slli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
535    static_assert_uimm_bits!(IMM8, 8);
536    unsafe {
537        if IMM8 >= 16 {
538            _mm_setzero_si128()
539        } else {
540            transmute(simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
541        }
542    }
543}
544
545#[inline]
550#[target_feature(enable = "sse2")]
551#[cfg_attr(test, assert_instr(psllw))]
552#[stable(feature = "simd_x86", since = "1.27.0")]
553pub fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i {
554    unsafe { transmute(psllw(a.as_i16x8(), count.as_i16x8())) }
555}
556
557#[inline]
561#[target_feature(enable = "sse2")]
562#[cfg_attr(test, assert_instr(pslld, IMM8 = 7))]
563#[rustc_legacy_const_generics(1)]
564#[stable(feature = "simd_x86", since = "1.27.0")]
565pub fn _mm_slli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
566    static_assert_uimm_bits!(IMM8, 8);
567    unsafe {
568        if IMM8 >= 32 {
569            _mm_setzero_si128()
570        } else {
571            transmute(simd_shl(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
572        }
573    }
574}
575
576#[inline]
581#[target_feature(enable = "sse2")]
582#[cfg_attr(test, assert_instr(pslld))]
583#[stable(feature = "simd_x86", since = "1.27.0")]
584pub fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i {
585    unsafe { transmute(pslld(a.as_i32x4(), count.as_i32x4())) }
586}
587
588#[inline]
592#[target_feature(enable = "sse2")]
593#[cfg_attr(test, assert_instr(psllq, IMM8 = 7))]
594#[rustc_legacy_const_generics(1)]
595#[stable(feature = "simd_x86", since = "1.27.0")]
596pub fn _mm_slli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
597    static_assert_uimm_bits!(IMM8, 8);
598    unsafe {
599        if IMM8 >= 64 {
600            _mm_setzero_si128()
601        } else {
602            transmute(simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
603        }
604    }
605}
606
607#[inline]
612#[target_feature(enable = "sse2")]
613#[cfg_attr(test, assert_instr(psllq))]
614#[stable(feature = "simd_x86", since = "1.27.0")]
615pub fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i {
616    unsafe { transmute(psllq(a.as_i64x2(), count.as_i64x2())) }
617}
618
619#[inline]
624#[target_feature(enable = "sse2")]
625#[cfg_attr(test, assert_instr(psraw, IMM8 = 1))]
626#[rustc_legacy_const_generics(1)]
627#[stable(feature = "simd_x86", since = "1.27.0")]
628pub fn _mm_srai_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
629    static_assert_uimm_bits!(IMM8, 8);
630    unsafe { transmute(simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16))) }
631}
632
633#[inline]
638#[target_feature(enable = "sse2")]
639#[cfg_attr(test, assert_instr(psraw))]
640#[stable(feature = "simd_x86", since = "1.27.0")]
641pub fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i {
642    unsafe { transmute(psraw(a.as_i16x8(), count.as_i16x8())) }
643}
644
645#[inline]
650#[target_feature(enable = "sse2")]
651#[cfg_attr(test, assert_instr(psrad, IMM8 = 1))]
652#[rustc_legacy_const_generics(1)]
653#[stable(feature = "simd_x86", since = "1.27.0")]
654pub fn _mm_srai_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
655    static_assert_uimm_bits!(IMM8, 8);
656    unsafe { transmute(simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31)))) }
657}
658
659#[inline]
664#[target_feature(enable = "sse2")]
665#[cfg_attr(test, assert_instr(psrad))]
666#[stable(feature = "simd_x86", since = "1.27.0")]
667pub fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i {
668    unsafe { transmute(psrad(a.as_i32x4(), count.as_i32x4())) }
669}
670
/// Shifts `a` right by `IMM8` bytes while shifting in zeros.
///
/// `IMM8` is checked at compile time to fit in 8 unsigned bits; the byte
/// shuffle itself lives in `_mm_srli_si128_impl`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { _mm_srli_si128_impl::<IMM8>(a) }
}
683
/// Implementation detail: shifts `a` right by `IMM8` bytes, shifting in
/// zeros, expressed as a 16-lane byte shuffle.
#[inline]
#[target_feature(enable = "sse2")]
unsafe fn _mm_srli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
    // Computes the shuffle index for output byte `i`. The shuffle's first
    // operand is `a` (indices 0..16) and the second is the zero vector
    // (indices 16..32).
    const fn mask(shift: i32, i: u32) -> u32 {
        if (shift as u32) > 15 {
            // Everything is shifted out: pick only zero bytes.
            i + 16
        } else {
            // Byte `i + shift` of `a`, or a zero byte once the index
            // reaches 16.
            i + (shift as u32)
        }
    }
    let x: i8x16 = simd_shuffle!(
        a.as_i8x16(),
        i8x16::ZERO,
        [
            mask(IMM8, 0),
            mask(IMM8, 1),
            mask(IMM8, 2),
            mask(IMM8, 3),
            mask(IMM8, 4),
            mask(IMM8, 5),
            mask(IMM8, 6),
            mask(IMM8, 7),
            mask(IMM8, 8),
            mask(IMM8, 9),
            mask(IMM8, 10),
            mask(IMM8, 11),
            mask(IMM8, 12),
            mask(IMM8, 13),
            mask(IMM8, 14),
            mask(IMM8, 15),
        ],
    );
    transmute(x)
}
720
721#[inline]
726#[target_feature(enable = "sse2")]
727#[cfg_attr(test, assert_instr(psrlw, IMM8 = 1))]
728#[rustc_legacy_const_generics(1)]
729#[stable(feature = "simd_x86", since = "1.27.0")]
730pub fn _mm_srli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
731    static_assert_uimm_bits!(IMM8, 8);
732    unsafe {
733        if IMM8 >= 16 {
734            _mm_setzero_si128()
735        } else {
736            transmute(simd_shr(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
737        }
738    }
739}
740
741#[inline]
746#[target_feature(enable = "sse2")]
747#[cfg_attr(test, assert_instr(psrlw))]
748#[stable(feature = "simd_x86", since = "1.27.0")]
749pub fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i {
750    unsafe { transmute(psrlw(a.as_i16x8(), count.as_i16x8())) }
751}
752
753#[inline]
758#[target_feature(enable = "sse2")]
759#[cfg_attr(test, assert_instr(psrld, IMM8 = 8))]
760#[rustc_legacy_const_generics(1)]
761#[stable(feature = "simd_x86", since = "1.27.0")]
762pub fn _mm_srli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
763    static_assert_uimm_bits!(IMM8, 8);
764    unsafe {
765        if IMM8 >= 32 {
766            _mm_setzero_si128()
767        } else {
768            transmute(simd_shr(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
769        }
770    }
771}
772
773#[inline]
778#[target_feature(enable = "sse2")]
779#[cfg_attr(test, assert_instr(psrld))]
780#[stable(feature = "simd_x86", since = "1.27.0")]
781pub fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i {
782    unsafe { transmute(psrld(a.as_i32x4(), count.as_i32x4())) }
783}
784
785#[inline]
790#[target_feature(enable = "sse2")]
791#[cfg_attr(test, assert_instr(psrlq, IMM8 = 1))]
792#[rustc_legacy_const_generics(1)]
793#[stable(feature = "simd_x86", since = "1.27.0")]
794pub fn _mm_srli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
795    static_assert_uimm_bits!(IMM8, 8);
796    unsafe {
797        if IMM8 >= 64 {
798            _mm_setzero_si128()
799        } else {
800            transmute(simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
801        }
802    }
803}
804
805#[inline]
810#[target_feature(enable = "sse2")]
811#[cfg_attr(test, assert_instr(psrlq))]
812#[stable(feature = "simd_x86", since = "1.27.0")]
813pub fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i {
814    unsafe { transmute(psrlq(a.as_i64x2(), count.as_i64x2())) }
815}
816
/// Computes the bitwise AND of the 128 bits in `a` and `b`.
///
/// The instruction check expects `andps` for this operation.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i {
    unsafe { simd_and(a, b) }
}
828
829#[inline]
834#[target_feature(enable = "sse2")]
835#[cfg_attr(test, assert_instr(andnps))]
836#[stable(feature = "simd_x86", since = "1.27.0")]
837pub fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i {
838    unsafe { simd_and(simd_xor(_mm_set1_epi8(-1), a), b) }
839}
840
/// Computes the bitwise OR of the 128 bits in `a` and `b`.
///
/// The instruction check expects `orps` for this operation.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(orps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i {
    unsafe { simd_or(a, b) }
}
852
/// Computes the bitwise XOR of the 128 bits in `a` and `b`.
///
/// The instruction check expects `xorps` for this operation.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i {
    unsafe { simd_xor(a, b) }
}
864
865#[inline]
869#[target_feature(enable = "sse2")]
870#[cfg_attr(test, assert_instr(pcmpeqb))]
871#[stable(feature = "simd_x86", since = "1.27.0")]
872pub fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i {
873    unsafe { transmute::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16())) }
874}
875
876#[inline]
880#[target_feature(enable = "sse2")]
881#[cfg_attr(test, assert_instr(pcmpeqw))]
882#[stable(feature = "simd_x86", since = "1.27.0")]
883pub fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i {
884    unsafe { transmute::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8())) }
885}
886
887#[inline]
891#[target_feature(enable = "sse2")]
892#[cfg_attr(test, assert_instr(pcmpeqd))]
893#[stable(feature = "simd_x86", since = "1.27.0")]
894pub fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i {
895    unsafe { transmute::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4())) }
896}
897
898#[inline]
902#[target_feature(enable = "sse2")]
903#[cfg_attr(test, assert_instr(pcmpgtb))]
904#[stable(feature = "simd_x86", since = "1.27.0")]
905pub fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i {
906    unsafe { transmute::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16())) }
907}
908
909#[inline]
913#[target_feature(enable = "sse2")]
914#[cfg_attr(test, assert_instr(pcmpgtw))]
915#[stable(feature = "simd_x86", since = "1.27.0")]
916pub fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i {
917    unsafe { transmute::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8())) }
918}
919
920#[inline]
924#[target_feature(enable = "sse2")]
925#[cfg_attr(test, assert_instr(pcmpgtd))]
926#[stable(feature = "simd_x86", since = "1.27.0")]
927pub fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i {
928    unsafe { transmute::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4())) }
929}
930
931#[inline]
935#[target_feature(enable = "sse2")]
936#[cfg_attr(test, assert_instr(pcmpgtb))]
937#[stable(feature = "simd_x86", since = "1.27.0")]
938pub fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i {
939    unsafe { transmute::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16())) }
940}
941
942#[inline]
946#[target_feature(enable = "sse2")]
947#[cfg_attr(test, assert_instr(pcmpgtw))]
948#[stable(feature = "simd_x86", since = "1.27.0")]
949pub fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i {
950    unsafe { transmute::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8())) }
951}
952
953#[inline]
957#[target_feature(enable = "sse2")]
958#[cfg_attr(test, assert_instr(pcmpgtd))]
959#[stable(feature = "simd_x86", since = "1.27.0")]
960pub fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i {
961    unsafe { transmute::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4())) }
962}
963
964#[inline]
969#[target_feature(enable = "sse2")]
970#[cfg_attr(test, assert_instr(cvtdq2pd))]
971#[stable(feature = "simd_x86", since = "1.27.0")]
972pub fn _mm_cvtepi32_pd(a: __m128i) -> __m128d {
973    unsafe {
974        let a = a.as_i32x4();
975        simd_cast::<i32x2, __m128d>(simd_shuffle!(a, a, [0, 1]))
976    }
977}
978
979#[inline]
984#[target_feature(enable = "sse2")]
985#[cfg_attr(test, assert_instr(cvtsi2sd))]
986#[stable(feature = "simd_x86", since = "1.27.0")]
987pub fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d {
988    unsafe { simd_insert!(a, 0, b as f64) }
989}
990
991#[inline]
996#[target_feature(enable = "sse2")]
997#[cfg_attr(test, assert_instr(cvtdq2ps))]
998#[stable(feature = "simd_x86", since = "1.27.0")]
999pub fn _mm_cvtepi32_ps(a: __m128i) -> __m128 {
1000    unsafe { transmute(simd_cast::<_, f32x4>(a.as_i32x4())) }
1001}
1002
1003#[inline]
1008#[target_feature(enable = "sse2")]
1009#[cfg_attr(test, assert_instr(cvtps2dq))]
1010#[stable(feature = "simd_x86", since = "1.27.0")]
1011pub fn _mm_cvtps_epi32(a: __m128) -> __m128i {
1012    unsafe { transmute(cvtps2dq(a)) }
1013}
1014
1015#[inline]
1020#[target_feature(enable = "sse2")]
1021#[stable(feature = "simd_x86", since = "1.27.0")]
1022pub fn _mm_cvtsi32_si128(a: i32) -> __m128i {
1023    unsafe { transmute(i32x4::new(a, 0, 0, 0)) }
1024}
1025
1026#[inline]
1030#[target_feature(enable = "sse2")]
1031#[stable(feature = "simd_x86", since = "1.27.0")]
1032pub fn _mm_cvtsi128_si32(a: __m128i) -> i32 {
1033    unsafe { simd_extract!(a.as_i32x4(), 0) }
1034}
1035
1036#[inline]
1041#[target_feature(enable = "sse2")]
1042#[stable(feature = "simd_x86", since = "1.27.0")]
1044pub fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i {
1045    unsafe { transmute(i64x2::new(e0, e1)) }
1046}
1047
1048#[inline]
1052#[target_feature(enable = "sse2")]
1053#[stable(feature = "simd_x86", since = "1.27.0")]
1055pub fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
1056    unsafe { transmute(i32x4::new(e0, e1, e2, e3)) }
1057}
1058
1059#[inline]
1063#[target_feature(enable = "sse2")]
1064#[stable(feature = "simd_x86", since = "1.27.0")]
1066pub fn _mm_set_epi16(
1067    e7: i16,
1068    e6: i16,
1069    e5: i16,
1070    e4: i16,
1071    e3: i16,
1072    e2: i16,
1073    e1: i16,
1074    e0: i16,
1075) -> __m128i {
1076    unsafe { transmute(i16x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) }
1077}
1078
1079#[inline]
1083#[target_feature(enable = "sse2")]
1084#[stable(feature = "simd_x86", since = "1.27.0")]
1086pub fn _mm_set_epi8(
1087    e15: i8,
1088    e14: i8,
1089    e13: i8,
1090    e12: i8,
1091    e11: i8,
1092    e10: i8,
1093    e9: i8,
1094    e8: i8,
1095    e7: i8,
1096    e6: i8,
1097    e5: i8,
1098    e4: i8,
1099    e3: i8,
1100    e2: i8,
1101    e1: i8,
1102    e0: i8,
1103) -> __m128i {
1104    unsafe {
1105        #[rustfmt::skip]
1106        transmute(i8x16::new(
1107            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
1108        ))
1109    }
1110}
1111
/// Broadcasts the 64-bit integer `a` to both elements of the result.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set1_epi64x(a: i64) -> __m128i {
    _mm_set_epi64x(a, a)
}
1122
/// Broadcasts the 32-bit integer `a` to all four elements of the result.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set1_epi32(a: i32) -> __m128i {
    _mm_set_epi32(a, a, a, a)
}
1133
/// Broadcasts the 16-bit integer `a` to all eight elements of the result.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set1_epi16(a: i16) -> __m128i {
    _mm_set_epi16(a, a, a, a, a, a, a, a)
}
1144
/// Broadcasts the 8-bit integer `a` to all sixteen elements of the result.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set1_epi8(a: i8) -> __m128i {
    _mm_set_epi8(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
}
1155
/// Sets packed 32-bit integers with the supplied values in reverse order.
///
/// The arguments are forwarded to `_mm_set_epi32` in the opposite order, so
/// the first argument of this function ends up in the lowest element.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
    _mm_set_epi32(e0, e1, e2, e3)
}
1166
/// Sets packed 16-bit integers with the supplied values in reverse order.
///
/// The arguments are forwarded to `_mm_set_epi16` in the opposite order, so
/// the first argument of this function ends up in the lowest element.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_setr_epi16(
    e7: i16,
    e6: i16,
    e5: i16,
    e4: i16,
    e3: i16,
    e2: i16,
    e1: i16,
    e0: i16,
) -> __m128i {
    _mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7)
}
1186
/// Sets packed 8-bit integers with the supplied values in reverse order.
///
/// The arguments are forwarded to `_mm_set_epi8` in the opposite order, so
/// the first argument of this function ends up in the lowest element.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_setr_epi8(
    e15: i8,
    e14: i8,
    e13: i8,
    e12: i8,
    e11: i8,
    e10: i8,
    e9: i8,
    e8: i8,
    e7: i8,
    e6: i8,
    e5: i8,
    e4: i8,
    e3: i8,
    e2: i8,
    e1: i8,
    e0: i8,
) -> __m128i {
    #[rustfmt::skip]
    _mm_set_epi8(
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
    )
}
1217
/// Returns a vector with all bits set to zero.
///
/// The value is produced by `mem::zeroed()` inside an inline `const` block,
/// so it is evaluated at compile time.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_setzero_si128() -> __m128i {
    const { unsafe { mem::zeroed() } }
}
1228
1229#[inline]
1233#[target_feature(enable = "sse2")]
1234#[stable(feature = "simd_x86", since = "1.27.0")]
1235pub unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i {
1236    _mm_set_epi64x(0, ptr::read_unaligned(mem_addr as *const i64))
1237}
1238
/// Loads 128 bits of integer data from memory.
///
/// The `movaps` instruction check is skipped on 32-bit x86 MSVC targets.
///
/// # Safety
///
/// The body dereferences `mem_addr` directly, so the pointer must be valid
/// for reads and suitably aligned for `__m128i`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(movaps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i {
    *mem_addr
}
1254
1255#[inline]
1261#[target_feature(enable = "sse2")]
1262#[cfg_attr(test, assert_instr(movups))]
1263#[stable(feature = "simd_x86", since = "1.27.0")]
1264pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
1265    let mut dst: __m128i = _mm_undefined_si128();
1266    ptr::copy_nonoverlapping(
1267        mem_addr as *const u8,
1268        ptr::addr_of_mut!(dst) as *mut u8,
1269        mem::size_of::<__m128i>(),
1270    );
1271    dst
1272}
1273
1274#[inline]
1294#[target_feature(enable = "sse2")]
1295#[cfg_attr(test, assert_instr(maskmovdqu))]
1296#[stable(feature = "simd_x86", since = "1.27.0")]
1297pub unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8) {
1298    maskmovdqu(a.as_i8x16(), mask.as_i8x16(), mem_addr)
1299}
1300
/// Stores 128 bits of integer data from `a` into memory.
///
/// The `movaps` instruction check is skipped on 32-bit x86 MSVC targets.
///
/// # Safety
///
/// The body writes through `mem_addr` directly, so the pointer must be valid
/// for writes and suitably aligned for `__m128i`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(movaps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) {
    *mem_addr = a;
}
1316
1317#[inline]
1323#[target_feature(enable = "sse2")]
1324#[cfg_attr(test, assert_instr(movups))] #[stable(feature = "simd_x86", since = "1.27.0")]
1326pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) {
1327    mem_addr.write_unaligned(a);
1328}
1329
1330#[inline]
1336#[target_feature(enable = "sse2")]
1337#[stable(feature = "simd_x86", since = "1.27.0")]
1338pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) {
1339    ptr::copy_nonoverlapping(ptr::addr_of!(a) as *const u8, mem_addr as *mut u8, 8);
1340}
1341
/// Stores `a` into the memory at `mem_addr` using the `movntdq` instruction,
/// which Intel documents as a non-temporal store.
///
/// # Safety
///
/// NOTE(review): alignment and memory-ordering requirements are those of
/// `movntdq` and are not visible in this block — confirm against Intel's
/// documentation before relying on them.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movntdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) {
    // NOTE(review): `vps!` is defined outside this view; presumably it builds
    // the instruction template around the `{p}` memory operand — confirm.
    crate::arch::asm!(
        vps!("movntdq",  ",{a}"),
        p = in(reg) mem_addr,
        a = in(xmm_reg) a,
        options(nostack, preserves_flags),
    );
}
1368
1369#[inline]
1384#[target_feature(enable = "sse2")]
1385#[cfg_attr(test, assert_instr(movnti))]
1386#[stable(feature = "simd_x86", since = "1.27.0")]
1387pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) {
1388    crate::arch::asm!(
1389        vps!("movnti", ",{a:e}"), p = in(reg) mem_addr,
1391        a = in(reg) a,
1392        options(nostack, preserves_flags),
1393    );
1394}
1395
1396#[inline]
1401#[target_feature(enable = "sse2")]
1402#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movq))]
1404#[stable(feature = "simd_x86", since = "1.27.0")]
1405pub fn _mm_move_epi64(a: __m128i) -> __m128i {
1406    unsafe {
1407        let r: i64x2 = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 2]);
1408        transmute(r)
1409    }
1410}
1411
1412#[inline]
1417#[target_feature(enable = "sse2")]
1418#[cfg_attr(test, assert_instr(packsswb))]
1419#[stable(feature = "simd_x86", since = "1.27.0")]
1420pub fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i {
1421    unsafe { transmute(packsswb(a.as_i16x8(), b.as_i16x8())) }
1422}
1423
1424#[inline]
1429#[target_feature(enable = "sse2")]
1430#[cfg_attr(test, assert_instr(packssdw))]
1431#[stable(feature = "simd_x86", since = "1.27.0")]
1432pub fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i {
1433    unsafe { transmute(packssdw(a.as_i32x4(), b.as_i32x4())) }
1434}
1435
1436#[inline]
1441#[target_feature(enable = "sse2")]
1442#[cfg_attr(test, assert_instr(packuswb))]
1443#[stable(feature = "simd_x86", since = "1.27.0")]
1444pub fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i {
1445    unsafe { transmute(packuswb(a.as_i16x8(), b.as_i16x8())) }
1446}
1447
1448#[inline]
1452#[target_feature(enable = "sse2")]
1453#[cfg_attr(test, assert_instr(pextrw, IMM8 = 7))]
1454#[rustc_legacy_const_generics(1)]
1455#[stable(feature = "simd_x86", since = "1.27.0")]
1456pub fn _mm_extract_epi16<const IMM8: i32>(a: __m128i) -> i32 {
1457    static_assert_uimm_bits!(IMM8, 3);
1458    unsafe { simd_extract!(a.as_u16x8(), IMM8 as u32, u16) as i32 }
1459}
1460
1461#[inline]
1465#[target_feature(enable = "sse2")]
1466#[cfg_attr(test, assert_instr(pinsrw, IMM8 = 7))]
1467#[rustc_legacy_const_generics(2)]
1468#[stable(feature = "simd_x86", since = "1.27.0")]
1469pub fn _mm_insert_epi16<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
1470    static_assert_uimm_bits!(IMM8, 3);
1471    unsafe { transmute(simd_insert!(a.as_i16x8(), IMM8 as u32, i as i16)) }
1472}
1473
1474#[inline]
1478#[target_feature(enable = "sse2")]
1479#[cfg_attr(test, assert_instr(pmovmskb))]
1480#[stable(feature = "simd_x86", since = "1.27.0")]
1481pub fn _mm_movemask_epi8(a: __m128i) -> i32 {
1482    unsafe {
1483        let z = i8x16::ZERO;
1484        let m: i8x16 = simd_lt(a.as_i8x16(), z);
1485        simd_bitmask::<_, u16>(m) as u32 as i32
1486    }
1487}
1488
1489#[inline]
1493#[target_feature(enable = "sse2")]
1494#[cfg_attr(test, assert_instr(pshufd, IMM8 = 9))]
1495#[rustc_legacy_const_generics(1)]
1496#[stable(feature = "simd_x86", since = "1.27.0")]
1497pub fn _mm_shuffle_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
1498    static_assert_uimm_bits!(IMM8, 8);
1499    unsafe {
1500        let a = a.as_i32x4();
1501        let x: i32x4 = simd_shuffle!(
1502            a,
1503            a,
1504            [
1505                IMM8 as u32 & 0b11,
1506                (IMM8 as u32 >> 2) & 0b11,
1507                (IMM8 as u32 >> 4) & 0b11,
1508                (IMM8 as u32 >> 6) & 0b11,
1509            ],
1510        );
1511        transmute(x)
1512    }
1513}
1514
1515#[inline]
1523#[target_feature(enable = "sse2")]
1524#[cfg_attr(test, assert_instr(pshufhw, IMM8 = 9))]
1525#[rustc_legacy_const_generics(1)]
1526#[stable(feature = "simd_x86", since = "1.27.0")]
1527pub fn _mm_shufflehi_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
1528    static_assert_uimm_bits!(IMM8, 8);
1529    unsafe {
1530        let a = a.as_i16x8();
1531        let x: i16x8 = simd_shuffle!(
1532            a,
1533            a,
1534            [
1535                0,
1536                1,
1537                2,
1538                3,
1539                (IMM8 as u32 & 0b11) + 4,
1540                ((IMM8 as u32 >> 2) & 0b11) + 4,
1541                ((IMM8 as u32 >> 4) & 0b11) + 4,
1542                ((IMM8 as u32 >> 6) & 0b11) + 4,
1543            ],
1544        );
1545        transmute(x)
1546    }
1547}
1548
1549#[inline]
1557#[target_feature(enable = "sse2")]
1558#[cfg_attr(test, assert_instr(pshuflw, IMM8 = 9))]
1559#[rustc_legacy_const_generics(1)]
1560#[stable(feature = "simd_x86", since = "1.27.0")]
1561pub fn _mm_shufflelo_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
1562    static_assert_uimm_bits!(IMM8, 8);
1563    unsafe {
1564        let a = a.as_i16x8();
1565        let x: i16x8 = simd_shuffle!(
1566            a,
1567            a,
1568            [
1569                IMM8 as u32 & 0b11,
1570                (IMM8 as u32 >> 2) & 0b11,
1571                (IMM8 as u32 >> 4) & 0b11,
1572                (IMM8 as u32 >> 6) & 0b11,
1573                4,
1574                5,
1575                6,
1576                7,
1577            ],
1578        );
1579        transmute(x)
1580    }
1581}
1582
1583#[inline]
1587#[target_feature(enable = "sse2")]
1588#[cfg_attr(test, assert_instr(punpckhbw))]
1589#[stable(feature = "simd_x86", since = "1.27.0")]
1590pub fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i {
1591    unsafe {
1592        transmute::<i8x16, _>(simd_shuffle!(
1593            a.as_i8x16(),
1594            b.as_i8x16(),
1595            [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31],
1596        ))
1597    }
1598}
1599
1600#[inline]
1604#[target_feature(enable = "sse2")]
1605#[cfg_attr(test, assert_instr(punpckhwd))]
1606#[stable(feature = "simd_x86", since = "1.27.0")]
1607pub fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i {
1608    unsafe {
1609        let x = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]);
1610        transmute::<i16x8, _>(x)
1611    }
1612}
1613
1614#[inline]
1618#[target_feature(enable = "sse2")]
1619#[cfg_attr(test, assert_instr(unpckhps))]
1620#[stable(feature = "simd_x86", since = "1.27.0")]
1621pub fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i {
1622    unsafe { transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7])) }
1623}
1624
1625#[inline]
1629#[target_feature(enable = "sse2")]
1630#[cfg_attr(test, assert_instr(unpckhpd))]
1631#[stable(feature = "simd_x86", since = "1.27.0")]
1632pub fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i {
1633    unsafe { transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [1, 3])) }
1634}
1635
1636#[inline]
1640#[target_feature(enable = "sse2")]
1641#[cfg_attr(test, assert_instr(punpcklbw))]
1642#[stable(feature = "simd_x86", since = "1.27.0")]
1643pub fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i {
1644    unsafe {
1645        transmute::<i8x16, _>(simd_shuffle!(
1646            a.as_i8x16(),
1647            b.as_i8x16(),
1648            [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23],
1649        ))
1650    }
1651}
1652
1653#[inline]
1657#[target_feature(enable = "sse2")]
1658#[cfg_attr(test, assert_instr(punpcklwd))]
1659#[stable(feature = "simd_x86", since = "1.27.0")]
1660pub fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i {
1661    unsafe {
1662        let x = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]);
1663        transmute::<i16x8, _>(x)
1664    }
1665}
1666
1667#[inline]
1671#[target_feature(enable = "sse2")]
1672#[cfg_attr(test, assert_instr(unpcklps))]
1673#[stable(feature = "simd_x86", since = "1.27.0")]
1674pub fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i {
1675    unsafe { transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5])) }
1676}
1677
1678#[inline]
1682#[target_feature(enable = "sse2")]
1683#[cfg_attr(test, assert_instr(movlhps))]
1684#[stable(feature = "simd_x86", since = "1.27.0")]
1685pub fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i {
1686    unsafe { transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [0, 2])) }
1687}
1688
1689#[inline]
1694#[target_feature(enable = "sse2")]
1695#[cfg_attr(test, assert_instr(addsd))]
1696#[stable(feature = "simd_x86", since = "1.27.0")]
1697pub fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d {
1698    unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) + _mm_cvtsd_f64(b)) }
1699}
1700
/// Adds the `f64` lanes of `a` and `b` lane by lane.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(addpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { simd_add(a, b) }
}
1712
1713#[inline]
1718#[target_feature(enable = "sse2")]
1719#[cfg_attr(test, assert_instr(divsd))]
1720#[stable(feature = "simd_x86", since = "1.27.0")]
1721pub fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d {
1722    unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) / _mm_cvtsd_f64(b)) }
1723}
1724
/// Divides the `f64` lanes of `a` by the corresponding lanes of `b`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(divpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { simd_div(a, b) }
}
1736
/// Low-lane maximum of `a` and `b`, computed by the `maxsd` binding.
///
/// NOTE(review): the binding is declared outside this view; upper-lane
/// passthrough and NaN handling presumably follow the `maxsd` instruction —
/// confirm against Intel's documentation.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(maxsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { maxsd(a, b) }
}
1748
/// Lane-wise maximum of `a` and `b`, computed by the `maxpd` binding.
///
/// NOTE(review): the binding is declared outside this view; NaN handling
/// presumably follows the `maxpd` instruction — confirm against Intel's
/// documentation.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(maxpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { maxpd(a, b) }
}
1760
/// Low-lane minimum of `a` and `b`, computed by the `minsd` binding.
///
/// NOTE(review): the binding is declared outside this view; upper-lane
/// passthrough and NaN handling presumably follow the `minsd` instruction —
/// confirm against Intel's documentation.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(minsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { minsd(a, b) }
}
1772
/// Lane-wise minimum of `a` and `b`, computed by the `minpd` binding.
///
/// NOTE(review): the binding is declared outside this view; NaN handling
/// presumably follows the `minpd` instruction — confirm against Intel's
/// documentation.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(minpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { minpd(a, b) }
}
1784
1785#[inline]
1790#[target_feature(enable = "sse2")]
1791#[cfg_attr(test, assert_instr(mulsd))]
1792#[stable(feature = "simd_x86", since = "1.27.0")]
1793pub fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d {
1794    unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) * _mm_cvtsd_f64(b)) }
1795}
1796
/// Multiplies the `f64` lanes of `a` and `b` lane by lane.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(mulpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { simd_mul(a, b) }
}
1808
1809#[inline]
1814#[target_feature(enable = "sse2")]
1815#[cfg_attr(test, assert_instr(sqrtsd))]
1816#[stable(feature = "simd_x86", since = "1.27.0")]
1817pub fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d {
1818    unsafe { simd_insert!(a, 0, sqrtf64(_mm_cvtsd_f64(b))) }
1819}
1820
/// Takes the square root of each `f64` lane of `a`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(sqrtpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sqrt_pd(a: __m128d) -> __m128d {
    unsafe { simd_fsqrt(a) }
}
1831
1832#[inline]
1837#[target_feature(enable = "sse2")]
1838#[cfg_attr(test, assert_instr(subsd))]
1839#[stable(feature = "simd_x86", since = "1.27.0")]
1840pub fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d {
1841    unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) - _mm_cvtsd_f64(b)) }
1842}
1843
/// Subtracts the `f64` lanes of `b` from the corresponding lanes of `a`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(subpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { simd_sub(a, b) }
}
1855
1856#[inline]
1861#[target_feature(enable = "sse2")]
1862#[cfg_attr(test, assert_instr(andps))]
1863#[stable(feature = "simd_x86", since = "1.27.0")]
1864pub fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d {
1865    unsafe {
1866        let a: __m128i = transmute(a);
1867        let b: __m128i = transmute(b);
1868        transmute(_mm_and_si128(a, b))
1869    }
1870}
1871
1872#[inline]
1876#[target_feature(enable = "sse2")]
1877#[cfg_attr(test, assert_instr(andnps))]
1878#[stable(feature = "simd_x86", since = "1.27.0")]
1879pub fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d {
1880    unsafe {
1881        let a: __m128i = transmute(a);
1882        let b: __m128i = transmute(b);
1883        transmute(_mm_andnot_si128(a, b))
1884    }
1885}
1886
1887#[inline]
1891#[target_feature(enable = "sse2")]
1892#[cfg_attr(test, assert_instr(orps))]
1893#[stable(feature = "simd_x86", since = "1.27.0")]
1894pub fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d {
1895    unsafe {
1896        let a: __m128i = transmute(a);
1897        let b: __m128i = transmute(b);
1898        transmute(_mm_or_si128(a, b))
1899    }
1900}
1901
1902#[inline]
1906#[target_feature(enable = "sse2")]
1907#[cfg_attr(test, assert_instr(xorps))]
1908#[stable(feature = "simd_x86", since = "1.27.0")]
1909pub fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d {
1910    unsafe {
1911        let a: __m128i = transmute(a);
1912        let b: __m128i = transmute(b);
1913        transmute(_mm_xor_si128(a, b))
1914    }
1915}
1916
/// Low-lane "equal" compare of `a` and `b`.
///
/// Forwards to the `cmpsd` binding with predicate immediate `0`, which the
/// instruction test expects to lower to `cmpeqsd`. The result layout is
/// whatever that binding returns (it is declared outside this view).
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpeqsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmpsd(a, b, 0) }
}
1928
/// Low-lane "less than" compare of `a` and `b`.
///
/// Forwards to the `cmpsd` binding with predicate immediate `1`, which the
/// instruction test expects to lower to `cmpltsd`. The result layout is
/// whatever that binding returns (it is declared outside this view).
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpltsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmpsd(a, b, 1) }
}
1940
/// Low-lane "less than or equal" compare of `a` and `b`.
///
/// Forwards to the `cmpsd` binding with predicate immediate `2`, which the
/// instruction test expects to lower to `cmplesd`. The result layout is
/// whatever that binding returns (it is declared outside this view).
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmplesd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmpsd(a, b, 2) }
}
1952
1953#[inline]
1958#[target_feature(enable = "sse2")]
1959#[cfg_attr(test, assert_instr(cmpltsd))]
1960#[stable(feature = "simd_x86", since = "1.27.0")]
1961pub fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d {
1962    unsafe { simd_insert!(_mm_cmplt_sd(b, a), 1, simd_extract!(a, 1, f64)) }
1963}
1964
1965#[inline]
1970#[target_feature(enable = "sse2")]
1971#[cfg_attr(test, assert_instr(cmplesd))]
1972#[stable(feature = "simd_x86", since = "1.27.0")]
1973pub fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d {
1974    unsafe { simd_insert!(_mm_cmple_sd(b, a), 1, simd_extract!(a, 1, f64)) }
1975}
1976
/// Low-lane "ordered" compare of `a` and `b`.
///
/// Forwards to the `cmpsd` binding with predicate immediate `7`, which the
/// instruction test expects to lower to `cmpordsd`.
/// NOTE(review): presumably true when neither low lane is NaN — confirm
/// against Intel's documentation.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpordsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmpsd(a, b, 7) }
}
1990
/// Low-lane "unordered" compare of `a` and `b`.
///
/// Forwards to the `cmpsd` binding with predicate immediate `3`, which the
/// instruction test expects to lower to `cmpunordsd`.
/// NOTE(review): presumably true when either low lane is NaN — confirm
/// against Intel's documentation.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpunordsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmpsd(a, b, 3) }
}
2003
/// Low-lane "not equal" compare of `a` and `b`.
///
/// Forwards to the `cmpsd` binding with predicate immediate `4`, which the
/// instruction test expects to lower to `cmpneqsd`. The result layout is
/// whatever that binding returns (it is declared outside this view).
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpneqsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmpsd(a, b, 4) }
}
2015
/// Low-lane "not less than" compare of `a` and `b`.
///
/// Forwards to the `cmpsd` binding with predicate immediate `5`, which the
/// instruction test expects to lower to `cmpnltsd`. The result layout is
/// whatever that binding returns (it is declared outside this view).
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnltsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmpsd(a, b, 5) }
}
2027
/// Low-lane "not less than or equal" compare of `a` and `b`.
///
/// Forwards to the `cmpsd` binding with predicate immediate `6`, which the
/// instruction test expects to lower to `cmpnlesd`. The result layout is
/// whatever that binding returns (it is declared outside this view).
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnlesd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmpsd(a, b, 6) }
}
2039
2040#[inline]
2045#[target_feature(enable = "sse2")]
2046#[cfg_attr(test, assert_instr(cmpnltsd))]
2047#[stable(feature = "simd_x86", since = "1.27.0")]
2048pub fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d {
2049    unsafe { simd_insert!(_mm_cmpnlt_sd(b, a), 1, simd_extract!(a, 1, f64)) }
2050}
2051
2052#[inline]
2057#[target_feature(enable = "sse2")]
2058#[cfg_attr(test, assert_instr(cmpnlesd))]
2059#[stable(feature = "simd_x86", since = "1.27.0")]
2060pub fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d {
2061    unsafe { simd_insert!(_mm_cmpnle_sd(b, a), 1, simd_extract!(a, 1, f64)) }
2062}
2063
/// Lane-wise "equal" compare of `a` and `b`.
///
/// Forwards to the `cmppd` binding with predicate immediate `0`, which the
/// instruction test expects to lower to `cmpeqpd`. The result layout is
/// whatever that binding returns (it is declared outside this view).
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpeqpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmppd(a, b, 0) }
}
2074
/// Lane-wise "less than" compare of `a` and `b`.
///
/// Forwards to the `cmppd` binding with predicate immediate `1`, which the
/// instruction test expects to lower to `cmpltpd`. The result layout is
/// whatever that binding returns (it is declared outside this view).
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpltpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmppd(a, b, 1) }
}
2085
/// Lane-wise "less than or equal" compare of `a` and `b`.
///
/// Forwards to the `cmppd` binding with predicate immediate `2`, which the
/// instruction test expects to lower to `cmplepd`. The result layout is
/// whatever that binding returns (it is declared outside this view).
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmplepd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmppd(a, b, 2) }
}
2096
/// Lane-wise "greater than" compare of `a` and `b`.
///
/// Implemented as `_mm_cmplt_pd` with the operands swapped.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpltpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d {
    _mm_cmplt_pd(b, a)
}
2107
/// Lane-wise "greater than or equal" compare of `a` and `b`.
///
/// Implemented as `_mm_cmple_pd` with the operands swapped.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmplepd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d {
    _mm_cmple_pd(b, a)
}
2118
/// Lane-wise "ordered" compare of `a` and `b`.
///
/// Forwards to the `cmppd` binding with predicate immediate `7`, which the
/// instruction test expects to lower to `cmpordpd`.
/// NOTE(review): presumably true for lanes where neither input is NaN —
/// confirm against Intel's documentation.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpordpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmppd(a, b, 7) }
}
2129
/// Lane-wise "unordered" compare of `a` and `b`.
///
/// Forwards to the `cmppd` binding with predicate immediate `3`, which the
/// instruction test expects to lower to `cmpunordpd`.
/// NOTE(review): presumably true for lanes where either input is NaN —
/// confirm against Intel's documentation.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpunordpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmppd(a, b, 3) }
}
2140
/// Lane-wise "not equal" compare of `a` and `b`.
///
/// Forwards to the `cmppd` binding with predicate immediate `4`, which the
/// instruction test expects to lower to `cmpneqpd`. The result layout is
/// whatever that binding returns (it is declared outside this view).
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpneqpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmppd(a, b, 4) }
}
2151
/// Lane-wise "not less than" compare of `a` and `b`.
///
/// Forwards to the `cmppd` binding with predicate immediate `5`, which the
/// instruction test expects to lower to `cmpnltpd`. The result layout is
/// whatever that binding returns (it is declared outside this view).
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnltpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmppd(a, b, 5) }
}
2162
/// Lane-wise "not less than or equal" compare of `a` and `b`.
///
/// Forwards to the `cmppd` binding with predicate immediate `6`, which the
/// instruction test expects to lower to `cmpnlepd`. The result layout is
/// whatever that binding returns (it is declared outside this view).
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnlepd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmppd(a, b, 6) }
}
2173
/// Lane-wise "not greater than" compare of `a` and `b`.
///
/// Implemented as `_mm_cmpnlt_pd` with the operands swapped.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnltpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d {
    _mm_cmpnlt_pd(b, a)
}
2184
/// Lane-wise "not greater than or equal" compare of `a` and `b`.
///
/// Implemented as `_mm_cmpnle_pd` with the operands swapped.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnlepd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d {
    _mm_cmpnle_pd(b, a)
}
2196
/// Returns the `i32` result of the `comieqsd` binding applied to `a` and `b`;
/// the instruction test expects a `comisd`.
///
/// NOTE(review): presumably non-zero when the low lanes compare equal — the
/// binding is declared outside this view, confirm against Intel's docs.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { comieqsd(a, b) }
}
2207
/// Returns the `i32` result of the `comiltsd` binding applied to `a` and `b`;
/// the instruction test expects a `comisd`.
///
/// NOTE(review): presumably non-zero when the low lane of `a` is less than
/// that of `b` — the binding is declared outside this view, confirm against
/// Intel's docs.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { comiltsd(a, b) }
}
2218
/// Returns the `i32` result of the `comilesd` binding applied to `a` and `b`;
/// the instruction test expects a `comisd`.
///
/// NOTE(review): presumably non-zero when the low lane of `a` is less than or
/// equal to that of `b` — the binding is declared outside this view, confirm
/// against Intel's docs.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { comilesd(a, b) }
}
2229
/// Returns the `i32` result of the `comigtsd` binding applied to `a` and `b`;
/// the instruction test expects a `comisd`.
///
/// NOTE(review): presumably non-zero when the low lane of `a` is greater than
/// that of `b` — the binding is declared outside this view, confirm against
/// Intel's docs.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { comigtsd(a, b) }
}
2240
/// Returns the `i32` result of the `comigesd` binding applied to `a` and `b`;
/// the instruction test expects a `comisd`.
///
/// NOTE(review): presumably non-zero when the low lane of `a` is greater than
/// or equal to that of `b` — the binding is declared outside this view,
/// confirm against Intel's docs.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { comigesd(a, b) }
}
2251
/// Returns the `i32` result of the `comineqsd` binding applied to `a` and `b`;
/// the instruction test expects a `comisd`.
///
/// NOTE(review): presumably non-zero when the low lanes compare not equal —
/// the binding is declared outside this view, confirm against Intel's docs.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { comineqsd(a, b) }
}
2262
/// Returns the `i32` result of the `ucomieqsd` binding applied to `a` and `b`;
/// the instruction test expects a `ucomisd`.
///
/// NOTE(review): presumably non-zero when the low lanes compare equal — the
/// binding is declared outside this view, confirm against Intel's docs.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { ucomieqsd(a, b) }
}
2273
/// Returns the `i32` result of the `ucomiltsd` binding applied to `a` and `b`;
/// the instruction test expects a `ucomisd`.
///
/// NOTE(review): presumably non-zero when the low lane of `a` is less than
/// that of `b` — the binding is declared outside this view, confirm against
/// Intel's docs.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { ucomiltsd(a, b) }
}
2284
/// Returns the `i32` result of the `ucomilesd` binding applied to `a` and `b`;
/// the instruction test expects a `ucomisd`.
///
/// NOTE(review): presumably non-zero when the low lane of `a` is less than or
/// equal to that of `b` — the binding is declared outside this view, confirm
/// against Intel's docs.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { ucomilesd(a, b) }
}
2295
/// Returns the `i32` result of the `ucomigtsd` binding applied to `a` and `b`;
/// the instruction test expects a `ucomisd`.
///
/// NOTE(review): presumably non-zero when the low lane of `a` is greater than
/// that of `b` — the binding is declared outside this view, confirm against
/// Intel's docs.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { ucomigtsd(a, b) }
}
2306
/// Returns the `i32` result of the `ucomigesd` binding applied to `a` and `b`;
/// the instruction test expects a `ucomisd`.
///
/// NOTE(review): presumably non-zero when the low lane of `a` is greater than
/// or equal to that of `b` — the binding is declared outside this view,
/// confirm against Intel's docs.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { ucomigesd(a, b) }
}
2317
/// Returns the `i32` result of the `ucomineqsd` binding applied to `a` and
/// `b`; the instruction test expects a `ucomisd`.
///
/// NOTE(review): presumably non-zero when the low lanes compare not equal —
/// the binding is declared outside this view, confirm against Intel's docs.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { ucomineqsd(a, b) }
}
2328
2329#[inline]
2334#[target_feature(enable = "sse2")]
2335#[cfg_attr(test, assert_instr(cvtpd2ps))]
2336#[stable(feature = "simd_x86", since = "1.27.0")]
2337pub fn _mm_cvtpd_ps(a: __m128d) -> __m128 {
2338    unsafe {
2339        let r = simd_cast::<_, f32x2>(a.as_f64x2());
2340        let zero = f32x2::ZERO;
2341        transmute::<f32x4, _>(simd_shuffle!(r, zero, [0, 1, 2, 3]))
2342    }
2343}
2344
2345#[inline]
2351#[target_feature(enable = "sse2")]
2352#[cfg_attr(test, assert_instr(cvtps2pd))]
2353#[stable(feature = "simd_x86", since = "1.27.0")]
2354pub fn _mm_cvtps_pd(a: __m128) -> __m128d {
2355    unsafe {
2356        let a = a.as_f32x4();
2357        transmute(simd_cast::<f32x2, f64x2>(simd_shuffle!(a, a, [0, 1])))
2358    }
2359}
2360
/// Converts the `f64` lanes of `a` to 32-bit integers through the `cvtpd2dq`
/// binding and returns the result reinterpreted as `__m128i`.
///
/// NOTE(review): rounding behaviour and the contents of the upper lanes are
/// defined by the binding (declared outside this view) — confirm against
/// Intel's documentation.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtpd2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtpd_epi32(a: __m128d) -> __m128i {
    unsafe { transmute(cvtpd2dq(a)) }
}
2372
/// Converts `a` to an `i32` through the `cvtsd2si` binding.
///
/// NOTE(review): presumably converts the low `f64` lane using the current
/// rounding mode — the binding is declared outside this view, confirm against
/// Intel's documentation.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsd2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtsd_si32(a: __m128d) -> i32 {
    unsafe { cvtsd2si(a) }
}
2384
/// Combines `a` and `b` through the `cvtsd2ss` binding.
///
/// NOTE(review): presumably converts the low `f64` lane of `b` to `f32`,
/// places it in the low lane of the result and takes the remaining lanes from
/// `a` — the binding is declared outside this view, confirm against Intel's
/// documentation.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsd2ss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 {
    unsafe { cvtsd2ss(a, b) }
}
2398
/// Returns lane 0 (the low `f64` lane) of `a`.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtsd_f64(a: __m128d) -> f64 {
    unsafe { simd_extract!(a, 0) }
}
2408
/// Combines `a` and `b` through the `cvtss2sd` binding.
///
/// NOTE(review): presumably converts the low `f32` lane of `b` to `f64`,
/// places it in the low lane of the result and takes the high lane from `a` —
/// the binding is declared outside this view, confirm against Intel's
/// documentation.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtss2sd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d {
    unsafe { cvtss2sd(a, b) }
}
2422
/// Converts the `f64` lanes of `a` to 32-bit integers through the `cvttpd2dq`
/// binding and returns the result reinterpreted as `__m128i`.
///
/// NOTE(review): presumably converts with truncation — the binding is
/// declared outside this view, confirm against Intel's documentation.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvttpd2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvttpd_epi32(a: __m128d) -> __m128i {
    unsafe { transmute(cvttpd2dq(a)) }
}
2434
/// Converts `a` to an `i32` through the `cvttsd2si` binding.
///
/// NOTE(review): presumably converts the low `f64` lane with truncation — the
/// binding is declared outside this view, confirm against Intel's
/// documentation.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvttsd2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvttsd_si32(a: __m128d) -> i32 {
    unsafe { cvttsd2si(a) }
}
2446
/// Converts the `f32` lanes of `a` to 32-bit integers through the `cvttps2dq`
/// binding and returns the result reinterpreted as `__m128i`.
///
/// NOTE(review): presumably converts with truncation — the binding is
/// declared outside this view, confirm against Intel's documentation.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvttps2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvttps_epi32(a: __m128) -> __m128i {
    unsafe { transmute(cvttps2dq(a)) }
}
2458
/// Builds a vector whose lane 0 is `a` and whose lane 1 is `0.0`.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set_sd(a: f64) -> __m128d {
    // `_mm_set_pd` takes the high lane first.
    _mm_set_pd(0.0, a)
}
2469
/// Builds a vector with `a` in both `f64` lanes.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set1_pd(a: f64) -> __m128d {
    _mm_set_pd(a, a)
}
2480
2481#[inline]
2486#[target_feature(enable = "sse2")]
2487#[stable(feature = "simd_x86", since = "1.27.0")]
2488pub fn _mm_set_pd1(a: f64) -> __m128d {
2489    _mm_set_pd(a, a)
2490}
2491
/// Builds a vector from two `f64` values given high lane first: lane 0 is `b`
/// and lane 1 is `a`.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set_pd(a: f64, b: f64) -> __m128d {
    __m128d([b, a])
}
2502
/// Builds a vector from two `f64` values given low lane first: lane 0 is `a`
/// and lane 1 is `b`.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_setr_pd(a: f64, b: f64) -> __m128d {
    // `_mm_set_pd` takes the high lane first, hence the swap.
    _mm_set_pd(b, a)
}
2513
/// Returns a `__m128d` whose bits are all zero.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorp))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_setzero_pd() -> __m128d {
    // Evaluated in a `const` block, so the zeroed value is built at compile time.
    const { unsafe { mem::zeroed() } }
}
2525
2526#[inline]
2533#[target_feature(enable = "sse2")]
2534#[cfg_attr(test, assert_instr(movmskpd))]
2535#[stable(feature = "simd_x86", since = "1.27.0")]
2536pub fn _mm_movemask_pd(a: __m128d) -> i32 {
2537    unsafe {
2540        let mask: i64x2 = simd_lt(transmute(a), i64x2::ZERO);
2541        simd_bitmask::<i64x2, u8>(mask).into()
2542    }
2543}
2544
2545#[inline]
2552#[target_feature(enable = "sse2")]
2553#[cfg_attr(
2554    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
2555    assert_instr(movaps)
2556)]
2557#[stable(feature = "simd_x86", since = "1.27.0")]
2558#[allow(clippy::cast_ptr_alignment)]
2559pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d {
2560    *(mem_addr as *const __m128d)
2561}
2562
2563#[inline]
2568#[target_feature(enable = "sse2")]
2569#[cfg_attr(test, assert_instr(movsd))]
2570#[stable(feature = "simd_x86", since = "1.27.0")]
2571pub unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d {
2572    _mm_setr_pd(*mem_addr, 0.)
2573}
2574
2575#[inline]
2581#[target_feature(enable = "sse2")]
2582#[cfg_attr(test, assert_instr(movhps))]
2583#[stable(feature = "simd_x86", since = "1.27.0")]
2584pub unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
2585    _mm_setr_pd(simd_extract!(a, 0), *mem_addr)
2586}
2587
2588#[inline]
2594#[target_feature(enable = "sse2")]
2595#[cfg_attr(test, assert_instr(movlps))]
2596#[stable(feature = "simd_x86", since = "1.27.0")]
2597pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
2598    _mm_setr_pd(*mem_addr, simd_extract!(a, 1))
2599}
2600
/// Writes the two `f64` lanes of `a` to `mem_addr` with an inline-assembly
/// `movntpd` instruction.
///
/// NOTE(review): the `p` operand is presumably spliced into the instruction
/// text by the `vps!` helper, which is defined outside this view — confirm.
///
/// # Safety
///
/// `mem_addr` must be valid for a 128-bit write.
/// NOTE(review): `movntpd` is a non-temporal store; the alignment and
/// memory-ordering requirements that come with it should be confirmed against
/// Intel's documentation and spelled out here.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movntpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) {
    // The asm is declared not to touch the stack or the flags register.
    crate::arch::asm!(
        vps!("movntpd", ",{a}"),
        p = in(reg) mem_addr,
        a = in(xmm_reg) a,
        options(nostack, preserves_flags),
    );
}
2629
2630#[inline]
2635#[target_feature(enable = "sse2")]
2636#[cfg_attr(test, assert_instr(movlps))]
2637#[stable(feature = "simd_x86", since = "1.27.0")]
2638pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) {
2639    *mem_addr = simd_extract!(a, 0)
2640}
2641
2642#[inline]
2648#[target_feature(enable = "sse2")]
2649#[cfg_attr(
2650    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
2651    assert_instr(movaps)
2652)]
2653#[stable(feature = "simd_x86", since = "1.27.0")]
2654#[allow(clippy::cast_ptr_alignment)]
2655pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) {
2656    *(mem_addr as *mut __m128d) = a;
2657}
2658
2659#[inline]
2665#[target_feature(enable = "sse2")]
2666#[cfg_attr(test, assert_instr(movups))] #[stable(feature = "simd_x86", since = "1.27.0")]
2668pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) {
2669    mem_addr.cast::<__m128d>().write_unaligned(a);
2670}
2671
2672#[inline]
2678#[target_feature(enable = "sse2")]
2679#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2680pub unsafe fn _mm_storeu_si16(mem_addr: *mut u8, a: __m128i) {
2681    ptr::write_unaligned(mem_addr as *mut i16, simd_extract(a.as_i16x8(), 0))
2682}
2683
2684#[inline]
2690#[target_feature(enable = "sse2")]
2691#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2692pub unsafe fn _mm_storeu_si32(mem_addr: *mut u8, a: __m128i) {
2693    ptr::write_unaligned(mem_addr as *mut i32, simd_extract(a.as_i32x4(), 0))
2694}
2695
2696#[inline]
2702#[target_feature(enable = "sse2")]
2703#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2704pub unsafe fn _mm_storeu_si64(mem_addr: *mut u8, a: __m128i) {
2705    ptr::write_unaligned(mem_addr as *mut i64, simd_extract(a.as_i64x2(), 0))
2706}
2707
2708#[inline]
2714#[target_feature(enable = "sse2")]
2715#[stable(feature = "simd_x86", since = "1.27.0")]
2716#[allow(clippy::cast_ptr_alignment)]
2717pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) {
2718    let b: __m128d = simd_shuffle!(a, a, [0, 0]);
2719    *(mem_addr as *mut __m128d) = b;
2720}
2721
2722#[inline]
2728#[target_feature(enable = "sse2")]
2729#[stable(feature = "simd_x86", since = "1.27.0")]
2730#[allow(clippy::cast_ptr_alignment)]
2731pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) {
2732    let b: __m128d = simd_shuffle!(a, a, [0, 0]);
2733    *(mem_addr as *mut __m128d) = b;
2734}
2735
2736#[inline]
2743#[target_feature(enable = "sse2")]
2744#[stable(feature = "simd_x86", since = "1.27.0")]
2745#[allow(clippy::cast_ptr_alignment)]
2746pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) {
2747    let b: __m128d = simd_shuffle!(a, a, [1, 0]);
2748    *(mem_addr as *mut __m128d) = b;
2749}
2750
2751#[inline]
2756#[target_feature(enable = "sse2")]
2757#[cfg_attr(test, assert_instr(movhps))]
2758#[stable(feature = "simd_x86", since = "1.27.0")]
2759pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) {
2760    *mem_addr = simd_extract!(a, 1);
2761}
2762
2763#[inline]
2768#[target_feature(enable = "sse2")]
2769#[cfg_attr(test, assert_instr(movlps))]
2770#[stable(feature = "simd_x86", since = "1.27.0")]
2771pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) {
2772    *mem_addr = simd_extract!(a, 0);
2773}
2774
2775#[inline]
2780#[target_feature(enable = "sse2")]
2781#[stable(feature = "simd_x86", since = "1.27.0")]
2783pub unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d {
2784    let d = *mem_addr;
2785    _mm_setr_pd(d, d)
2786}
2787
/// Reads one `f64` from `mem_addr` and returns it in both lanes of the result.
///
/// Forwards to [`_mm_load1_pd`].
///
/// # Safety
///
/// Same requirements as [`_mm_load1_pd`]: `mem_addr` is dereferenced as a
/// `*const f64`.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d {
    _mm_load1_pd(mem_addr)
}
2799
2800#[inline]
2806#[target_feature(enable = "sse2")]
2807#[cfg_attr(
2808    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
2809    assert_instr(movaps)
2810)]
2811#[stable(feature = "simd_x86", since = "1.27.0")]
2812pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d {
2813    let a = _mm_load_pd(mem_addr);
2814    simd_shuffle!(a, a, [1, 0])
2815}
2816
2817#[inline]
2823#[target_feature(enable = "sse2")]
2824#[cfg_attr(test, assert_instr(movups))]
2825#[stable(feature = "simd_x86", since = "1.27.0")]
2826pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d {
2827    let mut dst = _mm_undefined_pd();
2828    ptr::copy_nonoverlapping(
2829        mem_addr as *const u8,
2830        ptr::addr_of_mut!(dst) as *mut u8,
2831        mem::size_of::<__m128d>(),
2832    );
2833    dst
2834}
2835
2836#[inline]
2842#[target_feature(enable = "sse2")]
2843#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2844pub unsafe fn _mm_loadu_si16(mem_addr: *const u8) -> __m128i {
2845    transmute(i16x8::new(
2846        ptr::read_unaligned(mem_addr as *const i16),
2847        0,
2848        0,
2849        0,
2850        0,
2851        0,
2852        0,
2853        0,
2854    ))
2855}
2856
2857#[inline]
2863#[target_feature(enable = "sse2")]
2864#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2865pub unsafe fn _mm_loadu_si32(mem_addr: *const u8) -> __m128i {
2866    transmute(i32x4::new(
2867        ptr::read_unaligned(mem_addr as *const i32),
2868        0,
2869        0,
2870        0,
2871    ))
2872}
2873
2874#[inline]
2880#[target_feature(enable = "sse2")]
2881#[stable(feature = "simd_x86_mm_loadu_si64", since = "1.46.0")]
2882pub unsafe fn _mm_loadu_si64(mem_addr: *const u8) -> __m128i {
2883    transmute(i64x2::new(ptr::read_unaligned(mem_addr as *const i64), 0))
2884}
2885
/// Builds a two-lane vector by picking one lane from `a` and one lane from `b`,
/// as selected by the compile-time `MASK`.
///
/// * Result lane 0 is lane `MASK & 1` of `a`.
/// * Result lane 1 is lane `(MASK >> 1) & 1` of `b` (the `+ 2` offsets the index
///   into the second shuffle operand).
///
/// Only the two lowest bits of `MASK` influence the result.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(shufps, MASK = 2))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_shuffle_pd<const MASK: i32>(a: __m128d, b: __m128d) -> __m128d {
    // NOTE(review): presumably rejects, at compile time, a `MASK` that does not fit in
    // 8 unsigned bits — confirm against the macro definition.
    static_assert_uimm_bits!(MASK, 8);
    unsafe { simd_shuffle!(a, b, [MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 2]) }
}
2900
2901#[inline]
2907#[target_feature(enable = "sse2")]
2908#[cfg_attr(test, assert_instr(movsd))]
2909#[stable(feature = "simd_x86", since = "1.27.0")]
2910pub fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d {
2911    unsafe { _mm_setr_pd(simd_extract!(b, 0), simd_extract!(a, 1)) }
2912}
2913
2914#[inline]
2919#[target_feature(enable = "sse2")]
2920#[stable(feature = "simd_x86", since = "1.27.0")]
2921pub fn _mm_castpd_ps(a: __m128d) -> __m128 {
2922    unsafe { transmute(a) }
2923}
2924
2925#[inline]
2930#[target_feature(enable = "sse2")]
2931#[stable(feature = "simd_x86", since = "1.27.0")]
2932pub fn _mm_castpd_si128(a: __m128d) -> __m128i {
2933    unsafe { transmute(a) }
2934}
2935
2936#[inline]
2941#[target_feature(enable = "sse2")]
2942#[stable(feature = "simd_x86", since = "1.27.0")]
2943pub fn _mm_castps_pd(a: __m128) -> __m128d {
2944    unsafe { transmute(a) }
2945}
2946
2947#[inline]
2952#[target_feature(enable = "sse2")]
2953#[stable(feature = "simd_x86", since = "1.27.0")]
2954pub fn _mm_castps_si128(a: __m128) -> __m128i {
2955    unsafe { transmute(a) }
2956}
2957
2958#[inline]
2963#[target_feature(enable = "sse2")]
2964#[stable(feature = "simd_x86", since = "1.27.0")]
2965pub fn _mm_castsi128_pd(a: __m128i) -> __m128d {
2966    unsafe { transmute(a) }
2967}
2968
2969#[inline]
2974#[target_feature(enable = "sse2")]
2975#[stable(feature = "simd_x86", since = "1.27.0")]
2976pub fn _mm_castsi128_ps(a: __m128i) -> __m128 {
2977    unsafe { transmute(a) }
2978}
2979
2980#[inline]
2987#[target_feature(enable = "sse2")]
2988#[stable(feature = "simd_x86", since = "1.27.0")]
2989pub fn _mm_undefined_pd() -> __m128d {
2990    const { unsafe { mem::zeroed() } }
2991}
2992
2993#[inline]
3000#[target_feature(enable = "sse2")]
3001#[stable(feature = "simd_x86", since = "1.27.0")]
3002pub fn _mm_undefined_si128() -> __m128i {
3003    const { unsafe { mem::zeroed() } }
3004}
3005
/// Interleaves the upper lanes of the two inputs.
///
/// The result holds lane 1 of `a` in its lower lane and lane 1 of `b` in its upper
/// lane (shuffle index 3 addresses lane 1 of the second operand).
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(unpckhpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { simd_shuffle!(a, b, [1, 3]) }
}
3020
/// Interleaves the lower lanes of the two inputs.
///
/// The result holds lane 0 of `a` in its lower lane and lane 0 of `b` in its upper
/// lane (shuffle index 2 addresses lane 0 of the second operand).
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movlhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { simd_shuffle!(a, b, [0, 2]) }
}
3035
// Declarations of the LLVM SSE2 intrinsics used by this module. Each function is bound,
// through `link_name`, to the `llvm.x86.sse2.*` intrinsic named in the attribute.
// NOTE(review): `improper_ctypes` is presumably silenced because SIMD vector types
// appear in these signatures — confirm.
#[allow(improper_ctypes)]
unsafe extern "C" {
    // Spin-wait hint, cache-line flush and memory fences.
    #[link_name = "llvm.x86.sse2.pause"]
    fn pause();
    #[link_name = "llvm.x86.sse2.clflush"]
    fn clflush(p: *const u8);
    #[link_name = "llvm.x86.sse2.lfence"]
    fn lfence();
    #[link_name = "llvm.x86.sse2.mfence"]
    fn mfence();
    // Integer multiply-add and sum of absolute differences.
    #[link_name = "llvm.x86.sse2.pmadd.wd"]
    fn pmaddwd(a: i16x8, b: i16x8) -> i32x4;
    #[link_name = "llvm.x86.sse2.psad.bw"]
    fn psadbw(a: u8x16, b: u8x16) -> u64x2;
    // Integer shifts whose count is supplied as a vector operand.
    #[link_name = "llvm.x86.sse2.psll.w"]
    fn psllw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.psll.d"]
    fn pslld(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.psll.q"]
    fn psllq(a: i64x2, count: i64x2) -> i64x2;
    #[link_name = "llvm.x86.sse2.psra.w"]
    fn psraw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.psra.d"]
    fn psrad(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.psrl.w"]
    fn psrlw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.psrl.d"]
    fn psrld(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.psrl.q"]
    fn psrlq(a: i64x2, count: i64x2) -> i64x2;
    #[link_name = "llvm.x86.sse2.cvtps2dq"]
    fn cvtps2dq(a: __m128) -> i32x4;
    #[link_name = "llvm.x86.sse2.maskmov.dqu"]
    fn maskmovdqu(a: i8x16, mask: i8x16, mem_addr: *mut i8);
    // Narrowing packs of two input vectors into one.
    #[link_name = "llvm.x86.sse2.packsswb.128"]
    fn packsswb(a: i16x8, b: i16x8) -> i8x16;
    #[link_name = "llvm.x86.sse2.packssdw.128"]
    fn packssdw(a: i32x4, b: i32x4) -> i16x8;
    #[link_name = "llvm.x86.sse2.packuswb.128"]
    fn packuswb(a: i16x8, b: i16x8) -> u8x16;
    // Double-precision min/max (`sd` and `pd` variants).
    #[link_name = "llvm.x86.sse2.max.sd"]
    fn maxsd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.max.pd"]
    fn maxpd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.min.sd"]
    fn minsd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.min.pd"]
    fn minpd(a: __m128d, b: __m128d) -> __m128d;
    // Double-precision comparisons; `imm8` is passed through to the intrinsic.
    #[link_name = "llvm.x86.sse2.cmp.sd"]
    fn cmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
    #[link_name = "llvm.x86.sse2.cmp.pd"]
    fn cmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
    // `comi*` / `ucomi*` comparisons returning an `i32`.
    #[link_name = "llvm.x86.sse2.comieq.sd"]
    fn comieqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comilt.sd"]
    fn comiltsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comile.sd"]
    fn comilesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comigt.sd"]
    fn comigtsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comige.sd"]
    fn comigesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comineq.sd"]
    fn comineqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomieq.sd"]
    fn ucomieqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomilt.sd"]
    fn ucomiltsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomile.sd"]
    fn ucomilesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomigt.sd"]
    fn ucomigtsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomige.sd"]
    fn ucomigesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomineq.sd"]
    fn ucomineqsd(a: __m128d, b: __m128d) -> i32;
    // Floating-point / integer conversions.
    #[link_name = "llvm.x86.sse2.cvtpd2dq"]
    fn cvtpd2dq(a: __m128d) -> i32x4;
    #[link_name = "llvm.x86.sse2.cvtsd2si"]
    fn cvtsd2si(a: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.cvtsd2ss"]
    fn cvtsd2ss(a: __m128, b: __m128d) -> __m128;
    #[link_name = "llvm.x86.sse2.cvtss2sd"]
    fn cvtss2sd(a: __m128d, b: __m128) -> __m128d;
    #[link_name = "llvm.x86.sse2.cvttpd2dq"]
    fn cvttpd2dq(a: __m128d) -> i32x4;
    #[link_name = "llvm.x86.sse2.cvttsd2si"]
    fn cvttsd2si(a: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.cvttps2dq"]
    fn cvttps2dq(a: __m128) -> i32x4;
}
3127
3128#[cfg(test)]
3129mod tests {
3130    use crate::{
3131        core_arch::{simd::*, x86::*},
3132        hint::black_box,
3133    };
3134    use std::{
3135        boxed, f32, f64,
3136        mem::{self, transmute},
3137        ptr,
3138    };
3139    use stdarch_test::simd_test;
3140
    // Local shorthand for `f64::NAN`.
    const NAN: f64 = f64::NAN;
3142
3143    #[test]
3144    fn test_mm_pause() {
3145        unsafe { _mm_pause() }
3146    }
3147
3148    #[simd_test(enable = "sse2")]
3149    unsafe fn test_mm_clflush() {
3150        let x = 0_u8;
3151        _mm_clflush(ptr::addr_of!(x));
3152    }
3153
    // Smoke test: issues a single `_mm_lfence` and expects the call to return.
    // NOTE(review): ignored under Miri, presumably because Miri cannot execute the
    // fence intrinsic — confirm.
    #[simd_test(enable = "sse2")]
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm_lfence() {
        _mm_lfence();
    }
3160
    // Smoke test: issues a single `_mm_mfence` and expects the call to return.
    // NOTE(review): ignored under Miri, presumably because Miri cannot execute the
    // fence intrinsic — confirm.
    #[simd_test(enable = "sse2")]
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm_mfence() {
        _mm_mfence();
    }
3167
3168    #[simd_test(enable = "sse2")]
3169    unsafe fn test_mm_add_epi8() {
3170        let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3171        #[rustfmt::skip]
3172        let b = _mm_setr_epi8(
3173            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3174        );
3175        let r = _mm_add_epi8(a, b);
3176        #[rustfmt::skip]
3177        let e = _mm_setr_epi8(
3178            16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3179        );
3180        assert_eq_m128i(r, e);
3181    }
3182
3183    #[simd_test(enable = "sse2")]
3184    unsafe fn test_mm_add_epi8_overflow() {
3185        let a = _mm_set1_epi8(0x7F);
3186        let b = _mm_set1_epi8(1);
3187        let r = _mm_add_epi8(a, b);
3188        assert_eq_m128i(r, _mm_set1_epi8(-128));
3189    }
3190
3191    #[simd_test(enable = "sse2")]
3192    unsafe fn test_mm_add_epi16() {
3193        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3194        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3195        let r = _mm_add_epi16(a, b);
3196        let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3197        assert_eq_m128i(r, e);
3198    }
3199
3200    #[simd_test(enable = "sse2")]
3201    unsafe fn test_mm_add_epi32() {
3202        let a = _mm_setr_epi32(0, 1, 2, 3);
3203        let b = _mm_setr_epi32(4, 5, 6, 7);
3204        let r = _mm_add_epi32(a, b);
3205        let e = _mm_setr_epi32(4, 6, 8, 10);
3206        assert_eq_m128i(r, e);
3207    }
3208
3209    #[simd_test(enable = "sse2")]
3210    unsafe fn test_mm_add_epi64() {
3211        let a = _mm_setr_epi64x(0, 1);
3212        let b = _mm_setr_epi64x(2, 3);
3213        let r = _mm_add_epi64(a, b);
3214        let e = _mm_setr_epi64x(2, 4);
3215        assert_eq_m128i(r, e);
3216    }
3217
3218    #[simd_test(enable = "sse2")]
3219    unsafe fn test_mm_adds_epi8() {
3220        let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3221        #[rustfmt::skip]
3222        let b = _mm_setr_epi8(
3223            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3224        );
3225        let r = _mm_adds_epi8(a, b);
3226        #[rustfmt::skip]
3227        let e = _mm_setr_epi8(
3228            16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3229        );
3230        assert_eq_m128i(r, e);
3231    }
3232
3233    #[simd_test(enable = "sse2")]
3234    unsafe fn test_mm_adds_epi8_saturate_positive() {
3235        let a = _mm_set1_epi8(0x7F);
3236        let b = _mm_set1_epi8(1);
3237        let r = _mm_adds_epi8(a, b);
3238        assert_eq_m128i(r, a);
3239    }
3240
3241    #[simd_test(enable = "sse2")]
3242    unsafe fn test_mm_adds_epi8_saturate_negative() {
3243        let a = _mm_set1_epi8(-0x80);
3244        let b = _mm_set1_epi8(-1);
3245        let r = _mm_adds_epi8(a, b);
3246        assert_eq_m128i(r, a);
3247    }
3248
3249    #[simd_test(enable = "sse2")]
3250    unsafe fn test_mm_adds_epi16() {
3251        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3252        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3253        let r = _mm_adds_epi16(a, b);
3254        let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3255        assert_eq_m128i(r, e);
3256    }
3257
3258    #[simd_test(enable = "sse2")]
3259    unsafe fn test_mm_adds_epi16_saturate_positive() {
3260        let a = _mm_set1_epi16(0x7FFF);
3261        let b = _mm_set1_epi16(1);
3262        let r = _mm_adds_epi16(a, b);
3263        assert_eq_m128i(r, a);
3264    }
3265
3266    #[simd_test(enable = "sse2")]
3267    unsafe fn test_mm_adds_epi16_saturate_negative() {
3268        let a = _mm_set1_epi16(-0x8000);
3269        let b = _mm_set1_epi16(-1);
3270        let r = _mm_adds_epi16(a, b);
3271        assert_eq_m128i(r, a);
3272    }
3273
3274    #[simd_test(enable = "sse2")]
3275    unsafe fn test_mm_adds_epu8() {
3276        let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3277        #[rustfmt::skip]
3278        let b = _mm_setr_epi8(
3279            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3280        );
3281        let r = _mm_adds_epu8(a, b);
3282        #[rustfmt::skip]
3283        let e = _mm_setr_epi8(
3284            16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3285        );
3286        assert_eq_m128i(r, e);
3287    }
3288
3289    #[simd_test(enable = "sse2")]
3290    unsafe fn test_mm_adds_epu8_saturate() {
3291        let a = _mm_set1_epi8(!0);
3292        let b = _mm_set1_epi8(1);
3293        let r = _mm_adds_epu8(a, b);
3294        assert_eq_m128i(r, a);
3295    }
3296
3297    #[simd_test(enable = "sse2")]
3298    unsafe fn test_mm_adds_epu16() {
3299        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3300        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3301        let r = _mm_adds_epu16(a, b);
3302        let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3303        assert_eq_m128i(r, e);
3304    }
3305
3306    #[simd_test(enable = "sse2")]
3307    unsafe fn test_mm_adds_epu16_saturate() {
3308        let a = _mm_set1_epi16(!0);
3309        let b = _mm_set1_epi16(1);
3310        let r = _mm_adds_epu16(a, b);
3311        assert_eq_m128i(r, a);
3312    }
3313
3314    #[simd_test(enable = "sse2")]
3315    unsafe fn test_mm_avg_epu8() {
3316        let (a, b) = (_mm_set1_epi8(3), _mm_set1_epi8(9));
3317        let r = _mm_avg_epu8(a, b);
3318        assert_eq_m128i(r, _mm_set1_epi8(6));
3319    }
3320
3321    #[simd_test(enable = "sse2")]
3322    unsafe fn test_mm_avg_epu16() {
3323        let (a, b) = (_mm_set1_epi16(3), _mm_set1_epi16(9));
3324        let r = _mm_avg_epu16(a, b);
3325        assert_eq_m128i(r, _mm_set1_epi16(6));
3326    }
3327
3328    #[simd_test(enable = "sse2")]
3329    unsafe fn test_mm_madd_epi16() {
3330        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
3331        let b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
3332        let r = _mm_madd_epi16(a, b);
3333        let e = _mm_setr_epi32(29, 81, 149, 233);
3334        assert_eq_m128i(r, e);
3335
3336        let a = _mm_setr_epi16(
3339            i16::MAX,
3340            i16::MAX,
3341            i16::MIN,
3342            i16::MIN,
3343            i16::MIN,
3344            i16::MAX,
3345            0,
3346            0,
3347        );
3348        let b = _mm_setr_epi16(
3349            i16::MAX,
3350            i16::MAX,
3351            i16::MIN,
3352            i16::MIN,
3353            i16::MAX,
3354            i16::MIN,
3355            0,
3356            0,
3357        );
3358        let r = _mm_madd_epi16(a, b);
3359        let e = _mm_setr_epi32(0x7FFE0002, i32::MIN, -0x7FFF0000, 0);
3360        assert_eq_m128i(r, e);
3361    }
3362
3363    #[simd_test(enable = "sse2")]
3364    unsafe fn test_mm_max_epi16() {
3365        let a = _mm_set1_epi16(1);
3366        let b = _mm_set1_epi16(-1);
3367        let r = _mm_max_epi16(a, b);
3368        assert_eq_m128i(r, a);
3369    }
3370
3371    #[simd_test(enable = "sse2")]
3372    unsafe fn test_mm_max_epu8() {
3373        let a = _mm_set1_epi8(1);
3374        let b = _mm_set1_epi8(!0);
3375        let r = _mm_max_epu8(a, b);
3376        assert_eq_m128i(r, b);
3377    }
3378
3379    #[simd_test(enable = "sse2")]
3380    unsafe fn test_mm_min_epi16() {
3381        let a = _mm_set1_epi16(1);
3382        let b = _mm_set1_epi16(-1);
3383        let r = _mm_min_epi16(a, b);
3384        assert_eq_m128i(r, b);
3385    }
3386
3387    #[simd_test(enable = "sse2")]
3388    unsafe fn test_mm_min_epu8() {
3389        let a = _mm_set1_epi8(1);
3390        let b = _mm_set1_epi8(!0);
3391        let r = _mm_min_epu8(a, b);
3392        assert_eq_m128i(r, a);
3393    }
3394
3395    #[simd_test(enable = "sse2")]
3396    unsafe fn test_mm_mulhi_epi16() {
3397        let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
3398        let r = _mm_mulhi_epi16(a, b);
3399        assert_eq_m128i(r, _mm_set1_epi16(-16));
3400    }
3401
3402    #[simd_test(enable = "sse2")]
3403    unsafe fn test_mm_mulhi_epu16() {
3404        let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(1001));
3405        let r = _mm_mulhi_epu16(a, b);
3406        assert_eq_m128i(r, _mm_set1_epi16(15));
3407    }
3408
3409    #[simd_test(enable = "sse2")]
3410    unsafe fn test_mm_mullo_epi16() {
3411        let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
3412        let r = _mm_mullo_epi16(a, b);
3413        assert_eq_m128i(r, _mm_set1_epi16(-17960));
3414    }
3415
3416    #[simd_test(enable = "sse2")]
3417    unsafe fn test_mm_mul_epu32() {
3418        let a = _mm_setr_epi64x(1_000_000_000, 1 << 34);
3419        let b = _mm_setr_epi64x(1_000_000_000, 1 << 35);
3420        let r = _mm_mul_epu32(a, b);
3421        let e = _mm_setr_epi64x(1_000_000_000 * 1_000_000_000, 0);
3422        assert_eq_m128i(r, e);
3423    }
3424
3425    #[simd_test(enable = "sse2")]
3426    unsafe fn test_mm_sad_epu8() {
3427        #[rustfmt::skip]
3428        let a = _mm_setr_epi8(
3429            255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
3430            1, 2, 3, 4,
3431            155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8,
3432            1, 2, 3, 4,
3433        );
3434        let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2);
3435        let r = _mm_sad_epu8(a, b);
3436        let e = _mm_setr_epi64x(1020, 614);
3437        assert_eq_m128i(r, e);
3438    }
3439
3440    #[simd_test(enable = "sse2")]
3441    unsafe fn test_mm_sub_epi8() {
3442        let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(6));
3443        let r = _mm_sub_epi8(a, b);
3444        assert_eq_m128i(r, _mm_set1_epi8(-1));
3445    }
3446
3447    #[simd_test(enable = "sse2")]
3448    unsafe fn test_mm_sub_epi16() {
3449        let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(6));
3450        let r = _mm_sub_epi16(a, b);
3451        assert_eq_m128i(r, _mm_set1_epi16(-1));
3452    }
3453
3454    #[simd_test(enable = "sse2")]
3455    unsafe fn test_mm_sub_epi32() {
3456        let (a, b) = (_mm_set1_epi32(5), _mm_set1_epi32(6));
3457        let r = _mm_sub_epi32(a, b);
3458        assert_eq_m128i(r, _mm_set1_epi32(-1));
3459    }
3460
3461    #[simd_test(enable = "sse2")]
3462    unsafe fn test_mm_sub_epi64() {
3463        let (a, b) = (_mm_set1_epi64x(5), _mm_set1_epi64x(6));
3464        let r = _mm_sub_epi64(a, b);
3465        assert_eq_m128i(r, _mm_set1_epi64x(-1));
3466    }
3467
3468    #[simd_test(enable = "sse2")]
3469    unsafe fn test_mm_subs_epi8() {
3470        let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
3471        let r = _mm_subs_epi8(a, b);
3472        assert_eq_m128i(r, _mm_set1_epi8(3));
3473    }
3474
3475    #[simd_test(enable = "sse2")]
3476    unsafe fn test_mm_subs_epi8_saturate_positive() {
3477        let a = _mm_set1_epi8(0x7F);
3478        let b = _mm_set1_epi8(-1);
3479        let r = _mm_subs_epi8(a, b);
3480        assert_eq_m128i(r, a);
3481    }
3482
3483    #[simd_test(enable = "sse2")]
3484    unsafe fn test_mm_subs_epi8_saturate_negative() {
3485        let a = _mm_set1_epi8(-0x80);
3486        let b = _mm_set1_epi8(1);
3487        let r = _mm_subs_epi8(a, b);
3488        assert_eq_m128i(r, a);
3489    }
3490
3491    #[simd_test(enable = "sse2")]
3492    unsafe fn test_mm_subs_epi16() {
3493        let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
3494        let r = _mm_subs_epi16(a, b);
3495        assert_eq_m128i(r, _mm_set1_epi16(3));
3496    }
3497
3498    #[simd_test(enable = "sse2")]
3499    unsafe fn test_mm_subs_epi16_saturate_positive() {
3500        let a = _mm_set1_epi16(0x7FFF);
3501        let b = _mm_set1_epi16(-1);
3502        let r = _mm_subs_epi16(a, b);
3503        assert_eq_m128i(r, a);
3504    }
3505
3506    #[simd_test(enable = "sse2")]
3507    unsafe fn test_mm_subs_epi16_saturate_negative() {
3508        let a = _mm_set1_epi16(-0x8000);
3509        let b = _mm_set1_epi16(1);
3510        let r = _mm_subs_epi16(a, b);
3511        assert_eq_m128i(r, a);
3512    }
3513
3514    #[simd_test(enable = "sse2")]
3515    unsafe fn test_mm_subs_epu8() {
3516        let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
3517        let r = _mm_subs_epu8(a, b);
3518        assert_eq_m128i(r, _mm_set1_epi8(3));
3519    }
3520
3521    #[simd_test(enable = "sse2")]
3522    unsafe fn test_mm_subs_epu8_saturate() {
3523        let a = _mm_set1_epi8(0);
3524        let b = _mm_set1_epi8(1);
3525        let r = _mm_subs_epu8(a, b);
3526        assert_eq_m128i(r, a);
3527    }
3528
3529    #[simd_test(enable = "sse2")]
3530    unsafe fn test_mm_subs_epu16() {
3531        let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
3532        let r = _mm_subs_epu16(a, b);
3533        assert_eq_m128i(r, _mm_set1_epi16(3));
3534    }
3535
3536    #[simd_test(enable = "sse2")]
3537    unsafe fn test_mm_subs_epu16_saturate() {
3538        let a = _mm_set1_epi16(0);
3539        let b = _mm_set1_epi16(1);
3540        let r = _mm_subs_epu16(a, b);
3541        assert_eq_m128i(r, a);
3542    }
3543
3544    #[simd_test(enable = "sse2")]
3545    unsafe fn test_mm_slli_si128() {
3546        #[rustfmt::skip]
3547        let a = _mm_setr_epi8(
3548            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3549        );
3550        let r = _mm_slli_si128::<1>(a);
3551        let e = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3552        assert_eq_m128i(r, e);
3553
3554        #[rustfmt::skip]
3555        let a = _mm_setr_epi8(
3556            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3557        );
3558        let r = _mm_slli_si128::<15>(a);
3559        let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
3560        assert_eq_m128i(r, e);
3561
3562        #[rustfmt::skip]
3563        let a = _mm_setr_epi8(
3564            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3565        );
3566        let r = _mm_slli_si128::<16>(a);
3567        assert_eq_m128i(r, _mm_set1_epi8(0));
3568    }
3569
3570    #[simd_test(enable = "sse2")]
3571    unsafe fn test_mm_slli_epi16() {
3572        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3573        let r = _mm_slli_epi16::<4>(a);
3574        assert_eq_m128i(
3575            r,
3576            _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
3577        );
3578        let r = _mm_slli_epi16::<16>(a);
3579        assert_eq_m128i(r, _mm_set1_epi16(0));
3580    }
3581
3582    #[simd_test(enable = "sse2")]
3583    unsafe fn test_mm_sll_epi16() {
3584        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3585        let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 4));
3586        assert_eq_m128i(
3587            r,
3588            _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
3589        );
3590        let r = _mm_sll_epi16(a, _mm_set_epi64x(4, 0));
3591        assert_eq_m128i(r, a);
3592        let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 16));
3593        assert_eq_m128i(r, _mm_set1_epi16(0));
3594        let r = _mm_sll_epi16(a, _mm_set_epi64x(0, i64::MAX));
3595        assert_eq_m128i(r, _mm_set1_epi16(0));
3596    }
3597
3598    #[simd_test(enable = "sse2")]
3599    unsafe fn test_mm_slli_epi32() {
3600        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3601        let r = _mm_slli_epi32::<4>(a);
3602        assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
3603        let r = _mm_slli_epi32::<32>(a);
3604        assert_eq_m128i(r, _mm_set1_epi32(0));
3605    }
3606
3607    #[simd_test(enable = "sse2")]
3608    unsafe fn test_mm_sll_epi32() {
3609        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3610        let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 4));
3611        assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
3612        let r = _mm_sll_epi32(a, _mm_set_epi64x(4, 0));
3613        assert_eq_m128i(r, a);
3614        let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 32));
3615        assert_eq_m128i(r, _mm_set1_epi32(0));
3616        let r = _mm_sll_epi32(a, _mm_set_epi64x(0, i64::MAX));
3617        assert_eq_m128i(r, _mm_set1_epi32(0));
3618    }
3619
3620    #[simd_test(enable = "sse2")]
3621    unsafe fn test_mm_slli_epi64() {
3622        let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3623        let r = _mm_slli_epi64::<4>(a);
3624        assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
3625        let r = _mm_slli_epi64::<64>(a);
3626        assert_eq_m128i(r, _mm_set1_epi64x(0));
3627    }
3628
3629    #[simd_test(enable = "sse2")]
3630    unsafe fn test_mm_sll_epi64() {
3631        let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3632        let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 4));
3633        assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
3634        let r = _mm_sll_epi64(a, _mm_set_epi64x(4, 0));
3635        assert_eq_m128i(r, a);
3636        let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 64));
3637        assert_eq_m128i(r, _mm_set1_epi64x(0));
3638        let r = _mm_sll_epi64(a, _mm_set_epi64x(0, i64::MAX));
3639        assert_eq_m128i(r, _mm_set1_epi64x(0));
3640    }
3641
3642    #[simd_test(enable = "sse2")]
3643    unsafe fn test_mm_srai_epi16() {
3644        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3645        let r = _mm_srai_epi16::<4>(a);
3646        assert_eq_m128i(
3647            r,
3648            _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10),
3649        );
3650        let r = _mm_srai_epi16::<16>(a);
3651        assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3652    }
3653
3654    #[simd_test(enable = "sse2")]
3655    unsafe fn test_mm_sra_epi16() {
3656        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3657        let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 4));
3658        assert_eq_m128i(
3659            r,
3660            _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10),
3661        );
3662        let r = _mm_sra_epi16(a, _mm_set_epi64x(4, 0));
3663        assert_eq_m128i(r, a);
3664        let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 16));
3665        assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3666        let r = _mm_sra_epi16(a, _mm_set_epi64x(0, i64::MAX));
3667        assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3668    }
3669
3670    #[simd_test(enable = "sse2")]
3671    unsafe fn test_mm_srai_epi32() {
3672        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3673        let r = _mm_srai_epi32::<4>(a);
3674        assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
3675        let r = _mm_srai_epi32::<32>(a);
3676        assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3677    }
3678
3679    #[simd_test(enable = "sse2")]
3680    unsafe fn test_mm_sra_epi32() {
3681        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3682        let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 4));
3683        assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
3684        let r = _mm_sra_epi32(a, _mm_set_epi64x(4, 0));
3685        assert_eq_m128i(r, a);
3686        let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 32));
3687        assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3688        let r = _mm_sra_epi32(a, _mm_set_epi64x(0, i64::MAX));
3689        assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3690    }
3691
3692    #[simd_test(enable = "sse2")]
3693    unsafe fn test_mm_srli_si128() {
3694        #[rustfmt::skip]
3695        let a = _mm_setr_epi8(
3696            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3697        );
3698        let r = _mm_srli_si128::<1>(a);
3699        #[rustfmt::skip]
3700        let e = _mm_setr_epi8(
3701            2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0,
3702        );
3703        assert_eq_m128i(r, e);
3704
3705        #[rustfmt::skip]
3706        let a = _mm_setr_epi8(
3707            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3708        );
3709        let r = _mm_srli_si128::<15>(a);
3710        let e = _mm_setr_epi8(16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3711        assert_eq_m128i(r, e);
3712
3713        #[rustfmt::skip]
3714        let a = _mm_setr_epi8(
3715            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3716        );
3717        let r = _mm_srli_si128::<16>(a);
3718        assert_eq_m128i(r, _mm_set1_epi8(0));
3719    }
3720
3721    #[simd_test(enable = "sse2")]
3722    unsafe fn test_mm_srli_epi16() {
3723        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3724        let r = _mm_srli_epi16::<4>(a);
3725        assert_eq_m128i(
3726            r,
3727            _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0),
3728        );
3729        let r = _mm_srli_epi16::<16>(a);
3730        assert_eq_m128i(r, _mm_set1_epi16(0));
3731    }
3732
3733    #[simd_test(enable = "sse2")]
3734    unsafe fn test_mm_srl_epi16() {
3735        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3736        let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 4));
3737        assert_eq_m128i(
3738            r,
3739            _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0),
3740        );
3741        let r = _mm_srl_epi16(a, _mm_set_epi64x(4, 0));
3742        assert_eq_m128i(r, a);
3743        let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 16));
3744        assert_eq_m128i(r, _mm_set1_epi16(0));
3745        let r = _mm_srl_epi16(a, _mm_set_epi64x(0, i64::MAX));
3746        assert_eq_m128i(r, _mm_set1_epi16(0));
3747    }
3748
3749    #[simd_test(enable = "sse2")]
3750    unsafe fn test_mm_srli_epi32() {
3751        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3752        let r = _mm_srli_epi32::<4>(a);
3753        assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
3754        let r = _mm_srli_epi32::<32>(a);
3755        assert_eq_m128i(r, _mm_set1_epi32(0));
3756    }
3757
3758    #[simd_test(enable = "sse2")]
3759    unsafe fn test_mm_srl_epi32() {
3760        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3761        let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 4));
3762        assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
3763        let r = _mm_srl_epi32(a, _mm_set_epi64x(4, 0));
3764        assert_eq_m128i(r, a);
3765        let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 32));
3766        assert_eq_m128i(r, _mm_set1_epi32(0));
3767        let r = _mm_srl_epi32(a, _mm_set_epi64x(0, i64::MAX));
3768        assert_eq_m128i(r, _mm_set1_epi32(0));
3769    }
3770
3771    #[simd_test(enable = "sse2")]
3772    unsafe fn test_mm_srli_epi64() {
3773        let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3774        let r = _mm_srli_epi64::<4>(a);
3775        assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
3776        let r = _mm_srli_epi64::<64>(a);
3777        assert_eq_m128i(r, _mm_set1_epi64x(0));
3778    }
3779
3780    #[simd_test(enable = "sse2")]
3781    unsafe fn test_mm_srl_epi64() {
3782        let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3783        let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 4));
3784        assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
3785        let r = _mm_srl_epi64(a, _mm_set_epi64x(4, 0));
3786        assert_eq_m128i(r, a);
3787        let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 64));
3788        assert_eq_m128i(r, _mm_set1_epi64x(0));
3789        let r = _mm_srl_epi64(a, _mm_set_epi64x(0, i64::MAX));
3790        assert_eq_m128i(r, _mm_set1_epi64x(0));
3791    }
3792
3793    #[simd_test(enable = "sse2")]
3794    unsafe fn test_mm_and_si128() {
3795        let a = _mm_set1_epi8(5);
3796        let b = _mm_set1_epi8(3);
3797        let r = _mm_and_si128(a, b);
3798        assert_eq_m128i(r, _mm_set1_epi8(1));
3799    }
3800
3801    #[simd_test(enable = "sse2")]
3802    unsafe fn test_mm_andnot_si128() {
3803        let a = _mm_set1_epi8(5);
3804        let b = _mm_set1_epi8(3);
3805        let r = _mm_andnot_si128(a, b);
3806        assert_eq_m128i(r, _mm_set1_epi8(2));
3807    }
3808
3809    #[simd_test(enable = "sse2")]
3810    unsafe fn test_mm_or_si128() {
3811        let a = _mm_set1_epi8(5);
3812        let b = _mm_set1_epi8(3);
3813        let r = _mm_or_si128(a, b);
3814        assert_eq_m128i(r, _mm_set1_epi8(7));
3815    }
3816
3817    #[simd_test(enable = "sse2")]
3818    unsafe fn test_mm_xor_si128() {
3819        let a = _mm_set1_epi8(5);
3820        let b = _mm_set1_epi8(3);
3821        let r = _mm_xor_si128(a, b);
3822        assert_eq_m128i(r, _mm_set1_epi8(6));
3823    }
3824
3825    #[simd_test(enable = "sse2")]
3826    unsafe fn test_mm_cmpeq_epi8() {
3827        let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3828        let b = _mm_setr_epi8(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
3829        let r = _mm_cmpeq_epi8(a, b);
3830        #[rustfmt::skip]
3831        assert_eq_m128i(
3832            r,
3833            _mm_setr_epi8(
3834                0, 0, 0xFFu8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
3835            )
3836        );
3837    }
3838
3839    #[simd_test(enable = "sse2")]
3840    unsafe fn test_mm_cmpeq_epi16() {
3841        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3842        let b = _mm_setr_epi16(7, 6, 2, 4, 3, 2, 1, 0);
3843        let r = _mm_cmpeq_epi16(a, b);
3844        assert_eq_m128i(r, _mm_setr_epi16(0, 0, !0, 0, 0, 0, 0, 0));
3845    }
3846
3847    #[simd_test(enable = "sse2")]
3848    unsafe fn test_mm_cmpeq_epi32() {
3849        let a = _mm_setr_epi32(0, 1, 2, 3);
3850        let b = _mm_setr_epi32(3, 2, 2, 0);
3851        let r = _mm_cmpeq_epi32(a, b);
3852        assert_eq_m128i(r, _mm_setr_epi32(0, 0, !0, 0));
3853    }
3854
3855    #[simd_test(enable = "sse2")]
3856    unsafe fn test_mm_cmpgt_epi8() {
3857        let a = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3858        let b = _mm_set1_epi8(0);
3859        let r = _mm_cmpgt_epi8(a, b);
3860        let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3861        assert_eq_m128i(r, e);
3862    }
3863
3864    #[simd_test(enable = "sse2")]
3865    unsafe fn test_mm_cmpgt_epi16() {
3866        let a = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
3867        let b = _mm_set1_epi16(0);
3868        let r = _mm_cmpgt_epi16(a, b);
3869        let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
3870        assert_eq_m128i(r, e);
3871    }
3872
3873    #[simd_test(enable = "sse2")]
3874    unsafe fn test_mm_cmpgt_epi32() {
3875        let a = _mm_set_epi32(5, 0, 0, 0);
3876        let b = _mm_set1_epi32(0);
3877        let r = _mm_cmpgt_epi32(a, b);
3878        assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
3879    }
3880
3881    #[simd_test(enable = "sse2")]
3882    unsafe fn test_mm_cmplt_epi8() {
3883        let a = _mm_set1_epi8(0);
3884        let b = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3885        let r = _mm_cmplt_epi8(a, b);
3886        let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3887        assert_eq_m128i(r, e);
3888    }
3889
3890    #[simd_test(enable = "sse2")]
3891    unsafe fn test_mm_cmplt_epi16() {
3892        let a = _mm_set1_epi16(0);
3893        let b = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
3894        let r = _mm_cmplt_epi16(a, b);
3895        let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
3896        assert_eq_m128i(r, e);
3897    }
3898
3899    #[simd_test(enable = "sse2")]
3900    unsafe fn test_mm_cmplt_epi32() {
3901        let a = _mm_set1_epi32(0);
3902        let b = _mm_set_epi32(5, 0, 0, 0);
3903        let r = _mm_cmplt_epi32(a, b);
3904        assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
3905    }
3906
3907    #[simd_test(enable = "sse2")]
3908    unsafe fn test_mm_cvtepi32_pd() {
3909        let a = _mm_set_epi32(35, 25, 15, 5);
3910        let r = _mm_cvtepi32_pd(a);
3911        assert_eq_m128d(r, _mm_setr_pd(5.0, 15.0));
3912    }
3913
3914    #[simd_test(enable = "sse2")]
3915    unsafe fn test_mm_cvtsi32_sd() {
3916        let a = _mm_set1_pd(3.5);
3917        let r = _mm_cvtsi32_sd(a, 5);
3918        assert_eq_m128d(r, _mm_setr_pd(5.0, 3.5));
3919    }
3920
3921    #[simd_test(enable = "sse2")]
3922    unsafe fn test_mm_cvtepi32_ps() {
3923        let a = _mm_setr_epi32(1, 2, 3, 4);
3924        let r = _mm_cvtepi32_ps(a);
3925        assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0));
3926    }
3927
3928    #[simd_test(enable = "sse2")]
3929    unsafe fn test_mm_cvtps_epi32() {
3930        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3931        let r = _mm_cvtps_epi32(a);
3932        assert_eq_m128i(r, _mm_setr_epi32(1, 2, 3, 4));
3933    }
3934
3935    #[simd_test(enable = "sse2")]
3936    unsafe fn test_mm_cvtsi32_si128() {
3937        let r = _mm_cvtsi32_si128(5);
3938        assert_eq_m128i(r, _mm_setr_epi32(5, 0, 0, 0));
3939    }
3940
3941    #[simd_test(enable = "sse2")]
3942    unsafe fn test_mm_cvtsi128_si32() {
3943        let r = _mm_cvtsi128_si32(_mm_setr_epi32(5, 0, 0, 0));
3944        assert_eq!(r, 5);
3945    }
3946
3947    #[simd_test(enable = "sse2")]
3948    unsafe fn test_mm_set_epi64x() {
3949        let r = _mm_set_epi64x(0, 1);
3950        assert_eq_m128i(r, _mm_setr_epi64x(1, 0));
3951    }
3952
3953    #[simd_test(enable = "sse2")]
3954    unsafe fn test_mm_set_epi32() {
3955        let r = _mm_set_epi32(0, 1, 2, 3);
3956        assert_eq_m128i(r, _mm_setr_epi32(3, 2, 1, 0));
3957    }
3958
3959    #[simd_test(enable = "sse2")]
3960    unsafe fn test_mm_set_epi16() {
3961        let r = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3962        assert_eq_m128i(r, _mm_setr_epi16(7, 6, 5, 4, 3, 2, 1, 0));
3963    }
3964
3965    #[simd_test(enable = "sse2")]
3966    unsafe fn test_mm_set_epi8() {
3967        #[rustfmt::skip]
3968        let r = _mm_set_epi8(
3969            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
3970        );
3971        #[rustfmt::skip]
3972        let e = _mm_setr_epi8(
3973            15, 14, 13, 12, 11, 10, 9, 8,
3974            7, 6, 5, 4, 3, 2, 1, 0,
3975        );
3976        assert_eq_m128i(r, e);
3977    }
3978
3979    #[simd_test(enable = "sse2")]
3980    unsafe fn test_mm_set1_epi64x() {
3981        let r = _mm_set1_epi64x(1);
3982        assert_eq_m128i(r, _mm_set1_epi64x(1));
3983    }
3984
3985    #[simd_test(enable = "sse2")]
3986    unsafe fn test_mm_set1_epi32() {
3987        let r = _mm_set1_epi32(1);
3988        assert_eq_m128i(r, _mm_set1_epi32(1));
3989    }
3990
3991    #[simd_test(enable = "sse2")]
3992    unsafe fn test_mm_set1_epi16() {
3993        let r = _mm_set1_epi16(1);
3994        assert_eq_m128i(r, _mm_set1_epi16(1));
3995    }
3996
3997    #[simd_test(enable = "sse2")]
3998    unsafe fn test_mm_set1_epi8() {
3999        let r = _mm_set1_epi8(1);
4000        assert_eq_m128i(r, _mm_set1_epi8(1));
4001    }
4002
4003    #[simd_test(enable = "sse2")]
4004    unsafe fn test_mm_setr_epi32() {
4005        let r = _mm_setr_epi32(0, 1, 2, 3);
4006        assert_eq_m128i(r, _mm_setr_epi32(0, 1, 2, 3));
4007    }
4008
4009    #[simd_test(enable = "sse2")]
4010    unsafe fn test_mm_setr_epi16() {
4011        let r = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4012        assert_eq_m128i(r, _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7));
4013    }
4014
4015    #[simd_test(enable = "sse2")]
4016    unsafe fn test_mm_setr_epi8() {
4017        #[rustfmt::skip]
4018        let r = _mm_setr_epi8(
4019            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
4020        );
4021        #[rustfmt::skip]
4022        let e = _mm_setr_epi8(
4023            0, 1, 2, 3, 4, 5, 6, 7,
4024            8, 9, 10, 11, 12, 13, 14, 15,
4025        );
4026        assert_eq_m128i(r, e);
4027    }
4028
4029    #[simd_test(enable = "sse2")]
4030    unsafe fn test_mm_setzero_si128() {
4031        let r = _mm_setzero_si128();
4032        assert_eq_m128i(r, _mm_set1_epi64x(0));
4033    }
4034
4035    #[simd_test(enable = "sse2")]
4036    unsafe fn test_mm_loadl_epi64() {
4037        let a = _mm_setr_epi64x(6, 5);
4038        let r = _mm_loadl_epi64(ptr::addr_of!(a));
4039        assert_eq_m128i(r, _mm_setr_epi64x(6, 0));
4040    }
4041
4042    #[simd_test(enable = "sse2")]
4043    unsafe fn test_mm_load_si128() {
4044        let a = _mm_set_epi64x(5, 6);
4045        let r = _mm_load_si128(ptr::addr_of!(a) as *const _);
4046        assert_eq_m128i(a, r);
4047    }
4048
4049    #[simd_test(enable = "sse2")]
4050    unsafe fn test_mm_loadu_si128() {
4051        let a = _mm_set_epi64x(5, 6);
4052        let r = _mm_loadu_si128(ptr::addr_of!(a) as *const _);
4053        assert_eq_m128i(a, r);
4054    }
4055
4056    #[simd_test(enable = "sse2")]
4057    #[cfg_attr(miri, ignore)]
4060    unsafe fn test_mm_maskmoveu_si128() {
4061        let a = _mm_set1_epi8(9);
4062        #[rustfmt::skip]
4063        let mask = _mm_set_epi8(
4064            0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0,
4065            0, 0, 0, 0, 0, 0, 0, 0,
4066        );
4067        let mut r = _mm_set1_epi8(0);
4068        _mm_maskmoveu_si128(a, mask, ptr::addr_of_mut!(r) as *mut i8);
4069        let e = _mm_set_epi8(0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
4070        assert_eq_m128i(r, e);
4071    }
4072
4073    #[simd_test(enable = "sse2")]
4074    unsafe fn test_mm_store_si128() {
4075        let a = _mm_set1_epi8(9);
4076        let mut r = _mm_set1_epi8(0);
4077        _mm_store_si128(&mut r, a);
4078        assert_eq_m128i(r, a);
4079    }
4080
4081    #[simd_test(enable = "sse2")]
4082    unsafe fn test_mm_storeu_si128() {
4083        let a = _mm_set1_epi8(9);
4084        let mut r = _mm_set1_epi8(0);
4085        _mm_storeu_si128(&mut r, a);
4086        assert_eq_m128i(r, a);
4087    }
4088
4089    #[simd_test(enable = "sse2")]
4090    unsafe fn test_mm_storel_epi64() {
4091        let a = _mm_setr_epi64x(2, 9);
4092        let mut r = _mm_set1_epi8(0);
4093        _mm_storel_epi64(&mut r, a);
4094        assert_eq_m128i(r, _mm_setr_epi64x(2, 0));
4095    }
4096
4097    #[simd_test(enable = "sse2")]
4098    #[cfg_attr(miri, ignore)]
4101    unsafe fn test_mm_stream_si128() {
4102        let a = _mm_setr_epi32(1, 2, 3, 4);
4103        let mut r = _mm_undefined_si128();
4104        _mm_stream_si128(ptr::addr_of_mut!(r), a);
4105        assert_eq_m128i(r, a);
4106    }
4107
4108    #[simd_test(enable = "sse2")]
4109    #[cfg_attr(miri, ignore)]
4112    unsafe fn test_mm_stream_si32() {
4113        let a: i32 = 7;
4114        let mut mem = boxed::Box::<i32>::new(-1);
4115        _mm_stream_si32(ptr::addr_of_mut!(*mem), a);
4116        assert_eq!(a, *mem);
4117    }
4118
4119    #[simd_test(enable = "sse2")]
4120    unsafe fn test_mm_move_epi64() {
4121        let a = _mm_setr_epi64x(5, 6);
4122        let r = _mm_move_epi64(a);
4123        assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
4124    }
4125
4126    #[simd_test(enable = "sse2")]
4127    unsafe fn test_mm_packs_epi16() {
4128        let a = _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0);
4129        let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80);
4130        let r = _mm_packs_epi16(a, b);
4131        #[rustfmt::skip]
4132        assert_eq_m128i(
4133            r,
4134            _mm_setr_epi8(
4135                0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F
4136            )
4137        );
4138    }
4139
4140    #[simd_test(enable = "sse2")]
4141    unsafe fn test_mm_packs_epi32() {
4142        let a = _mm_setr_epi32(0x8000, -0x8001, 0, 0);
4143        let b = _mm_setr_epi32(0, 0, -0x8001, 0x8000);
4144        let r = _mm_packs_epi32(a, b);
4145        assert_eq_m128i(
4146            r,
4147            _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF),
4148        );
4149    }
4150
4151    #[simd_test(enable = "sse2")]
4152    unsafe fn test_mm_packus_epi16() {
4153        let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0);
4154        let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100);
4155        let r = _mm_packus_epi16(a, b);
4156        assert_eq_m128i(
4157            r,
4158            _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0),
4159        );
4160    }
4161
4162    #[simd_test(enable = "sse2")]
4163    unsafe fn test_mm_extract_epi16() {
4164        let a = _mm_setr_epi16(-1, 1, 2, 3, 4, 5, 6, 7);
4165        let r1 = _mm_extract_epi16::<0>(a);
4166        let r2 = _mm_extract_epi16::<3>(a);
4167        assert_eq!(r1, 0xFFFF);
4168        assert_eq!(r2, 3);
4169    }
4170
4171    #[simd_test(enable = "sse2")]
4172    unsafe fn test_mm_insert_epi16() {
4173        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4174        let r = _mm_insert_epi16::<0>(a, 9);
4175        let e = _mm_setr_epi16(9, 1, 2, 3, 4, 5, 6, 7);
4176        assert_eq_m128i(r, e);
4177    }
4178
4179    #[simd_test(enable = "sse2")]
4180    unsafe fn test_mm_movemask_epi8() {
4181        #[rustfmt::skip]
4182        let a = _mm_setr_epi8(
4183            0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01,
4184            0b0101, 0b1111_0000u8 as i8, 0, 0,
4185            0, 0b1011_0101u8 as i8, 0b1111_0000u8 as i8, 0b0101,
4186            0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8,
4187        );
4188        let r = _mm_movemask_epi8(a);
4189        assert_eq!(r, 0b10100110_00100101);
4190    }
4191
4192    #[simd_test(enable = "sse2")]
4193    unsafe fn test_mm_shuffle_epi32() {
4194        let a = _mm_setr_epi32(5, 10, 15, 20);
4195        let r = _mm_shuffle_epi32::<0b00_01_01_11>(a);
4196        let e = _mm_setr_epi32(20, 10, 10, 5);
4197        assert_eq_m128i(r, e);
4198    }
4199
4200    #[simd_test(enable = "sse2")]
4201    unsafe fn test_mm_shufflehi_epi16() {
4202        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 10, 15, 20);
4203        let r = _mm_shufflehi_epi16::<0b00_01_01_11>(a);
4204        let e = _mm_setr_epi16(1, 2, 3, 4, 20, 10, 10, 5);
4205        assert_eq_m128i(r, e);
4206    }
4207
4208    #[simd_test(enable = "sse2")]
4209    unsafe fn test_mm_shufflelo_epi16() {
4210        let a = _mm_setr_epi16(5, 10, 15, 20, 1, 2, 3, 4);
4211        let r = _mm_shufflelo_epi16::<0b00_01_01_11>(a);
4212        let e = _mm_setr_epi16(20, 10, 10, 5, 1, 2, 3, 4);
4213        assert_eq_m128i(r, e);
4214    }
4215
4216    #[simd_test(enable = "sse2")]
4217    unsafe fn test_mm_unpackhi_epi8() {
4218        #[rustfmt::skip]
4219        let a = _mm_setr_epi8(
4220            0, 1, 2, 3, 4, 5, 6, 7,
4221            8, 9, 10, 11, 12, 13, 14, 15,
4222        );
4223        #[rustfmt::skip]
4224        let b = _mm_setr_epi8(
4225            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
4226        );
4227        let r = _mm_unpackhi_epi8(a, b);
4228        #[rustfmt::skip]
4229        let e = _mm_setr_epi8(
4230            8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31,
4231        );
4232        assert_eq_m128i(r, e);
4233    }
4234
4235    #[simd_test(enable = "sse2")]
4236    unsafe fn test_mm_unpackhi_epi16() {
4237        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4238        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
4239        let r = _mm_unpackhi_epi16(a, b);
4240        let e = _mm_setr_epi16(4, 12, 5, 13, 6, 14, 7, 15);
4241        assert_eq_m128i(r, e);
4242    }
4243
4244    #[simd_test(enable = "sse2")]
4245    unsafe fn test_mm_unpackhi_epi32() {
4246        let a = _mm_setr_epi32(0, 1, 2, 3);
4247        let b = _mm_setr_epi32(4, 5, 6, 7);
4248        let r = _mm_unpackhi_epi32(a, b);
4249        let e = _mm_setr_epi32(2, 6, 3, 7);
4250        assert_eq_m128i(r, e);
4251    }
4252
4253    #[simd_test(enable = "sse2")]
4254    unsafe fn test_mm_unpackhi_epi64() {
4255        let a = _mm_setr_epi64x(0, 1);
4256        let b = _mm_setr_epi64x(2, 3);
4257        let r = _mm_unpackhi_epi64(a, b);
4258        let e = _mm_setr_epi64x(1, 3);
4259        assert_eq_m128i(r, e);
4260    }
4261
4262    #[simd_test(enable = "sse2")]
4263    unsafe fn test_mm_unpacklo_epi8() {
4264        #[rustfmt::skip]
4265        let a = _mm_setr_epi8(
4266            0, 1, 2, 3, 4, 5, 6, 7,
4267            8, 9, 10, 11, 12, 13, 14, 15,
4268        );
4269        #[rustfmt::skip]
4270        let b = _mm_setr_epi8(
4271            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
4272        );
4273        let r = _mm_unpacklo_epi8(a, b);
4274        #[rustfmt::skip]
4275        let e = _mm_setr_epi8(
4276            0, 16, 1, 17, 2, 18, 3, 19,
4277            4, 20, 5, 21, 6, 22, 7, 23,
4278        );
4279        assert_eq_m128i(r, e);
4280    }
4281
4282    #[simd_test(enable = "sse2")]
4283    unsafe fn test_mm_unpacklo_epi16() {
4284        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4285        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
4286        let r = _mm_unpacklo_epi16(a, b);
4287        let e = _mm_setr_epi16(0, 8, 1, 9, 2, 10, 3, 11);
4288        assert_eq_m128i(r, e);
4289    }
4290
4291    #[simd_test(enable = "sse2")]
4292    unsafe fn test_mm_unpacklo_epi32() {
4293        let a = _mm_setr_epi32(0, 1, 2, 3);
4294        let b = _mm_setr_epi32(4, 5, 6, 7);
4295        let r = _mm_unpacklo_epi32(a, b);
4296        let e = _mm_setr_epi32(0, 4, 1, 5);
4297        assert_eq_m128i(r, e);
4298    }
4299
4300    #[simd_test(enable = "sse2")]
4301    unsafe fn test_mm_unpacklo_epi64() {
4302        let a = _mm_setr_epi64x(0, 1);
4303        let b = _mm_setr_epi64x(2, 3);
4304        let r = _mm_unpacklo_epi64(a, b);
4305        let e = _mm_setr_epi64x(0, 2);
4306        assert_eq_m128i(r, e);
4307    }
4308
4309    #[simd_test(enable = "sse2")]
4310    unsafe fn test_mm_add_sd() {
4311        let a = _mm_setr_pd(1.0, 2.0);
4312        let b = _mm_setr_pd(5.0, 10.0);
4313        let r = _mm_add_sd(a, b);
4314        assert_eq_m128d(r, _mm_setr_pd(6.0, 2.0));
4315    }
4316
4317    #[simd_test(enable = "sse2")]
4318    unsafe fn test_mm_add_pd() {
4319        let a = _mm_setr_pd(1.0, 2.0);
4320        let b = _mm_setr_pd(5.0, 10.0);
4321        let r = _mm_add_pd(a, b);
4322        assert_eq_m128d(r, _mm_setr_pd(6.0, 12.0));
4323    }
4324
4325    #[simd_test(enable = "sse2")]
4326    unsafe fn test_mm_div_sd() {
4327        let a = _mm_setr_pd(1.0, 2.0);
4328        let b = _mm_setr_pd(5.0, 10.0);
4329        let r = _mm_div_sd(a, b);
4330        assert_eq_m128d(r, _mm_setr_pd(0.2, 2.0));
4331    }
4332
4333    #[simd_test(enable = "sse2")]
4334    unsafe fn test_mm_div_pd() {
4335        let a = _mm_setr_pd(1.0, 2.0);
4336        let b = _mm_setr_pd(5.0, 10.0);
4337        let r = _mm_div_pd(a, b);
4338        assert_eq_m128d(r, _mm_setr_pd(0.2, 0.2));
4339    }
4340
4341    #[simd_test(enable = "sse2")]
4342    unsafe fn test_mm_max_sd() {
4343        let a = _mm_setr_pd(1.0, 2.0);
4344        let b = _mm_setr_pd(5.0, 10.0);
4345        let r = _mm_max_sd(a, b);
4346        assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
4347    }
4348
4349    #[simd_test(enable = "sse2")]
4350    unsafe fn test_mm_max_pd() {
4351        let a = _mm_setr_pd(1.0, 2.0);
4352        let b = _mm_setr_pd(5.0, 10.0);
4353        let r = _mm_max_pd(a, b);
4354        assert_eq_m128d(r, _mm_setr_pd(5.0, 10.0));
4355
4356        let a = _mm_setr_pd(-0.0, 0.0);
4358        let b = _mm_setr_pd(0.0, 0.0);
4359        let r1: [u8; 16] = transmute(_mm_max_pd(a, b));
4360        let r2: [u8; 16] = transmute(_mm_max_pd(b, a));
4361        let a: [u8; 16] = transmute(a);
4362        let b: [u8; 16] = transmute(b);
4363        assert_eq!(r1, b);
4364        assert_eq!(r2, a);
4365        assert_ne!(a, b); }
4367
4368    #[simd_test(enable = "sse2")]
4369    unsafe fn test_mm_min_sd() {
4370        let a = _mm_setr_pd(1.0, 2.0);
4371        let b = _mm_setr_pd(5.0, 10.0);
4372        let r = _mm_min_sd(a, b);
4373        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4374    }
4375
4376    #[simd_test(enable = "sse2")]
4377    unsafe fn test_mm_min_pd() {
4378        let a = _mm_setr_pd(1.0, 2.0);
4379        let b = _mm_setr_pd(5.0, 10.0);
4380        let r = _mm_min_pd(a, b);
4381        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4382
4383        let a = _mm_setr_pd(-0.0, 0.0);
4385        let b = _mm_setr_pd(0.0, 0.0);
4386        let r1: [u8; 16] = transmute(_mm_min_pd(a, b));
4387        let r2: [u8; 16] = transmute(_mm_min_pd(b, a));
4388        let a: [u8; 16] = transmute(a);
4389        let b: [u8; 16] = transmute(b);
4390        assert_eq!(r1, b);
4391        assert_eq!(r2, a);
4392        assert_ne!(a, b); }
4394
4395    #[simd_test(enable = "sse2")]
4396    unsafe fn test_mm_mul_sd() {
4397        let a = _mm_setr_pd(1.0, 2.0);
4398        let b = _mm_setr_pd(5.0, 10.0);
4399        let r = _mm_mul_sd(a, b);
4400        assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
4401    }
4402
4403    #[simd_test(enable = "sse2")]
4404    unsafe fn test_mm_mul_pd() {
4405        let a = _mm_setr_pd(1.0, 2.0);
4406        let b = _mm_setr_pd(5.0, 10.0);
4407        let r = _mm_mul_pd(a, b);
4408        assert_eq_m128d(r, _mm_setr_pd(5.0, 20.0));
4409    }
4410
4411    #[simd_test(enable = "sse2")]
4412    unsafe fn test_mm_sqrt_sd() {
4413        let a = _mm_setr_pd(1.0, 2.0);
4414        let b = _mm_setr_pd(5.0, 10.0);
4415        let r = _mm_sqrt_sd(a, b);
4416        assert_eq_m128d(r, _mm_setr_pd(5.0f64.sqrt(), 2.0));
4417    }
4418
4419    #[simd_test(enable = "sse2")]
4420    unsafe fn test_mm_sqrt_pd() {
4421        let r = _mm_sqrt_pd(_mm_setr_pd(1.0, 2.0));
4422        assert_eq_m128d(r, _mm_setr_pd(1.0f64.sqrt(), 2.0f64.sqrt()));
4423    }
4424
4425    #[simd_test(enable = "sse2")]
4426    unsafe fn test_mm_sub_sd() {
4427        let a = _mm_setr_pd(1.0, 2.0);
4428        let b = _mm_setr_pd(5.0, 10.0);
4429        let r = _mm_sub_sd(a, b);
4430        assert_eq_m128d(r, _mm_setr_pd(-4.0, 2.0));
4431    }
4432
4433    #[simd_test(enable = "sse2")]
4434    unsafe fn test_mm_sub_pd() {
4435        let a = _mm_setr_pd(1.0, 2.0);
4436        let b = _mm_setr_pd(5.0, 10.0);
4437        let r = _mm_sub_pd(a, b);
4438        assert_eq_m128d(r, _mm_setr_pd(-4.0, -8.0));
4439    }
4440
4441    #[simd_test(enable = "sse2")]
4442    unsafe fn test_mm_and_pd() {
4443        let a = transmute(u64x2::splat(5));
4444        let b = transmute(u64x2::splat(3));
4445        let r = _mm_and_pd(a, b);
4446        let e = transmute(u64x2::splat(1));
4447        assert_eq_m128d(r, e);
4448    }
4449
4450    #[simd_test(enable = "sse2")]
4451    unsafe fn test_mm_andnot_pd() {
4452        let a = transmute(u64x2::splat(5));
4453        let b = transmute(u64x2::splat(3));
4454        let r = _mm_andnot_pd(a, b);
4455        let e = transmute(u64x2::splat(2));
4456        assert_eq_m128d(r, e);
4457    }
4458
4459    #[simd_test(enable = "sse2")]
4460    unsafe fn test_mm_or_pd() {
4461        let a = transmute(u64x2::splat(5));
4462        let b = transmute(u64x2::splat(3));
4463        let r = _mm_or_pd(a, b);
4464        let e = transmute(u64x2::splat(7));
4465        assert_eq_m128d(r, e);
4466    }
4467
4468    #[simd_test(enable = "sse2")]
4469    unsafe fn test_mm_xor_pd() {
4470        let a = transmute(u64x2::splat(5));
4471        let b = transmute(u64x2::splat(3));
4472        let r = _mm_xor_pd(a, b);
4473        let e = transmute(u64x2::splat(6));
4474        assert_eq_m128d(r, e);
4475    }
4476
4477    #[simd_test(enable = "sse2")]
4478    unsafe fn test_mm_cmpeq_sd() {
4479        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4480        let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4481        let r = transmute::<_, __m128i>(_mm_cmpeq_sd(a, b));
4482        assert_eq_m128i(r, e);
4483    }
4484
4485    #[simd_test(enable = "sse2")]
4486    unsafe fn test_mm_cmplt_sd() {
4487        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4488        let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4489        let r = transmute::<_, __m128i>(_mm_cmplt_sd(a, b));
4490        assert_eq_m128i(r, e);
4491    }
4492
4493    #[simd_test(enable = "sse2")]
4494    unsafe fn test_mm_cmple_sd() {
4495        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4496        let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4497        let r = transmute::<_, __m128i>(_mm_cmple_sd(a, b));
4498        assert_eq_m128i(r, e);
4499    }
4500
4501    #[simd_test(enable = "sse2")]
4502    unsafe fn test_mm_cmpgt_sd() {
4503        let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4504        let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4505        let r = transmute::<_, __m128i>(_mm_cmpgt_sd(a, b));
4506        assert_eq_m128i(r, e);
4507    }
4508
4509    #[simd_test(enable = "sse2")]
4510    unsafe fn test_mm_cmpge_sd() {
4511        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4512        let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4513        let r = transmute::<_, __m128i>(_mm_cmpge_sd(a, b));
4514        assert_eq_m128i(r, e);
4515    }
4516
4517    #[simd_test(enable = "sse2")]
4518    unsafe fn test_mm_cmpord_sd() {
4519        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4520        let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4521        let r = transmute::<_, __m128i>(_mm_cmpord_sd(a, b));
4522        assert_eq_m128i(r, e);
4523    }
4524
4525    #[simd_test(enable = "sse2")]
4526    unsafe fn test_mm_cmpunord_sd() {
4527        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4528        let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4529        let r = transmute::<_, __m128i>(_mm_cmpunord_sd(a, b));
4530        assert_eq_m128i(r, e);
4531    }
4532
4533    #[simd_test(enable = "sse2")]
4534    unsafe fn test_mm_cmpneq_sd() {
4535        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4536        let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4537        let r = transmute::<_, __m128i>(_mm_cmpneq_sd(a, b));
4538        assert_eq_m128i(r, e);
4539    }
4540
4541    #[simd_test(enable = "sse2")]
4542    unsafe fn test_mm_cmpnlt_sd() {
4543        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4544        let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4545        let r = transmute::<_, __m128i>(_mm_cmpnlt_sd(a, b));
4546        assert_eq_m128i(r, e);
4547    }
4548
4549    #[simd_test(enable = "sse2")]
4550    unsafe fn test_mm_cmpnle_sd() {
4551        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4552        let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4553        let r = transmute::<_, __m128i>(_mm_cmpnle_sd(a, b));
4554        assert_eq_m128i(r, e);
4555    }
4556
4557    #[simd_test(enable = "sse2")]
4558    unsafe fn test_mm_cmpngt_sd() {
4559        let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4560        let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4561        let r = transmute::<_, __m128i>(_mm_cmpngt_sd(a, b));
4562        assert_eq_m128i(r, e);
4563    }
4564
4565    #[simd_test(enable = "sse2")]
4566    unsafe fn test_mm_cmpnge_sd() {
4567        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4568        let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4569        let r = transmute::<_, __m128i>(_mm_cmpnge_sd(a, b));
4570        assert_eq_m128i(r, e);
4571    }
4572
4573    #[simd_test(enable = "sse2")]
4574    unsafe fn test_mm_cmpeq_pd() {
4575        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4576        let e = _mm_setr_epi64x(!0, 0);
4577        let r = transmute::<_, __m128i>(_mm_cmpeq_pd(a, b));
4578        assert_eq_m128i(r, e);
4579    }
4580
4581    #[simd_test(enable = "sse2")]
4582    unsafe fn test_mm_cmplt_pd() {
4583        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4584        let e = _mm_setr_epi64x(0, !0);
4585        let r = transmute::<_, __m128i>(_mm_cmplt_pd(a, b));
4586        assert_eq_m128i(r, e);
4587    }
4588
4589    #[simd_test(enable = "sse2")]
4590    unsafe fn test_mm_cmple_pd() {
4591        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4592        let e = _mm_setr_epi64x(!0, !0);
4593        let r = transmute::<_, __m128i>(_mm_cmple_pd(a, b));
4594        assert_eq_m128i(r, e);
4595    }
4596
4597    #[simd_test(enable = "sse2")]
4598    unsafe fn test_mm_cmpgt_pd() {
4599        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4600        let e = _mm_setr_epi64x(0, 0);
4601        let r = transmute::<_, __m128i>(_mm_cmpgt_pd(a, b));
4602        assert_eq_m128i(r, e);
4603    }
4604
4605    #[simd_test(enable = "sse2")]
4606    unsafe fn test_mm_cmpge_pd() {
4607        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4608        let e = _mm_setr_epi64x(!0, 0);
4609        let r = transmute::<_, __m128i>(_mm_cmpge_pd(a, b));
4610        assert_eq_m128i(r, e);
4611    }
4612
4613    #[simd_test(enable = "sse2")]
4614    unsafe fn test_mm_cmpord_pd() {
4615        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4616        let e = _mm_setr_epi64x(0, !0);
4617        let r = transmute::<_, __m128i>(_mm_cmpord_pd(a, b));
4618        assert_eq_m128i(r, e);
4619    }
4620
4621    #[simd_test(enable = "sse2")]
4622    unsafe fn test_mm_cmpunord_pd() {
4623        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4624        let e = _mm_setr_epi64x(!0, 0);
4625        let r = transmute::<_, __m128i>(_mm_cmpunord_pd(a, b));
4626        assert_eq_m128i(r, e);
4627    }
4628
4629    #[simd_test(enable = "sse2")]
4630    unsafe fn test_mm_cmpneq_pd() {
4631        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4632        let e = _mm_setr_epi64x(!0, !0);
4633        let r = transmute::<_, __m128i>(_mm_cmpneq_pd(a, b));
4634        assert_eq_m128i(r, e);
4635    }
4636
4637    #[simd_test(enable = "sse2")]
4638    unsafe fn test_mm_cmpnlt_pd() {
4639        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4640        let e = _mm_setr_epi64x(0, 0);
4641        let r = transmute::<_, __m128i>(_mm_cmpnlt_pd(a, b));
4642        assert_eq_m128i(r, e);
4643    }
4644
4645    #[simd_test(enable = "sse2")]
4646    unsafe fn test_mm_cmpnle_pd() {
4647        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4648        let e = _mm_setr_epi64x(0, 0);
4649        let r = transmute::<_, __m128i>(_mm_cmpnle_pd(a, b));
4650        assert_eq_m128i(r, e);
4651    }
4652
4653    #[simd_test(enable = "sse2")]
4654    unsafe fn test_mm_cmpngt_pd() {
4655        let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4656        let e = _mm_setr_epi64x(0, !0);
4657        let r = transmute::<_, __m128i>(_mm_cmpngt_pd(a, b));
4658        assert_eq_m128i(r, e);
4659    }
4660
4661    #[simd_test(enable = "sse2")]
4662    unsafe fn test_mm_cmpnge_pd() {
4663        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4664        let e = _mm_setr_epi64x(0, !0);
4665        let r = transmute::<_, __m128i>(_mm_cmpnge_pd(a, b));
4666        assert_eq_m128i(r, e);
4667    }
4668
4669    #[simd_test(enable = "sse2")]
4670    unsafe fn test_mm_comieq_sd() {
4671        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4672        assert!(_mm_comieq_sd(a, b) != 0);
4673
4674        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(1.0, 3.0));
4675        assert!(_mm_comieq_sd(a, b) == 0);
4676    }
4677
4678    #[simd_test(enable = "sse2")]
4679    unsafe fn test_mm_comilt_sd() {
4680        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4681        assert!(_mm_comilt_sd(a, b) == 0);
4682    }
4683
4684    #[simd_test(enable = "sse2")]
4685    unsafe fn test_mm_comile_sd() {
4686        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4687        assert!(_mm_comile_sd(a, b) != 0);
4688    }
4689
4690    #[simd_test(enable = "sse2")]
4691    unsafe fn test_mm_comigt_sd() {
4692        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4693        assert!(_mm_comigt_sd(a, b) == 0);
4694    }
4695
4696    #[simd_test(enable = "sse2")]
4697    unsafe fn test_mm_comige_sd() {
4698        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4699        assert!(_mm_comige_sd(a, b) != 0);
4700    }
4701
4702    #[simd_test(enable = "sse2")]
4703    unsafe fn test_mm_comineq_sd() {
4704        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4705        assert!(_mm_comineq_sd(a, b) == 0);
4706    }
4707
4708    #[simd_test(enable = "sse2")]
4709    unsafe fn test_mm_ucomieq_sd() {
4710        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4711        assert!(_mm_ucomieq_sd(a, b) != 0);
4712
4713        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(NAN, 3.0));
4714        assert!(_mm_ucomieq_sd(a, b) == 0);
4715    }
4716
4717    #[simd_test(enable = "sse2")]
4718    unsafe fn test_mm_ucomilt_sd() {
4719        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4720        assert!(_mm_ucomilt_sd(a, b) == 0);
4721    }
4722
4723    #[simd_test(enable = "sse2")]
4724    unsafe fn test_mm_ucomile_sd() {
4725        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4726        assert!(_mm_ucomile_sd(a, b) != 0);
4727    }
4728
4729    #[simd_test(enable = "sse2")]
4730    unsafe fn test_mm_ucomigt_sd() {
4731        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4732        assert!(_mm_ucomigt_sd(a, b) == 0);
4733    }
4734
4735    #[simd_test(enable = "sse2")]
4736    unsafe fn test_mm_ucomige_sd() {
4737        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4738        assert!(_mm_ucomige_sd(a, b) != 0);
4739    }
4740
4741    #[simd_test(enable = "sse2")]
4742    unsafe fn test_mm_ucomineq_sd() {
4743        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4744        assert!(_mm_ucomineq_sd(a, b) == 0);
4745    }
4746
4747    #[simd_test(enable = "sse2")]
4748    unsafe fn test_mm_movemask_pd() {
4749        let r = _mm_movemask_pd(_mm_setr_pd(-1.0, 5.0));
4750        assert_eq!(r, 0b01);
4751
4752        let r = _mm_movemask_pd(_mm_setr_pd(-1.0, -5.0));
4753        assert_eq!(r, 0b11);
4754    }
4755
    // 16-byte-aligned scratch buffer of four `f64` values, used by the load/store tests
    // below as the source or destination of memory intrinsics.
    #[repr(align(16))]
    struct Memory {
        // Backing storage; the struct alignment makes `data[0]` 16-byte aligned.
        data: [f64; 4],
    }
4760
4761    #[simd_test(enable = "sse2")]
4762    unsafe fn test_mm_load_pd() {
4763        let mem = Memory {
4764            data: [1.0f64, 2.0, 3.0, 4.0],
4765        };
4766        let vals = &mem.data;
4767        let d = vals.as_ptr();
4768
4769        let r = _mm_load_pd(d);
4770        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4771    }
4772
4773    #[simd_test(enable = "sse2")]
4774    unsafe fn test_mm_load_sd() {
4775        let a = 1.;
4776        let expected = _mm_setr_pd(a, 0.);
4777        let r = _mm_load_sd(&a);
4778        assert_eq_m128d(r, expected);
4779    }
4780
4781    #[simd_test(enable = "sse2")]
4782    unsafe fn test_mm_loadh_pd() {
4783        let a = _mm_setr_pd(1., 2.);
4784        let b = 3.;
4785        let expected = _mm_setr_pd(_mm_cvtsd_f64(a), 3.);
4786        let r = _mm_loadh_pd(a, &b);
4787        assert_eq_m128d(r, expected);
4788    }
4789
4790    #[simd_test(enable = "sse2")]
4791    unsafe fn test_mm_loadl_pd() {
4792        let a = _mm_setr_pd(1., 2.);
4793        let b = 3.;
4794        let expected = _mm_setr_pd(3., get_m128d(a, 1));
4795        let r = _mm_loadl_pd(a, &b);
4796        assert_eq_m128d(r, expected);
4797    }
4798
4799    #[simd_test(enable = "sse2")]
4800    #[cfg_attr(miri, ignore)]
4803    unsafe fn test_mm_stream_pd() {
4804        #[repr(align(128))]
4805        struct Memory {
4806            pub data: [f64; 2],
4807        }
4808        let a = _mm_set1_pd(7.0);
4809        let mut mem = Memory { data: [-1.0; 2] };
4810
4811        _mm_stream_pd(ptr::addr_of_mut!(mem.data[0]), a);
4812        for i in 0..2 {
4813            assert_eq!(mem.data[i], get_m128d(a, i));
4814        }
4815    }
4816
4817    #[simd_test(enable = "sse2")]
4818    unsafe fn test_mm_store_sd() {
4819        let mut dest = 0.;
4820        let a = _mm_setr_pd(1., 2.);
4821        _mm_store_sd(&mut dest, a);
4822        assert_eq!(dest, _mm_cvtsd_f64(a));
4823    }
4824
4825    #[simd_test(enable = "sse2")]
4826    unsafe fn test_mm_store_pd() {
4827        let mut mem = Memory { data: [0.0f64; 4] };
4828        let vals = &mut mem.data;
4829        let a = _mm_setr_pd(1.0, 2.0);
4830        let d = vals.as_mut_ptr();
4831
4832        _mm_store_pd(d, *black_box(&a));
4833        assert_eq!(vals[0], 1.0);
4834        assert_eq!(vals[1], 2.0);
4835    }
4836
4837    #[simd_test(enable = "sse2")]
4838    unsafe fn test_mm_storeu_pd() {
4839        let mut mem = Memory { data: [0.0f64; 4] };
4840        let vals = &mut mem.data;
4841        let a = _mm_setr_pd(1.0, 2.0);
4842
4843        let mut ofs = 0;
4844        let mut p = vals.as_mut_ptr();
4845
4846        if (p as usize) & 0xf == 0 {
4848            ofs = 1;
4849            p = p.add(1);
4850        }
4851
4852        _mm_storeu_pd(p, *black_box(&a));
4853
4854        if ofs > 0 {
4855            assert_eq!(vals[ofs - 1], 0.0);
4856        }
4857        assert_eq!(vals[ofs + 0], 1.0);
4858        assert_eq!(vals[ofs + 1], 2.0);
4859    }
4860
4861    #[simd_test(enable = "sse2")]
4862    unsafe fn test_mm_storeu_si16() {
4863        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
4864        let mut r = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
4865        _mm_storeu_si16(ptr::addr_of_mut!(r).cast(), a);
4866        let e = _mm_setr_epi16(1, 10, 11, 12, 13, 14, 15, 16);
4867        assert_eq_m128i(r, e);
4868    }
4869
4870    #[simd_test(enable = "sse2")]
4871    unsafe fn test_mm_storeu_si32() {
4872        let a = _mm_setr_epi32(1, 2, 3, 4);
4873        let mut r = _mm_setr_epi32(5, 6, 7, 8);
4874        _mm_storeu_si32(ptr::addr_of_mut!(r).cast(), a);
4875        let e = _mm_setr_epi32(1, 6, 7, 8);
4876        assert_eq_m128i(r, e);
4877    }
4878
4879    #[simd_test(enable = "sse2")]
4880    unsafe fn test_mm_storeu_si64() {
4881        let a = _mm_setr_epi64x(1, 2);
4882        let mut r = _mm_setr_epi64x(3, 4);
4883        _mm_storeu_si64(ptr::addr_of_mut!(r).cast(), a);
4884        let e = _mm_setr_epi64x(1, 4);
4885        assert_eq_m128i(r, e);
4886    }
4887
4888    #[simd_test(enable = "sse2")]
4889    unsafe fn test_mm_store1_pd() {
4890        let mut mem = Memory { data: [0.0f64; 4] };
4891        let vals = &mut mem.data;
4892        let a = _mm_setr_pd(1.0, 2.0);
4893        let d = vals.as_mut_ptr();
4894
4895        _mm_store1_pd(d, *black_box(&a));
4896        assert_eq!(vals[0], 1.0);
4897        assert_eq!(vals[1], 1.0);
4898    }
4899
4900    #[simd_test(enable = "sse2")]
4901    unsafe fn test_mm_store_pd1() {
4902        let mut mem = Memory { data: [0.0f64; 4] };
4903        let vals = &mut mem.data;
4904        let a = _mm_setr_pd(1.0, 2.0);
4905        let d = vals.as_mut_ptr();
4906
4907        _mm_store_pd1(d, *black_box(&a));
4908        assert_eq!(vals[0], 1.0);
4909        assert_eq!(vals[1], 1.0);
4910    }
4911
4912    #[simd_test(enable = "sse2")]
4913    unsafe fn test_mm_storer_pd() {
4914        let mut mem = Memory { data: [0.0f64; 4] };
4915        let vals = &mut mem.data;
4916        let a = _mm_setr_pd(1.0, 2.0);
4917        let d = vals.as_mut_ptr();
4918
4919        _mm_storer_pd(d, *black_box(&a));
4920        assert_eq!(vals[0], 2.0);
4921        assert_eq!(vals[1], 1.0);
4922    }
4923
4924    #[simd_test(enable = "sse2")]
4925    unsafe fn test_mm_storeh_pd() {
4926        let mut dest = 0.;
4927        let a = _mm_setr_pd(1., 2.);
4928        _mm_storeh_pd(&mut dest, a);
4929        assert_eq!(dest, get_m128d(a, 1));
4930    }
4931
4932    #[simd_test(enable = "sse2")]
4933    unsafe fn test_mm_storel_pd() {
4934        let mut dest = 0.;
4935        let a = _mm_setr_pd(1., 2.);
4936        _mm_storel_pd(&mut dest, a);
4937        assert_eq!(dest, _mm_cvtsd_f64(a));
4938    }
4939
4940    #[simd_test(enable = "sse2")]
4941    unsafe fn test_mm_loadr_pd() {
4942        let mut mem = Memory {
4943            data: [1.0f64, 2.0, 3.0, 4.0],
4944        };
4945        let vals = &mut mem.data;
4946        let d = vals.as_ptr();
4947
4948        let r = _mm_loadr_pd(d);
4949        assert_eq_m128d(r, _mm_setr_pd(2.0, 1.0));
4950    }
4951
4952    #[simd_test(enable = "sse2")]
4953    unsafe fn test_mm_loadu_pd() {
4954        let mut mem = Memory {
4955            data: [1.0f64, 2.0, 3.0, 4.0],
4956        };
4957        let vals = &mut mem.data;
4958        let mut d = vals.as_ptr();
4959
4960        let mut offset = 0;
4962        if (d as usize) & 0xf == 0 {
4963            offset = 1;
4964            d = d.add(offset);
4965        }
4966
4967        let r = _mm_loadu_pd(d);
4968        let e = _mm_add_pd(_mm_setr_pd(1.0, 2.0), _mm_set1_pd(offset as f64));
4969        assert_eq_m128d(r, e);
4970    }
4971
4972    #[simd_test(enable = "sse2")]
4973    unsafe fn test_mm_loadu_si16() {
4974        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
4975        let r = _mm_loadu_si16(ptr::addr_of!(a) as *const _);
4976        assert_eq_m128i(r, _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0));
4977    }
4978
4979    #[simd_test(enable = "sse2")]
4980    unsafe fn test_mm_loadu_si32() {
4981        let a = _mm_setr_epi32(1, 2, 3, 4);
4982        let r = _mm_loadu_si32(ptr::addr_of!(a) as *const _);
4983        assert_eq_m128i(r, _mm_setr_epi32(1, 0, 0, 0));
4984    }
4985
4986    #[simd_test(enable = "sse2")]
4987    unsafe fn test_mm_loadu_si64() {
4988        let a = _mm_setr_epi64x(5, 6);
4989        let r = _mm_loadu_si64(ptr::addr_of!(a) as *const _);
4990        assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
4991    }
4992
4993    #[simd_test(enable = "sse2")]
4994    unsafe fn test_mm_cvtpd_ps() {
4995        let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, 5.0));
4996        assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, 0.0));
4997
4998        let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, -5.0));
4999        assert_eq_m128(r, _mm_setr_ps(-1.0, -5.0, 0.0, 0.0));
5000
5001        let r = _mm_cvtpd_ps(_mm_setr_pd(f64::MAX, f64::MIN));
5002        assert_eq_m128(r, _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0));
5003
5004        let r = _mm_cvtpd_ps(_mm_setr_pd(f32::MAX as f64, f32::MIN as f64));
5005        assert_eq_m128(r, _mm_setr_ps(f32::MAX, f32::MIN, 0.0, 0.0));
5006    }
5007
5008    #[simd_test(enable = "sse2")]
5009    unsafe fn test_mm_cvtps_pd() {
5010        let r = _mm_cvtps_pd(_mm_setr_ps(-1.0, 2.0, -3.0, 5.0));
5011        assert_eq_m128d(r, _mm_setr_pd(-1.0, 2.0));
5012
5013        let r = _mm_cvtps_pd(_mm_setr_ps(
5014            f32::MAX,
5015            f32::INFINITY,
5016            f32::NEG_INFINITY,
5017            f32::MIN,
5018        ));
5019        assert_eq_m128d(r, _mm_setr_pd(f32::MAX as f64, f64::INFINITY));
5020    }
5021
5022    #[simd_test(enable = "sse2")]
5023    unsafe fn test_mm_cvtpd_epi32() {
5024        let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, 5.0));
5025        assert_eq_m128i(r, _mm_setr_epi32(-1, 5, 0, 0));
5026
5027        let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, -5.0));
5028        assert_eq_m128i(r, _mm_setr_epi32(-1, -5, 0, 0));
5029
5030        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::MAX, f64::MIN));
5031        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5032
5033        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::INFINITY, f64::NEG_INFINITY));
5034        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5035
5036        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::NAN, f64::NAN));
5037        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5038    }
5039
5040    #[simd_test(enable = "sse2")]
5041    unsafe fn test_mm_cvtsd_si32() {
5042        let r = _mm_cvtsd_si32(_mm_setr_pd(-2.0, 5.0));
5043        assert_eq!(r, -2);
5044
5045        let r = _mm_cvtsd_si32(_mm_setr_pd(f64::MAX, f64::MIN));
5046        assert_eq!(r, i32::MIN);
5047
5048        let r = _mm_cvtsd_si32(_mm_setr_pd(f64::NAN, f64::NAN));
5049        assert_eq!(r, i32::MIN);
5050    }
5051
5052    #[simd_test(enable = "sse2")]
5053    unsafe fn test_mm_cvtsd_ss() {
5054        let a = _mm_setr_ps(-1.1, -2.2, 3.3, 4.4);
5055        let b = _mm_setr_pd(2.0, -5.0);
5056
5057        let r = _mm_cvtsd_ss(a, b);
5058
5059        assert_eq_m128(r, _mm_setr_ps(2.0, -2.2, 3.3, 4.4));
5060
5061        let a = _mm_setr_ps(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY);
5062        let b = _mm_setr_pd(f64::INFINITY, -5.0);
5063
5064        let r = _mm_cvtsd_ss(a, b);
5065
5066        assert_eq_m128(
5067            r,
5068            _mm_setr_ps(
5069                f32::INFINITY,
5070                f32::NEG_INFINITY,
5071                f32::MAX,
5072                f32::NEG_INFINITY,
5073            ),
5074        );
5075    }
5076
5077    #[simd_test(enable = "sse2")]
5078    unsafe fn test_mm_cvtsd_f64() {
5079        let r = _mm_cvtsd_f64(_mm_setr_pd(-1.1, 2.2));
5080        assert_eq!(r, -1.1);
5081    }
5082
5083    #[simd_test(enable = "sse2")]
5084    unsafe fn test_mm_cvtss_sd() {
5085        let a = _mm_setr_pd(-1.1, 2.2);
5086        let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
5087
5088        let r = _mm_cvtss_sd(a, b);
5089        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.2));
5090
5091        let a = _mm_setr_pd(-1.1, f64::INFINITY);
5092        let b = _mm_setr_ps(f32::NEG_INFINITY, 2.0, 3.0, 4.0);
5093
5094        let r = _mm_cvtss_sd(a, b);
5095        assert_eq_m128d(r, _mm_setr_pd(f64::NEG_INFINITY, f64::INFINITY));
5096    }
5097
5098    #[simd_test(enable = "sse2")]
5099    unsafe fn test_mm_cvttpd_epi32() {
5100        let a = _mm_setr_pd(-1.1, 2.2);
5101        let r = _mm_cvttpd_epi32(a);
5102        assert_eq_m128i(r, _mm_setr_epi32(-1, 2, 0, 0));
5103
5104        let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
5105        let r = _mm_cvttpd_epi32(a);
5106        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5107    }
5108
5109    #[simd_test(enable = "sse2")]
5110    unsafe fn test_mm_cvttsd_si32() {
5111        let a = _mm_setr_pd(-1.1, 2.2);
5112        let r = _mm_cvttsd_si32(a);
5113        assert_eq!(r, -1);
5114
5115        let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
5116        let r = _mm_cvttsd_si32(a);
5117        assert_eq!(r, i32::MIN);
5118    }
5119
5120    #[simd_test(enable = "sse2")]
5121    unsafe fn test_mm_cvttps_epi32() {
5122        let a = _mm_setr_ps(-1.1, 2.2, -3.3, 6.6);
5123        let r = _mm_cvttps_epi32(a);
5124        assert_eq_m128i(r, _mm_setr_epi32(-1, 2, -3, 6));
5125
5126        let a = _mm_setr_ps(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX);
5127        let r = _mm_cvttps_epi32(a);
5128        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, i32::MIN, i32::MIN));
5129    }
5130
5131    #[simd_test(enable = "sse2")]
5132    unsafe fn test_mm_set_sd() {
5133        let r = _mm_set_sd(-1.0_f64);
5134        assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, 0_f64));
5135    }
5136
5137    #[simd_test(enable = "sse2")]
5138    unsafe fn test_mm_set1_pd() {
5139        let r = _mm_set1_pd(-1.0_f64);
5140        assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, -1.0_f64));
5141    }
5142
5143    #[simd_test(enable = "sse2")]
5144    unsafe fn test_mm_set_pd1() {
5145        let r = _mm_set_pd1(-2.0_f64);
5146        assert_eq_m128d(r, _mm_setr_pd(-2.0_f64, -2.0_f64));
5147    }
5148
5149    #[simd_test(enable = "sse2")]
5150    unsafe fn test_mm_set_pd() {
5151        let r = _mm_set_pd(1.0_f64, 5.0_f64);
5152        assert_eq_m128d(r, _mm_setr_pd(5.0_f64, 1.0_f64));
5153    }
5154
5155    #[simd_test(enable = "sse2")]
5156    unsafe fn test_mm_setr_pd() {
5157        let r = _mm_setr_pd(1.0_f64, -5.0_f64);
5158        assert_eq_m128d(r, _mm_setr_pd(1.0_f64, -5.0_f64));
5159    }
5160
5161    #[simd_test(enable = "sse2")]
5162    unsafe fn test_mm_setzero_pd() {
5163        let r = _mm_setzero_pd();
5164        assert_eq_m128d(r, _mm_setr_pd(0_f64, 0_f64));
5165    }
5166
5167    #[simd_test(enable = "sse2")]
5168    unsafe fn test_mm_load1_pd() {
5169        let d = -5.0;
5170        let r = _mm_load1_pd(&d);
5171        assert_eq_m128d(r, _mm_setr_pd(d, d));
5172    }
5173
5174    #[simd_test(enable = "sse2")]
5175    unsafe fn test_mm_load_pd1() {
5176        let d = -5.0;
5177        let r = _mm_load_pd1(&d);
5178        assert_eq_m128d(r, _mm_setr_pd(d, d));
5179    }
5180
5181    #[simd_test(enable = "sse2")]
5182    unsafe fn test_mm_unpackhi_pd() {
5183        let a = _mm_setr_pd(1.0, 2.0);
5184        let b = _mm_setr_pd(3.0, 4.0);
5185        let r = _mm_unpackhi_pd(a, b);
5186        assert_eq_m128d(r, _mm_setr_pd(2.0, 4.0));
5187    }
5188
5189    #[simd_test(enable = "sse2")]
5190    unsafe fn test_mm_unpacklo_pd() {
5191        let a = _mm_setr_pd(1.0, 2.0);
5192        let b = _mm_setr_pd(3.0, 4.0);
5193        let r = _mm_unpacklo_pd(a, b);
5194        assert_eq_m128d(r, _mm_setr_pd(1.0, 3.0));
5195    }
5196
5197    #[simd_test(enable = "sse2")]
5198    unsafe fn test_mm_shuffle_pd() {
5199        let a = _mm_setr_pd(1., 2.);
5200        let b = _mm_setr_pd(3., 4.);
5201        let expected = _mm_setr_pd(1., 3.);
5202        let r = _mm_shuffle_pd::<0b00_00_00_00>(a, b);
5203        assert_eq_m128d(r, expected);
5204    }
5205
5206    #[simd_test(enable = "sse2")]
5207    unsafe fn test_mm_move_sd() {
5208        let a = _mm_setr_pd(1., 2.);
5209        let b = _mm_setr_pd(3., 4.);
5210        let expected = _mm_setr_pd(3., 2.);
5211        let r = _mm_move_sd(a, b);
5212        assert_eq_m128d(r, expected);
5213    }
5214
5215    #[simd_test(enable = "sse2")]
5216    unsafe fn test_mm_castpd_ps() {
5217        let a = _mm_set1_pd(0.);
5218        let expected = _mm_set1_ps(0.);
5219        let r = _mm_castpd_ps(a);
5220        assert_eq_m128(r, expected);
5221    }
5222
5223    #[simd_test(enable = "sse2")]
5224    unsafe fn test_mm_castpd_si128() {
5225        let a = _mm_set1_pd(0.);
5226        let expected = _mm_set1_epi64x(0);
5227        let r = _mm_castpd_si128(a);
5228        assert_eq_m128i(r, expected);
5229    }
5230
5231    #[simd_test(enable = "sse2")]
5232    unsafe fn test_mm_castps_pd() {
5233        let a = _mm_set1_ps(0.);
5234        let expected = _mm_set1_pd(0.);
5235        let r = _mm_castps_pd(a);
5236        assert_eq_m128d(r, expected);
5237    }
5238
5239    #[simd_test(enable = "sse2")]
5240    unsafe fn test_mm_castps_si128() {
5241        let a = _mm_set1_ps(0.);
5242        let expected = _mm_set1_epi32(0);
5243        let r = _mm_castps_si128(a);
5244        assert_eq_m128i(r, expected);
5245    }
5246
5247    #[simd_test(enable = "sse2")]
5248    unsafe fn test_mm_castsi128_pd() {
5249        let a = _mm_set1_epi64x(0);
5250        let expected = _mm_set1_pd(0.);
5251        let r = _mm_castsi128_pd(a);
5252        assert_eq_m128d(r, expected);
5253    }
5254
5255    #[simd_test(enable = "sse2")]
5256    unsafe fn test_mm_castsi128_ps() {
5257        let a = _mm_set1_epi32(0);
5258        let expected = _mm_set1_ps(0.);
5259        let r = _mm_castsi128_ps(a);
5260        assert_eq_m128(r, expected);
5261    }
5262}