1#[cfg(test)]
4use stdarch_test::assert_instr;
5
6use crate::{
7 core_arch::{simd::*, x86::*},
8 intrinsics::simd::*,
9 intrinsics::sqrtf64,
10 mem, ptr,
11};
12
13#[inline]
20#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(pause))]
21#[stable(feature = "simd_x86", since = "1.27.0")]
22pub fn _mm_pause() {
23 unsafe { pause() }
26}
27
28#[inline]
33#[target_feature(enable = "sse2")]
34#[cfg_attr(test, assert_instr(clflush))]
35#[stable(feature = "simd_x86", since = "1.27.0")]
36pub unsafe fn _mm_clflush(p: *const u8) {
37 clflush(p)
38}
39
40#[inline]
49#[target_feature(enable = "sse2")]
50#[cfg_attr(test, assert_instr(lfence))]
51#[stable(feature = "simd_x86", since = "1.27.0")]
52pub fn _mm_lfence() {
53 unsafe { lfence() }
54}
55
56#[inline]
65#[target_feature(enable = "sse2")]
66#[cfg_attr(test, assert_instr(mfence))]
67#[stable(feature = "simd_x86", since = "1.27.0")]
68pub fn _mm_mfence() {
69 unsafe { mfence() }
70}
71
72#[inline]
76#[target_feature(enable = "sse2")]
77#[cfg_attr(test, assert_instr(paddb))]
78#[stable(feature = "simd_x86", since = "1.27.0")]
79#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
80pub const fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i {
81 unsafe { transmute(simd_add(a.as_i8x16(), b.as_i8x16())) }
82}
83
84#[inline]
88#[target_feature(enable = "sse2")]
89#[cfg_attr(test, assert_instr(paddw))]
90#[stable(feature = "simd_x86", since = "1.27.0")]
91#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
92pub const fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i {
93 unsafe { transmute(simd_add(a.as_i16x8(), b.as_i16x8())) }
94}
95
96#[inline]
100#[target_feature(enable = "sse2")]
101#[cfg_attr(test, assert_instr(paddd))]
102#[stable(feature = "simd_x86", since = "1.27.0")]
103#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
104pub const fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i {
105 unsafe { transmute(simd_add(a.as_i32x4(), b.as_i32x4())) }
106}
107
108#[inline]
112#[target_feature(enable = "sse2")]
113#[cfg_attr(test, assert_instr(paddq))]
114#[stable(feature = "simd_x86", since = "1.27.0")]
115#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
116pub const fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i {
117 unsafe { transmute(simd_add(a.as_i64x2(), b.as_i64x2())) }
118}
119
120#[inline]
124#[target_feature(enable = "sse2")]
125#[cfg_attr(test, assert_instr(paddsb))]
126#[stable(feature = "simd_x86", since = "1.27.0")]
127#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
128pub const fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i {
129 unsafe { transmute(simd_saturating_add(a.as_i8x16(), b.as_i8x16())) }
130}
131
132#[inline]
136#[target_feature(enable = "sse2")]
137#[cfg_attr(test, assert_instr(paddsw))]
138#[stable(feature = "simd_x86", since = "1.27.0")]
139#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
140pub const fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i {
141 unsafe { transmute(simd_saturating_add(a.as_i16x8(), b.as_i16x8())) }
142}
143
144#[inline]
148#[target_feature(enable = "sse2")]
149#[cfg_attr(test, assert_instr(paddusb))]
150#[stable(feature = "simd_x86", since = "1.27.0")]
151#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
152pub const fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i {
153 unsafe { transmute(simd_saturating_add(a.as_u8x16(), b.as_u8x16())) }
154}
155
156#[inline]
160#[target_feature(enable = "sse2")]
161#[cfg_attr(test, assert_instr(paddusw))]
162#[stable(feature = "simd_x86", since = "1.27.0")]
163#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
164pub const fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i {
165 unsafe { transmute(simd_saturating_add(a.as_u16x8(), b.as_u16x8())) }
166}
167
168#[inline]
172#[target_feature(enable = "sse2")]
173#[cfg_attr(test, assert_instr(pavgb))]
174#[stable(feature = "simd_x86", since = "1.27.0")]
175#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
176pub const fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i {
177 unsafe {
178 let a = simd_cast::<_, u16x16>(a.as_u8x16());
179 let b = simd_cast::<_, u16x16>(b.as_u8x16());
180 let r = simd_shr(simd_add(simd_add(a, b), u16x16::splat(1)), u16x16::splat(1));
181 transmute(simd_cast::<_, u8x16>(r))
182 }
183}
184
185#[inline]
189#[target_feature(enable = "sse2")]
190#[cfg_attr(test, assert_instr(pavgw))]
191#[stable(feature = "simd_x86", since = "1.27.0")]
192#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
193pub const fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i {
194 unsafe {
195 let a = simd_cast::<_, u32x8>(a.as_u16x8());
196 let b = simd_cast::<_, u32x8>(b.as_u16x8());
197 let r = simd_shr(simd_add(simd_add(a, b), u32x8::splat(1)), u32x8::splat(1));
198 transmute(simd_cast::<_, u16x8>(r))
199 }
200}
201
202#[inline]
210#[target_feature(enable = "sse2")]
211#[cfg_attr(test, assert_instr(pmaddwd))]
212#[stable(feature = "simd_x86", since = "1.27.0")]
213pub fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i {
214 unsafe { transmute(pmaddwd(a.as_i16x8(), b.as_i16x8())) }
227}
228
229#[inline]
234#[target_feature(enable = "sse2")]
235#[cfg_attr(test, assert_instr(pmaxsw))]
236#[stable(feature = "simd_x86", since = "1.27.0")]
237#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
238pub const fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i {
239 unsafe { simd_imax(a.as_i16x8(), b.as_i16x8()).as_m128i() }
240}
241
242#[inline]
247#[target_feature(enable = "sse2")]
248#[cfg_attr(test, assert_instr(pmaxub))]
249#[stable(feature = "simd_x86", since = "1.27.0")]
250#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
251pub const fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i {
252 unsafe { simd_imax(a.as_u8x16(), b.as_u8x16()).as_m128i() }
253}
254
255#[inline]
260#[target_feature(enable = "sse2")]
261#[cfg_attr(test, assert_instr(pminsw))]
262#[stable(feature = "simd_x86", since = "1.27.0")]
263#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
264pub const fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i {
265 unsafe { simd_imin(a.as_i16x8(), b.as_i16x8()).as_m128i() }
266}
267
268#[inline]
273#[target_feature(enable = "sse2")]
274#[cfg_attr(test, assert_instr(pminub))]
275#[stable(feature = "simd_x86", since = "1.27.0")]
276#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
277pub const fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i {
278 unsafe { simd_imin(a.as_u8x16(), b.as_u8x16()).as_m128i() }
279}
280
281#[inline]
288#[target_feature(enable = "sse2")]
289#[cfg_attr(test, assert_instr(pmulhw))]
290#[stable(feature = "simd_x86", since = "1.27.0")]
291#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
292pub const fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i {
293 unsafe {
294 let a = simd_cast::<_, i32x8>(a.as_i16x8());
295 let b = simd_cast::<_, i32x8>(b.as_i16x8());
296 let r = simd_shr(simd_mul(a, b), i32x8::splat(16));
297 transmute(simd_cast::<i32x8, i16x8>(r))
298 }
299}
300
301#[inline]
308#[target_feature(enable = "sse2")]
309#[cfg_attr(test, assert_instr(pmulhuw))]
310#[stable(feature = "simd_x86", since = "1.27.0")]
311#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
312pub const fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i {
313 unsafe {
314 let a = simd_cast::<_, u32x8>(a.as_u16x8());
315 let b = simd_cast::<_, u32x8>(b.as_u16x8());
316 let r = simd_shr(simd_mul(a, b), u32x8::splat(16));
317 transmute(simd_cast::<u32x8, u16x8>(r))
318 }
319}
320
321#[inline]
328#[target_feature(enable = "sse2")]
329#[cfg_attr(test, assert_instr(pmullw))]
330#[stable(feature = "simd_x86", since = "1.27.0")]
331#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
332pub const fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i {
333 unsafe { transmute(simd_mul(a.as_i16x8(), b.as_i16x8())) }
334}
335
336#[inline]
343#[target_feature(enable = "sse2")]
344#[cfg_attr(test, assert_instr(pmuludq))]
345#[stable(feature = "simd_x86", since = "1.27.0")]
346#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
347pub const fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i {
348 unsafe {
349 let a = a.as_u64x2();
350 let b = b.as_u64x2();
351 let mask = u64x2::splat(u32::MAX as u64);
352 transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
353 }
354}
355
356#[inline]
365#[target_feature(enable = "sse2")]
366#[cfg_attr(test, assert_instr(psadbw))]
367#[stable(feature = "simd_x86", since = "1.27.0")]
368pub fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i {
369 unsafe { transmute(psadbw(a.as_u8x16(), b.as_u8x16())) }
370}
371
372#[inline]
376#[target_feature(enable = "sse2")]
377#[cfg_attr(test, assert_instr(psubb))]
378#[stable(feature = "simd_x86", since = "1.27.0")]
379#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
380pub const fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i {
381 unsafe { transmute(simd_sub(a.as_i8x16(), b.as_i8x16())) }
382}
383
384#[inline]
388#[target_feature(enable = "sse2")]
389#[cfg_attr(test, assert_instr(psubw))]
390#[stable(feature = "simd_x86", since = "1.27.0")]
391#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
392pub const fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i {
393 unsafe { transmute(simd_sub(a.as_i16x8(), b.as_i16x8())) }
394}
395
396#[inline]
400#[target_feature(enable = "sse2")]
401#[cfg_attr(test, assert_instr(psubd))]
402#[stable(feature = "simd_x86", since = "1.27.0")]
403#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
404pub const fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i {
405 unsafe { transmute(simd_sub(a.as_i32x4(), b.as_i32x4())) }
406}
407
408#[inline]
412#[target_feature(enable = "sse2")]
413#[cfg_attr(test, assert_instr(psubq))]
414#[stable(feature = "simd_x86", since = "1.27.0")]
415#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
416pub const fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i {
417 unsafe { transmute(simd_sub(a.as_i64x2(), b.as_i64x2())) }
418}
419
420#[inline]
425#[target_feature(enable = "sse2")]
426#[cfg_attr(test, assert_instr(psubsb))]
427#[stable(feature = "simd_x86", since = "1.27.0")]
428#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
429pub const fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i {
430 unsafe { transmute(simd_saturating_sub(a.as_i8x16(), b.as_i8x16())) }
431}
432
433#[inline]
438#[target_feature(enable = "sse2")]
439#[cfg_attr(test, assert_instr(psubsw))]
440#[stable(feature = "simd_x86", since = "1.27.0")]
441#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
442pub const fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i {
443 unsafe { transmute(simd_saturating_sub(a.as_i16x8(), b.as_i16x8())) }
444}
445
446#[inline]
451#[target_feature(enable = "sse2")]
452#[cfg_attr(test, assert_instr(psubusb))]
453#[stable(feature = "simd_x86", since = "1.27.0")]
454#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
455pub const fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i {
456 unsafe { transmute(simd_saturating_sub(a.as_u8x16(), b.as_u8x16())) }
457}
458
459#[inline]
464#[target_feature(enable = "sse2")]
465#[cfg_attr(test, assert_instr(psubusw))]
466#[stable(feature = "simd_x86", since = "1.27.0")]
467#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
468pub const fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i {
469 unsafe { transmute(simd_saturating_sub(a.as_u16x8(), b.as_u16x8())) }
470}
471
472#[inline]
476#[target_feature(enable = "sse2")]
477#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
478#[rustc_legacy_const_generics(1)]
479#[stable(feature = "simd_x86", since = "1.27.0")]
480#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
481pub const fn _mm_slli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
482 static_assert_uimm_bits!(IMM8, 8);
483 unsafe { _mm_slli_si128_impl::<IMM8>(a) }
484}
485
486#[inline]
489#[target_feature(enable = "sse2")]
490#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
491const unsafe fn _mm_slli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
492 const fn mask(shift: i32, i: u32) -> u32 {
493 let shift = shift as u32 & 0xff;
494 if shift > 15 { i } else { 16 - shift + i }
495 }
496 transmute::<i8x16, _>(simd_shuffle!(
497 i8x16::ZERO,
498 a.as_i8x16(),
499 [
500 mask(IMM8, 0),
501 mask(IMM8, 1),
502 mask(IMM8, 2),
503 mask(IMM8, 3),
504 mask(IMM8, 4),
505 mask(IMM8, 5),
506 mask(IMM8, 6),
507 mask(IMM8, 7),
508 mask(IMM8, 8),
509 mask(IMM8, 9),
510 mask(IMM8, 10),
511 mask(IMM8, 11),
512 mask(IMM8, 12),
513 mask(IMM8, 13),
514 mask(IMM8, 14),
515 mask(IMM8, 15),
516 ],
517 ))
518}
519
520#[inline]
524#[target_feature(enable = "sse2")]
525#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
526#[rustc_legacy_const_generics(1)]
527#[stable(feature = "simd_x86", since = "1.27.0")]
528#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
529pub const fn _mm_bslli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
530 unsafe {
531 static_assert_uimm_bits!(IMM8, 8);
532 _mm_slli_si128_impl::<IMM8>(a)
533 }
534}
535
536#[inline]
540#[target_feature(enable = "sse2")]
541#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
542#[rustc_legacy_const_generics(1)]
543#[stable(feature = "simd_x86", since = "1.27.0")]
544#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
545pub const fn _mm_bsrli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
546 unsafe {
547 static_assert_uimm_bits!(IMM8, 8);
548 _mm_srli_si128_impl::<IMM8>(a)
549 }
550}
551
552#[inline]
556#[target_feature(enable = "sse2")]
557#[cfg_attr(test, assert_instr(psllw, IMM8 = 7))]
558#[rustc_legacy_const_generics(1)]
559#[stable(feature = "simd_x86", since = "1.27.0")]
560#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
561pub const fn _mm_slli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
562 static_assert_uimm_bits!(IMM8, 8);
563 unsafe {
564 if IMM8 >= 16 {
565 _mm_setzero_si128()
566 } else {
567 transmute(simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
568 }
569 }
570}
571
572#[inline]
577#[target_feature(enable = "sse2")]
578#[cfg_attr(test, assert_instr(psllw))]
579#[stable(feature = "simd_x86", since = "1.27.0")]
580pub fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i {
581 unsafe { transmute(psllw(a.as_i16x8(), count.as_i16x8())) }
582}
583
584#[inline]
588#[target_feature(enable = "sse2")]
589#[cfg_attr(test, assert_instr(pslld, IMM8 = 7))]
590#[rustc_legacy_const_generics(1)]
591#[stable(feature = "simd_x86", since = "1.27.0")]
592#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
593pub const fn _mm_slli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
594 static_assert_uimm_bits!(IMM8, 8);
595 unsafe {
596 if IMM8 >= 32 {
597 _mm_setzero_si128()
598 } else {
599 transmute(simd_shl(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
600 }
601 }
602}
603
604#[inline]
609#[target_feature(enable = "sse2")]
610#[cfg_attr(test, assert_instr(pslld))]
611#[stable(feature = "simd_x86", since = "1.27.0")]
612pub fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i {
613 unsafe { transmute(pslld(a.as_i32x4(), count.as_i32x4())) }
614}
615
616#[inline]
620#[target_feature(enable = "sse2")]
621#[cfg_attr(test, assert_instr(psllq, IMM8 = 7))]
622#[rustc_legacy_const_generics(1)]
623#[stable(feature = "simd_x86", since = "1.27.0")]
624#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
625pub const fn _mm_slli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
626 static_assert_uimm_bits!(IMM8, 8);
627 unsafe {
628 if IMM8 >= 64 {
629 _mm_setzero_si128()
630 } else {
631 transmute(simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
632 }
633 }
634}
635
636#[inline]
641#[target_feature(enable = "sse2")]
642#[cfg_attr(test, assert_instr(psllq))]
643#[stable(feature = "simd_x86", since = "1.27.0")]
644pub fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i {
645 unsafe { transmute(psllq(a.as_i64x2(), count.as_i64x2())) }
646}
647
648#[inline]
653#[target_feature(enable = "sse2")]
654#[cfg_attr(test, assert_instr(psraw, IMM8 = 1))]
655#[rustc_legacy_const_generics(1)]
656#[stable(feature = "simd_x86", since = "1.27.0")]
657#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
658pub const fn _mm_srai_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
659 static_assert_uimm_bits!(IMM8, 8);
660 unsafe { transmute(simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16))) }
661}
662
663#[inline]
668#[target_feature(enable = "sse2")]
669#[cfg_attr(test, assert_instr(psraw))]
670#[stable(feature = "simd_x86", since = "1.27.0")]
671pub fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i {
672 unsafe { transmute(psraw(a.as_i16x8(), count.as_i16x8())) }
673}
674
675#[inline]
680#[target_feature(enable = "sse2")]
681#[cfg_attr(test, assert_instr(psrad, IMM8 = 1))]
682#[rustc_legacy_const_generics(1)]
683#[stable(feature = "simd_x86", since = "1.27.0")]
684#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
685pub const fn _mm_srai_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
686 static_assert_uimm_bits!(IMM8, 8);
687 unsafe { transmute(simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31)))) }
688}
689
690#[inline]
695#[target_feature(enable = "sse2")]
696#[cfg_attr(test, assert_instr(psrad))]
697#[stable(feature = "simd_x86", since = "1.27.0")]
698pub fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i {
699 unsafe { transmute(psrad(a.as_i32x4(), count.as_i32x4())) }
700}
701
702#[inline]
706#[target_feature(enable = "sse2")]
707#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
708#[rustc_legacy_const_generics(1)]
709#[stable(feature = "simd_x86", since = "1.27.0")]
710#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
711pub const fn _mm_srli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
712 static_assert_uimm_bits!(IMM8, 8);
713 unsafe { _mm_srli_si128_impl::<IMM8>(a) }
714}
715
716#[inline]
719#[target_feature(enable = "sse2")]
720#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
721const unsafe fn _mm_srli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
722 const fn mask(shift: i32, i: u32) -> u32 {
723 if (shift as u32) > 15 {
724 i + 16
725 } else {
726 i + (shift as u32)
727 }
728 }
729 let x: i8x16 = simd_shuffle!(
730 a.as_i8x16(),
731 i8x16::ZERO,
732 [
733 mask(IMM8, 0),
734 mask(IMM8, 1),
735 mask(IMM8, 2),
736 mask(IMM8, 3),
737 mask(IMM8, 4),
738 mask(IMM8, 5),
739 mask(IMM8, 6),
740 mask(IMM8, 7),
741 mask(IMM8, 8),
742 mask(IMM8, 9),
743 mask(IMM8, 10),
744 mask(IMM8, 11),
745 mask(IMM8, 12),
746 mask(IMM8, 13),
747 mask(IMM8, 14),
748 mask(IMM8, 15),
749 ],
750 );
751 transmute(x)
752}
753
754#[inline]
759#[target_feature(enable = "sse2")]
760#[cfg_attr(test, assert_instr(psrlw, IMM8 = 1))]
761#[rustc_legacy_const_generics(1)]
762#[stable(feature = "simd_x86", since = "1.27.0")]
763#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
764pub const fn _mm_srli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
765 static_assert_uimm_bits!(IMM8, 8);
766 unsafe {
767 if IMM8 >= 16 {
768 _mm_setzero_si128()
769 } else {
770 transmute(simd_shr(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
771 }
772 }
773}
774
775#[inline]
780#[target_feature(enable = "sse2")]
781#[cfg_attr(test, assert_instr(psrlw))]
782#[stable(feature = "simd_x86", since = "1.27.0")]
783pub fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i {
784 unsafe { transmute(psrlw(a.as_i16x8(), count.as_i16x8())) }
785}
786
787#[inline]
792#[target_feature(enable = "sse2")]
793#[cfg_attr(test, assert_instr(psrld, IMM8 = 8))]
794#[rustc_legacy_const_generics(1)]
795#[stable(feature = "simd_x86", since = "1.27.0")]
796#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
797pub const fn _mm_srli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
798 static_assert_uimm_bits!(IMM8, 8);
799 unsafe {
800 if IMM8 >= 32 {
801 _mm_setzero_si128()
802 } else {
803 transmute(simd_shr(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
804 }
805 }
806}
807
808#[inline]
813#[target_feature(enable = "sse2")]
814#[cfg_attr(test, assert_instr(psrld))]
815#[stable(feature = "simd_x86", since = "1.27.0")]
816pub fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i {
817 unsafe { transmute(psrld(a.as_i32x4(), count.as_i32x4())) }
818}
819
820#[inline]
825#[target_feature(enable = "sse2")]
826#[cfg_attr(test, assert_instr(psrlq, IMM8 = 1))]
827#[rustc_legacy_const_generics(1)]
828#[stable(feature = "simd_x86", since = "1.27.0")]
829#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
830pub const fn _mm_srli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
831 static_assert_uimm_bits!(IMM8, 8);
832 unsafe {
833 if IMM8 >= 64 {
834 _mm_setzero_si128()
835 } else {
836 transmute(simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
837 }
838 }
839}
840
841#[inline]
846#[target_feature(enable = "sse2")]
847#[cfg_attr(test, assert_instr(psrlq))]
848#[stable(feature = "simd_x86", since = "1.27.0")]
849pub fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i {
850 unsafe { transmute(psrlq(a.as_i64x2(), count.as_i64x2())) }
851}
852
853#[inline]
858#[target_feature(enable = "sse2")]
859#[cfg_attr(test, assert_instr(andps))]
860#[stable(feature = "simd_x86", since = "1.27.0")]
861#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
862pub const fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i {
863 unsafe { simd_and(a, b) }
864}
865
866#[inline]
871#[target_feature(enable = "sse2")]
872#[cfg_attr(test, assert_instr(andnps))]
873#[stable(feature = "simd_x86", since = "1.27.0")]
874#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
875pub const fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i {
876 unsafe { simd_and(simd_xor(_mm_set1_epi8(-1), a), b) }
877}
878
879#[inline]
884#[target_feature(enable = "sse2")]
885#[cfg_attr(test, assert_instr(orps))]
886#[stable(feature = "simd_x86", since = "1.27.0")]
887#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
888pub const fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i {
889 unsafe { simd_or(a, b) }
890}
891
892#[inline]
897#[target_feature(enable = "sse2")]
898#[cfg_attr(test, assert_instr(xorps))]
899#[stable(feature = "simd_x86", since = "1.27.0")]
900#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
901pub const fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i {
902 unsafe { simd_xor(a, b) }
903}
904
905#[inline]
909#[target_feature(enable = "sse2")]
910#[cfg_attr(test, assert_instr(pcmpeqb))]
911#[stable(feature = "simd_x86", since = "1.27.0")]
912#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
913pub const fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i {
914 unsafe { transmute::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16())) }
915}
916
917#[inline]
921#[target_feature(enable = "sse2")]
922#[cfg_attr(test, assert_instr(pcmpeqw))]
923#[stable(feature = "simd_x86", since = "1.27.0")]
924#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
925pub const fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i {
926 unsafe { transmute::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8())) }
927}
928
929#[inline]
933#[target_feature(enable = "sse2")]
934#[cfg_attr(test, assert_instr(pcmpeqd))]
935#[stable(feature = "simd_x86", since = "1.27.0")]
936#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
937pub const fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i {
938 unsafe { transmute::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4())) }
939}
940
941#[inline]
945#[target_feature(enable = "sse2")]
946#[cfg_attr(test, assert_instr(pcmpgtb))]
947#[stable(feature = "simd_x86", since = "1.27.0")]
948#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
949pub const fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i {
950 unsafe { transmute::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16())) }
951}
952
953#[inline]
957#[target_feature(enable = "sse2")]
958#[cfg_attr(test, assert_instr(pcmpgtw))]
959#[stable(feature = "simd_x86", since = "1.27.0")]
960#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
961pub const fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i {
962 unsafe { transmute::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8())) }
963}
964
965#[inline]
969#[target_feature(enable = "sse2")]
970#[cfg_attr(test, assert_instr(pcmpgtd))]
971#[stable(feature = "simd_x86", since = "1.27.0")]
972#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
973pub const fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i {
974 unsafe { transmute::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4())) }
975}
976
977#[inline]
981#[target_feature(enable = "sse2")]
982#[cfg_attr(test, assert_instr(pcmpgtb))]
983#[stable(feature = "simd_x86", since = "1.27.0")]
984#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
985pub const fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i {
986 unsafe { transmute::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16())) }
987}
988
989#[inline]
993#[target_feature(enable = "sse2")]
994#[cfg_attr(test, assert_instr(pcmpgtw))]
995#[stable(feature = "simd_x86", since = "1.27.0")]
996#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
997pub const fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i {
998 unsafe { transmute::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8())) }
999}
1000
1001#[inline]
1005#[target_feature(enable = "sse2")]
1006#[cfg_attr(test, assert_instr(pcmpgtd))]
1007#[stable(feature = "simd_x86", since = "1.27.0")]
1008#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1009pub const fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i {
1010 unsafe { transmute::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4())) }
1011}
1012
1013#[inline]
1018#[target_feature(enable = "sse2")]
1019#[cfg_attr(test, assert_instr(cvtdq2pd))]
1020#[stable(feature = "simd_x86", since = "1.27.0")]
1021#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1022pub const fn _mm_cvtepi32_pd(a: __m128i) -> __m128d {
1023 unsafe {
1024 let a = a.as_i32x4();
1025 simd_cast::<i32x2, __m128d>(simd_shuffle!(a, a, [0, 1]))
1026 }
1027}
1028
1029#[inline]
1034#[target_feature(enable = "sse2")]
1035#[cfg_attr(test, assert_instr(cvtsi2sd))]
1036#[stable(feature = "simd_x86", since = "1.27.0")]
1037#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1038pub const fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d {
1039 unsafe { simd_insert!(a, 0, b as f64) }
1040}
1041
1042#[inline]
1047#[target_feature(enable = "sse2")]
1048#[cfg_attr(test, assert_instr(cvtdq2ps))]
1049#[stable(feature = "simd_x86", since = "1.27.0")]
1050#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1051pub const fn _mm_cvtepi32_ps(a: __m128i) -> __m128 {
1052 unsafe { transmute(simd_cast::<_, f32x4>(a.as_i32x4())) }
1053}
1054
1055#[inline]
1060#[target_feature(enable = "sse2")]
1061#[cfg_attr(test, assert_instr(cvtps2dq))]
1062#[stable(feature = "simd_x86", since = "1.27.0")]
1063pub fn _mm_cvtps_epi32(a: __m128) -> __m128i {
1064 unsafe { transmute(cvtps2dq(a)) }
1065}
1066
1067#[inline]
1072#[target_feature(enable = "sse2")]
1073#[stable(feature = "simd_x86", since = "1.27.0")]
1074#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1075pub const fn _mm_cvtsi32_si128(a: i32) -> __m128i {
1076 unsafe { transmute(i32x4::new(a, 0, 0, 0)) }
1077}
1078
1079#[inline]
1083#[target_feature(enable = "sse2")]
1084#[stable(feature = "simd_x86", since = "1.27.0")]
1085#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1086pub const fn _mm_cvtsi128_si32(a: __m128i) -> i32 {
1087 unsafe { simd_extract!(a.as_i32x4(), 0) }
1088}
1089
1090#[inline]
1095#[target_feature(enable = "sse2")]
1096#[stable(feature = "simd_x86", since = "1.27.0")]
1098#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1099pub const fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i {
1100 unsafe { transmute(i64x2::new(e0, e1)) }
1101}
1102
1103#[inline]
1107#[target_feature(enable = "sse2")]
1108#[stable(feature = "simd_x86", since = "1.27.0")]
1110#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1111pub const fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
1112 unsafe { transmute(i32x4::new(e0, e1, e2, e3)) }
1113}
1114
1115#[inline]
1119#[target_feature(enable = "sse2")]
1120#[stable(feature = "simd_x86", since = "1.27.0")]
1122#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1123pub const fn _mm_set_epi16(
1124 e7: i16,
1125 e6: i16,
1126 e5: i16,
1127 e4: i16,
1128 e3: i16,
1129 e2: i16,
1130 e1: i16,
1131 e0: i16,
1132) -> __m128i {
1133 unsafe { transmute(i16x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) }
1134}
1135
1136#[inline]
1140#[target_feature(enable = "sse2")]
1141#[stable(feature = "simd_x86", since = "1.27.0")]
1143#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1144pub const fn _mm_set_epi8(
1145 e15: i8,
1146 e14: i8,
1147 e13: i8,
1148 e12: i8,
1149 e11: i8,
1150 e10: i8,
1151 e9: i8,
1152 e8: i8,
1153 e7: i8,
1154 e6: i8,
1155 e5: i8,
1156 e4: i8,
1157 e3: i8,
1158 e2: i8,
1159 e1: i8,
1160 e0: i8,
1161) -> __m128i {
1162 unsafe {
1163 #[rustfmt::skip]
1164 transmute(i8x16::new(
1165 e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
1166 ))
1167 }
1168}
1169
1170#[inline]
1174#[target_feature(enable = "sse2")]
1175#[stable(feature = "simd_x86", since = "1.27.0")]
1177#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1178pub const fn _mm_set1_epi64x(a: i64) -> __m128i {
1179 i64x2::splat(a).as_m128i()
1180}
1181
1182#[inline]
1186#[target_feature(enable = "sse2")]
1187#[stable(feature = "simd_x86", since = "1.27.0")]
1189#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1190pub const fn _mm_set1_epi32(a: i32) -> __m128i {
1191 i32x4::splat(a).as_m128i()
1192}
1193
1194#[inline]
1198#[target_feature(enable = "sse2")]
1199#[stable(feature = "simd_x86", since = "1.27.0")]
1201#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1202pub const fn _mm_set1_epi16(a: i16) -> __m128i {
1203 i16x8::splat(a).as_m128i()
1204}
1205
1206#[inline]
1210#[target_feature(enable = "sse2")]
1211#[stable(feature = "simd_x86", since = "1.27.0")]
1213#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1214pub const fn _mm_set1_epi8(a: i8) -> __m128i {
1215 i8x16::splat(a).as_m128i()
1216}
1217
1218#[inline]
1222#[target_feature(enable = "sse2")]
1223#[stable(feature = "simd_x86", since = "1.27.0")]
1225#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1226pub const fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
1227 _mm_set_epi32(e0, e1, e2, e3)
1228}
1229
1230#[inline]
1234#[target_feature(enable = "sse2")]
1235#[stable(feature = "simd_x86", since = "1.27.0")]
1237#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1238pub const fn _mm_setr_epi16(
1239 e7: i16,
1240 e6: i16,
1241 e5: i16,
1242 e4: i16,
1243 e3: i16,
1244 e2: i16,
1245 e1: i16,
1246 e0: i16,
1247) -> __m128i {
1248 _mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7)
1249}
1250
1251#[inline]
1255#[target_feature(enable = "sse2")]
1256#[stable(feature = "simd_x86", since = "1.27.0")]
1258#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1259pub const fn _mm_setr_epi8(
1260 e15: i8,
1261 e14: i8,
1262 e13: i8,
1263 e12: i8,
1264 e11: i8,
1265 e10: i8,
1266 e9: i8,
1267 e8: i8,
1268 e7: i8,
1269 e6: i8,
1270 e5: i8,
1271 e4: i8,
1272 e3: i8,
1273 e2: i8,
1274 e1: i8,
1275 e0: i8,
1276) -> __m128i {
1277 #[rustfmt::skip]
1278 _mm_set_epi8(
1279 e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
1280 )
1281}
1282
1283#[inline]
1287#[target_feature(enable = "sse2")]
1288#[cfg_attr(test, assert_instr(xorps))]
1289#[stable(feature = "simd_x86", since = "1.27.0")]
1290#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1291pub const fn _mm_setzero_si128() -> __m128i {
1292 const { unsafe { mem::zeroed() } }
1293}
1294
1295#[inline]
1299#[target_feature(enable = "sse2")]
1300#[stable(feature = "simd_x86", since = "1.27.0")]
1301#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1302pub const unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i {
1303 _mm_set_epi64x(0, ptr::read_unaligned(mem_addr as *const i64))
1304}
1305
1306#[inline]
1312#[target_feature(enable = "sse2")]
1313#[cfg_attr(
1314 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
1315 assert_instr(movaps)
1316)]
1317#[stable(feature = "simd_x86", since = "1.27.0")]
1318#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1319pub const unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i {
1320 *mem_addr
1321}
1322
1323#[inline]
1329#[target_feature(enable = "sse2")]
1330#[cfg_attr(test, assert_instr(movups))]
1331#[stable(feature = "simd_x86", since = "1.27.0")]
1332#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1333pub const unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
1334 let mut dst: __m128i = _mm_undefined_si128();
1335 ptr::copy_nonoverlapping(
1336 mem_addr as *const u8,
1337 ptr::addr_of_mut!(dst) as *mut u8,
1338 mem::size_of::<__m128i>(),
1339 );
1340 dst
1341}
1342
1343#[inline]
1363#[target_feature(enable = "sse2")]
1364#[cfg_attr(test, assert_instr(maskmovdqu))]
1365#[stable(feature = "simd_x86", since = "1.27.0")]
1366pub unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8) {
1367 maskmovdqu(a.as_i8x16(), mask.as_i8x16(), mem_addr)
1368}
1369
1370#[inline]
1376#[target_feature(enable = "sse2")]
1377#[cfg_attr(
1378 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
1379 assert_instr(movaps)
1380)]
1381#[stable(feature = "simd_x86", since = "1.27.0")]
1382#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1383pub const unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) {
1384 *mem_addr = a;
1385}
1386
1387#[inline]
1393#[target_feature(enable = "sse2")]
1394#[cfg_attr(test, assert_instr(movups))] #[stable(feature = "simd_x86", since = "1.27.0")]
1396#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1397pub const unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) {
1398 mem_addr.write_unaligned(a);
1399}
1400
1401#[inline]
1407#[target_feature(enable = "sse2")]
1408#[stable(feature = "simd_x86", since = "1.27.0")]
1409#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1410pub const unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) {
1411 ptr::copy_nonoverlapping(ptr::addr_of!(a) as *const u8, mem_addr as *mut u8, 8);
1412}
1413
1414#[inline]
1429#[target_feature(enable = "sse2")]
1430#[cfg_attr(test, assert_instr(movntdq))]
1431#[stable(feature = "simd_x86", since = "1.27.0")]
1432pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) {
1433 crate::arch::asm!(
1435 vps!("movntdq", ",{a}"),
1436 p = in(reg) mem_addr,
1437 a = in(xmm_reg) a,
1438 options(nostack, preserves_flags),
1439 );
1440}
1441
1442#[inline]
1457#[target_feature(enable = "sse2")]
1458#[cfg_attr(test, assert_instr(movnti))]
1459#[stable(feature = "simd_x86", since = "1.27.0")]
1460pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) {
1461 crate::arch::asm!(
1463 vps!("movnti", ",{a:e}"), p = in(reg) mem_addr,
1465 a = in(reg) a,
1466 options(nostack, preserves_flags),
1467 );
1468}
1469
1470#[inline]
1475#[target_feature(enable = "sse2")]
1476#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movq))]
1478#[stable(feature = "simd_x86", since = "1.27.0")]
1479#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1480pub const fn _mm_move_epi64(a: __m128i) -> __m128i {
1481 unsafe {
1482 let r: i64x2 = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 2]);
1483 transmute(r)
1484 }
1485}
1486
1487#[inline]
1492#[target_feature(enable = "sse2")]
1493#[cfg_attr(test, assert_instr(packsswb))]
1494#[stable(feature = "simd_x86", since = "1.27.0")]
1495pub fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i {
1496 unsafe { transmute(packsswb(a.as_i16x8(), b.as_i16x8())) }
1497}
1498
1499#[inline]
1504#[target_feature(enable = "sse2")]
1505#[cfg_attr(test, assert_instr(packssdw))]
1506#[stable(feature = "simd_x86", since = "1.27.0")]
1507pub fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i {
1508 unsafe { transmute(packssdw(a.as_i32x4(), b.as_i32x4())) }
1509}
1510
1511#[inline]
1516#[target_feature(enable = "sse2")]
1517#[cfg_attr(test, assert_instr(packuswb))]
1518#[stable(feature = "simd_x86", since = "1.27.0")]
1519pub fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i {
1520 unsafe { transmute(packuswb(a.as_i16x8(), b.as_i16x8())) }
1521}
1522
1523#[inline]
1527#[target_feature(enable = "sse2")]
1528#[cfg_attr(test, assert_instr(pextrw, IMM8 = 7))]
1529#[rustc_legacy_const_generics(1)]
1530#[stable(feature = "simd_x86", since = "1.27.0")]
1531#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1532pub const fn _mm_extract_epi16<const IMM8: i32>(a: __m128i) -> i32 {
1533 static_assert_uimm_bits!(IMM8, 3);
1534 unsafe { simd_extract!(a.as_u16x8(), IMM8 as u32, u16) as i32 }
1535}
1536
1537#[inline]
1541#[target_feature(enable = "sse2")]
1542#[cfg_attr(test, assert_instr(pinsrw, IMM8 = 7))]
1543#[rustc_legacy_const_generics(2)]
1544#[stable(feature = "simd_x86", since = "1.27.0")]
1545#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1546pub const fn _mm_insert_epi16<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
1547 static_assert_uimm_bits!(IMM8, 3);
1548 unsafe { transmute(simd_insert!(a.as_i16x8(), IMM8 as u32, i as i16)) }
1549}
1550
1551#[inline]
1555#[target_feature(enable = "sse2")]
1556#[cfg_attr(test, assert_instr(pmovmskb))]
1557#[stable(feature = "simd_x86", since = "1.27.0")]
1558#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1559pub const fn _mm_movemask_epi8(a: __m128i) -> i32 {
1560 unsafe {
1561 let z = i8x16::ZERO;
1562 let m: i8x16 = simd_lt(a.as_i8x16(), z);
1563 simd_bitmask::<_, u16>(m) as u32 as i32
1564 }
1565}
1566
1567#[inline]
1571#[target_feature(enable = "sse2")]
1572#[cfg_attr(test, assert_instr(pshufd, IMM8 = 9))]
1573#[rustc_legacy_const_generics(1)]
1574#[stable(feature = "simd_x86", since = "1.27.0")]
1575#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1576pub const fn _mm_shuffle_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
1577 static_assert_uimm_bits!(IMM8, 8);
1578 unsafe {
1579 let a = a.as_i32x4();
1580 let x: i32x4 = simd_shuffle!(
1581 a,
1582 a,
1583 [
1584 IMM8 as u32 & 0b11,
1585 (IMM8 as u32 >> 2) & 0b11,
1586 (IMM8 as u32 >> 4) & 0b11,
1587 (IMM8 as u32 >> 6) & 0b11,
1588 ],
1589 );
1590 transmute(x)
1591 }
1592}
1593
1594#[inline]
1602#[target_feature(enable = "sse2")]
1603#[cfg_attr(test, assert_instr(pshufhw, IMM8 = 9))]
1604#[rustc_legacy_const_generics(1)]
1605#[stable(feature = "simd_x86", since = "1.27.0")]
1606#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1607pub const fn _mm_shufflehi_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
1608 static_assert_uimm_bits!(IMM8, 8);
1609 unsafe {
1610 let a = a.as_i16x8();
1611 let x: i16x8 = simd_shuffle!(
1612 a,
1613 a,
1614 [
1615 0,
1616 1,
1617 2,
1618 3,
1619 (IMM8 as u32 & 0b11) + 4,
1620 ((IMM8 as u32 >> 2) & 0b11) + 4,
1621 ((IMM8 as u32 >> 4) & 0b11) + 4,
1622 ((IMM8 as u32 >> 6) & 0b11) + 4,
1623 ],
1624 );
1625 transmute(x)
1626 }
1627}
1628
1629#[inline]
1637#[target_feature(enable = "sse2")]
1638#[cfg_attr(test, assert_instr(pshuflw, IMM8 = 9))]
1639#[rustc_legacy_const_generics(1)]
1640#[stable(feature = "simd_x86", since = "1.27.0")]
1641#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1642pub const fn _mm_shufflelo_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
1643 static_assert_uimm_bits!(IMM8, 8);
1644 unsafe {
1645 let a = a.as_i16x8();
1646 let x: i16x8 = simd_shuffle!(
1647 a,
1648 a,
1649 [
1650 IMM8 as u32 & 0b11,
1651 (IMM8 as u32 >> 2) & 0b11,
1652 (IMM8 as u32 >> 4) & 0b11,
1653 (IMM8 as u32 >> 6) & 0b11,
1654 4,
1655 5,
1656 6,
1657 7,
1658 ],
1659 );
1660 transmute(x)
1661 }
1662}
1663
1664#[inline]
1668#[target_feature(enable = "sse2")]
1669#[cfg_attr(test, assert_instr(punpckhbw))]
1670#[stable(feature = "simd_x86", since = "1.27.0")]
1671#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1672pub const fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i {
1673 unsafe {
1674 transmute::<i8x16, _>(simd_shuffle!(
1675 a.as_i8x16(),
1676 b.as_i8x16(),
1677 [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31],
1678 ))
1679 }
1680}
1681
1682#[inline]
1686#[target_feature(enable = "sse2")]
1687#[cfg_attr(test, assert_instr(punpckhwd))]
1688#[stable(feature = "simd_x86", since = "1.27.0")]
1689#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1690pub const fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i {
1691 unsafe {
1692 let x = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]);
1693 transmute::<i16x8, _>(x)
1694 }
1695}
1696
1697#[inline]
1701#[target_feature(enable = "sse2")]
1702#[cfg_attr(test, assert_instr(unpckhps))]
1703#[stable(feature = "simd_x86", since = "1.27.0")]
1704#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1705pub const fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i {
1706 unsafe { transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7])) }
1707}
1708
1709#[inline]
1713#[target_feature(enable = "sse2")]
1714#[cfg_attr(test, assert_instr(unpckhpd))]
1715#[stable(feature = "simd_x86", since = "1.27.0")]
1716#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1717pub const fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i {
1718 unsafe { transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [1, 3])) }
1719}
1720
1721#[inline]
1725#[target_feature(enable = "sse2")]
1726#[cfg_attr(test, assert_instr(punpcklbw))]
1727#[stable(feature = "simd_x86", since = "1.27.0")]
1728#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1729pub const fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i {
1730 unsafe {
1731 transmute::<i8x16, _>(simd_shuffle!(
1732 a.as_i8x16(),
1733 b.as_i8x16(),
1734 [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23],
1735 ))
1736 }
1737}
1738
1739#[inline]
1743#[target_feature(enable = "sse2")]
1744#[cfg_attr(test, assert_instr(punpcklwd))]
1745#[stable(feature = "simd_x86", since = "1.27.0")]
1746#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1747pub const fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i {
1748 unsafe {
1749 let x = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]);
1750 transmute::<i16x8, _>(x)
1751 }
1752}
1753
1754#[inline]
1758#[target_feature(enable = "sse2")]
1759#[cfg_attr(test, assert_instr(unpcklps))]
1760#[stable(feature = "simd_x86", since = "1.27.0")]
1761#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1762pub const fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i {
1763 unsafe { transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5])) }
1764}
1765
1766#[inline]
1770#[target_feature(enable = "sse2")]
1771#[cfg_attr(test, assert_instr(movlhps))]
1772#[stable(feature = "simd_x86", since = "1.27.0")]
1773#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1774pub const fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i {
1775 unsafe { transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [0, 2])) }
1776}
1777
1778#[inline]
1783#[target_feature(enable = "sse2")]
1784#[cfg_attr(test, assert_instr(addsd))]
1785#[stable(feature = "simd_x86", since = "1.27.0")]
1786#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1787pub const fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d {
1788 unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) + _mm_cvtsd_f64(b)) }
1789}
1790
1791#[inline]
1796#[target_feature(enable = "sse2")]
1797#[cfg_attr(test, assert_instr(addpd))]
1798#[stable(feature = "simd_x86", since = "1.27.0")]
1799#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1800pub const fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d {
1801 unsafe { simd_add(a, b) }
1802}
1803
1804#[inline]
1809#[target_feature(enable = "sse2")]
1810#[cfg_attr(test, assert_instr(divsd))]
1811#[stable(feature = "simd_x86", since = "1.27.0")]
1812#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1813pub const fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d {
1814 unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) / _mm_cvtsd_f64(b)) }
1815}
1816
1817#[inline]
1822#[target_feature(enable = "sse2")]
1823#[cfg_attr(test, assert_instr(divpd))]
1824#[stable(feature = "simd_x86", since = "1.27.0")]
1825#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1826pub const fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d {
1827 unsafe { simd_div(a, b) }
1828}
1829
1830#[inline]
1835#[target_feature(enable = "sse2")]
1836#[cfg_attr(test, assert_instr(maxsd))]
1837#[stable(feature = "simd_x86", since = "1.27.0")]
1838pub fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d {
1839 unsafe { maxsd(a, b) }
1840}
1841
1842#[inline]
1847#[target_feature(enable = "sse2")]
1848#[cfg_attr(test, assert_instr(maxpd))]
1849#[stable(feature = "simd_x86", since = "1.27.0")]
1850pub fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d {
1851 unsafe { maxpd(a, b) }
1852}
1853
1854#[inline]
1859#[target_feature(enable = "sse2")]
1860#[cfg_attr(test, assert_instr(minsd))]
1861#[stable(feature = "simd_x86", since = "1.27.0")]
1862pub fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d {
1863 unsafe { minsd(a, b) }
1864}
1865
1866#[inline]
1871#[target_feature(enable = "sse2")]
1872#[cfg_attr(test, assert_instr(minpd))]
1873#[stable(feature = "simd_x86", since = "1.27.0")]
1874pub fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d {
1875 unsafe { minpd(a, b) }
1876}
1877
1878#[inline]
1883#[target_feature(enable = "sse2")]
1884#[cfg_attr(test, assert_instr(mulsd))]
1885#[stable(feature = "simd_x86", since = "1.27.0")]
1886#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1887pub const fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d {
1888 unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) * _mm_cvtsd_f64(b)) }
1889}
1890
1891#[inline]
1896#[target_feature(enable = "sse2")]
1897#[cfg_attr(test, assert_instr(mulpd))]
1898#[stable(feature = "simd_x86", since = "1.27.0")]
1899#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1900pub const fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d {
1901 unsafe { simd_mul(a, b) }
1902}
1903
1904#[inline]
1909#[target_feature(enable = "sse2")]
1910#[cfg_attr(test, assert_instr(sqrtsd))]
1911#[stable(feature = "simd_x86", since = "1.27.0")]
1912pub fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d {
1913 unsafe { simd_insert!(a, 0, sqrtf64(_mm_cvtsd_f64(b))) }
1914}
1915
1916#[inline]
1920#[target_feature(enable = "sse2")]
1921#[cfg_attr(test, assert_instr(sqrtpd))]
1922#[stable(feature = "simd_x86", since = "1.27.0")]
1923pub fn _mm_sqrt_pd(a: __m128d) -> __m128d {
1924 unsafe { simd_fsqrt(a) }
1925}
1926
1927#[inline]
1932#[target_feature(enable = "sse2")]
1933#[cfg_attr(test, assert_instr(subsd))]
1934#[stable(feature = "simd_x86", since = "1.27.0")]
1935#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1936pub const fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d {
1937 unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) - _mm_cvtsd_f64(b)) }
1938}
1939
1940#[inline]
1945#[target_feature(enable = "sse2")]
1946#[cfg_attr(test, assert_instr(subpd))]
1947#[stable(feature = "simd_x86", since = "1.27.0")]
1948#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1949pub const fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d {
1950 unsafe { simd_sub(a, b) }
1951}
1952
1953#[inline]
1958#[target_feature(enable = "sse2")]
1959#[cfg_attr(test, assert_instr(andps))]
1960#[stable(feature = "simd_x86", since = "1.27.0")]
1961#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1962pub const fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d {
1963 unsafe {
1964 let a: __m128i = transmute(a);
1965 let b: __m128i = transmute(b);
1966 transmute(_mm_and_si128(a, b))
1967 }
1968}
1969
1970#[inline]
1974#[target_feature(enable = "sse2")]
1975#[cfg_attr(test, assert_instr(andnps))]
1976#[stable(feature = "simd_x86", since = "1.27.0")]
1977#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1978pub const fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d {
1979 unsafe {
1980 let a: __m128i = transmute(a);
1981 let b: __m128i = transmute(b);
1982 transmute(_mm_andnot_si128(a, b))
1983 }
1984}
1985
1986#[inline]
1990#[target_feature(enable = "sse2")]
1991#[cfg_attr(test, assert_instr(orps))]
1992#[stable(feature = "simd_x86", since = "1.27.0")]
1993#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1994pub const fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d {
1995 unsafe {
1996 let a: __m128i = transmute(a);
1997 let b: __m128i = transmute(b);
1998 transmute(_mm_or_si128(a, b))
1999 }
2000}
2001
2002#[inline]
2006#[target_feature(enable = "sse2")]
2007#[cfg_attr(test, assert_instr(xorps))]
2008#[stable(feature = "simd_x86", since = "1.27.0")]
2009#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2010pub const fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d {
2011 unsafe {
2012 let a: __m128i = transmute(a);
2013 let b: __m128i = transmute(b);
2014 transmute(_mm_xor_si128(a, b))
2015 }
2016}
2017
2018#[inline]
2023#[target_feature(enable = "sse2")]
2024#[cfg_attr(test, assert_instr(cmpeqsd))]
2025#[stable(feature = "simd_x86", since = "1.27.0")]
2026pub fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d {
2027 unsafe { cmpsd(a, b, 0) }
2028}
2029
2030#[inline]
2035#[target_feature(enable = "sse2")]
2036#[cfg_attr(test, assert_instr(cmpltsd))]
2037#[stable(feature = "simd_x86", since = "1.27.0")]
2038pub fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d {
2039 unsafe { cmpsd(a, b, 1) }
2040}
2041
2042#[inline]
2047#[target_feature(enable = "sse2")]
2048#[cfg_attr(test, assert_instr(cmplesd))]
2049#[stable(feature = "simd_x86", since = "1.27.0")]
2050pub fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d {
2051 unsafe { cmpsd(a, b, 2) }
2052}
2053
2054#[inline]
2059#[target_feature(enable = "sse2")]
2060#[cfg_attr(test, assert_instr(cmpltsd))]
2061#[stable(feature = "simd_x86", since = "1.27.0")]
2062pub fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d {
2063 unsafe { simd_insert!(_mm_cmplt_sd(b, a), 1, simd_extract!(a, 1, f64)) }
2064}
2065
2066#[inline]
2071#[target_feature(enable = "sse2")]
2072#[cfg_attr(test, assert_instr(cmplesd))]
2073#[stable(feature = "simd_x86", since = "1.27.0")]
2074pub fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d {
2075 unsafe { simd_insert!(_mm_cmple_sd(b, a), 1, simd_extract!(a, 1, f64)) }
2076}
2077
2078#[inline]
2085#[target_feature(enable = "sse2")]
2086#[cfg_attr(test, assert_instr(cmpordsd))]
2087#[stable(feature = "simd_x86", since = "1.27.0")]
2088pub fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d {
2089 unsafe { cmpsd(a, b, 7) }
2090}
2091
2092#[inline]
2098#[target_feature(enable = "sse2")]
2099#[cfg_attr(test, assert_instr(cmpunordsd))]
2100#[stable(feature = "simd_x86", since = "1.27.0")]
2101pub fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d {
2102 unsafe { cmpsd(a, b, 3) }
2103}
2104
2105#[inline]
2110#[target_feature(enable = "sse2")]
2111#[cfg_attr(test, assert_instr(cmpneqsd))]
2112#[stable(feature = "simd_x86", since = "1.27.0")]
2113pub fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d {
2114 unsafe { cmpsd(a, b, 4) }
2115}
2116
2117#[inline]
2122#[target_feature(enable = "sse2")]
2123#[cfg_attr(test, assert_instr(cmpnltsd))]
2124#[stable(feature = "simd_x86", since = "1.27.0")]
2125pub fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d {
2126 unsafe { cmpsd(a, b, 5) }
2127}
2128
2129#[inline]
2134#[target_feature(enable = "sse2")]
2135#[cfg_attr(test, assert_instr(cmpnlesd))]
2136#[stable(feature = "simd_x86", since = "1.27.0")]
2137pub fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d {
2138 unsafe { cmpsd(a, b, 6) }
2139}
2140
2141#[inline]
2146#[target_feature(enable = "sse2")]
2147#[cfg_attr(test, assert_instr(cmpnltsd))]
2148#[stable(feature = "simd_x86", since = "1.27.0")]
2149pub fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d {
2150 unsafe { simd_insert!(_mm_cmpnlt_sd(b, a), 1, simd_extract!(a, 1, f64)) }
2151}
2152
2153#[inline]
2158#[target_feature(enable = "sse2")]
2159#[cfg_attr(test, assert_instr(cmpnlesd))]
2160#[stable(feature = "simd_x86", since = "1.27.0")]
2161pub fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d {
2162 unsafe { simd_insert!(_mm_cmpnle_sd(b, a), 1, simd_extract!(a, 1, f64)) }
2163}
2164
2165#[inline]
2169#[target_feature(enable = "sse2")]
2170#[cfg_attr(test, assert_instr(cmpeqpd))]
2171#[stable(feature = "simd_x86", since = "1.27.0")]
2172pub fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d {
2173 unsafe { cmppd(a, b, 0) }
2174}
2175
2176#[inline]
2180#[target_feature(enable = "sse2")]
2181#[cfg_attr(test, assert_instr(cmpltpd))]
2182#[stable(feature = "simd_x86", since = "1.27.0")]
2183pub fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d {
2184 unsafe { cmppd(a, b, 1) }
2185}
2186
2187#[inline]
2191#[target_feature(enable = "sse2")]
2192#[cfg_attr(test, assert_instr(cmplepd))]
2193#[stable(feature = "simd_x86", since = "1.27.0")]
2194pub fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d {
2195 unsafe { cmppd(a, b, 2) }
2196}
2197
2198#[inline]
2202#[target_feature(enable = "sse2")]
2203#[cfg_attr(test, assert_instr(cmpltpd))]
2204#[stable(feature = "simd_x86", since = "1.27.0")]
2205pub fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d {
2206 _mm_cmplt_pd(b, a)
2207}
2208
2209#[inline]
2213#[target_feature(enable = "sse2")]
2214#[cfg_attr(test, assert_instr(cmplepd))]
2215#[stable(feature = "simd_x86", since = "1.27.0")]
2216pub fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d {
2217 _mm_cmple_pd(b, a)
2218}
2219
2220#[inline]
2224#[target_feature(enable = "sse2")]
2225#[cfg_attr(test, assert_instr(cmpordpd))]
2226#[stable(feature = "simd_x86", since = "1.27.0")]
2227pub fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d {
2228 unsafe { cmppd(a, b, 7) }
2229}
2230
2231#[inline]
2235#[target_feature(enable = "sse2")]
2236#[cfg_attr(test, assert_instr(cmpunordpd))]
2237#[stable(feature = "simd_x86", since = "1.27.0")]
2238pub fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d {
2239 unsafe { cmppd(a, b, 3) }
2240}
2241
2242#[inline]
2246#[target_feature(enable = "sse2")]
2247#[cfg_attr(test, assert_instr(cmpneqpd))]
2248#[stable(feature = "simd_x86", since = "1.27.0")]
2249pub fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d {
2250 unsafe { cmppd(a, b, 4) }
2251}
2252
2253#[inline]
2257#[target_feature(enable = "sse2")]
2258#[cfg_attr(test, assert_instr(cmpnltpd))]
2259#[stable(feature = "simd_x86", since = "1.27.0")]
2260pub fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d {
2261 unsafe { cmppd(a, b, 5) }
2262}
2263
2264#[inline]
2268#[target_feature(enable = "sse2")]
2269#[cfg_attr(test, assert_instr(cmpnlepd))]
2270#[stable(feature = "simd_x86", since = "1.27.0")]
2271pub fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d {
2272 unsafe { cmppd(a, b, 6) }
2273}
2274
2275#[inline]
2279#[target_feature(enable = "sse2")]
2280#[cfg_attr(test, assert_instr(cmpnltpd))]
2281#[stable(feature = "simd_x86", since = "1.27.0")]
2282pub fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d {
2283 _mm_cmpnlt_pd(b, a)
2284}
2285
2286#[inline]
2291#[target_feature(enable = "sse2")]
2292#[cfg_attr(test, assert_instr(cmpnlepd))]
2293#[stable(feature = "simd_x86", since = "1.27.0")]
2294pub fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d {
2295 _mm_cmpnle_pd(b, a)
2296}
2297
2298#[inline]
2302#[target_feature(enable = "sse2")]
2303#[cfg_attr(test, assert_instr(comisd))]
2304#[stable(feature = "simd_x86", since = "1.27.0")]
2305pub fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32 {
2306 unsafe { comieqsd(a, b) }
2307}
2308
2309#[inline]
2313#[target_feature(enable = "sse2")]
2314#[cfg_attr(test, assert_instr(comisd))]
2315#[stable(feature = "simd_x86", since = "1.27.0")]
2316pub fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32 {
2317 unsafe { comiltsd(a, b) }
2318}
2319
2320#[inline]
2324#[target_feature(enable = "sse2")]
2325#[cfg_attr(test, assert_instr(comisd))]
2326#[stable(feature = "simd_x86", since = "1.27.0")]
2327pub fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32 {
2328 unsafe { comilesd(a, b) }
2329}
2330
2331#[inline]
2335#[target_feature(enable = "sse2")]
2336#[cfg_attr(test, assert_instr(comisd))]
2337#[stable(feature = "simd_x86", since = "1.27.0")]
2338pub fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32 {
2339 unsafe { comigtsd(a, b) }
2340}
2341
2342#[inline]
2346#[target_feature(enable = "sse2")]
2347#[cfg_attr(test, assert_instr(comisd))]
2348#[stable(feature = "simd_x86", since = "1.27.0")]
2349pub fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32 {
2350 unsafe { comigesd(a, b) }
2351}
2352
2353#[inline]
2357#[target_feature(enable = "sse2")]
2358#[cfg_attr(test, assert_instr(comisd))]
2359#[stable(feature = "simd_x86", since = "1.27.0")]
2360pub fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32 {
2361 unsafe { comineqsd(a, b) }
2362}
2363
2364#[inline]
2368#[target_feature(enable = "sse2")]
2369#[cfg_attr(test, assert_instr(ucomisd))]
2370#[stable(feature = "simd_x86", since = "1.27.0")]
2371pub fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32 {
2372 unsafe { ucomieqsd(a, b) }
2373}
2374
2375#[inline]
2379#[target_feature(enable = "sse2")]
2380#[cfg_attr(test, assert_instr(ucomisd))]
2381#[stable(feature = "simd_x86", since = "1.27.0")]
2382pub fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32 {
2383 unsafe { ucomiltsd(a, b) }
2384}
2385
2386#[inline]
2390#[target_feature(enable = "sse2")]
2391#[cfg_attr(test, assert_instr(ucomisd))]
2392#[stable(feature = "simd_x86", since = "1.27.0")]
2393pub fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32 {
2394 unsafe { ucomilesd(a, b) }
2395}
2396
2397#[inline]
2401#[target_feature(enable = "sse2")]
2402#[cfg_attr(test, assert_instr(ucomisd))]
2403#[stable(feature = "simd_x86", since = "1.27.0")]
2404pub fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32 {
2405 unsafe { ucomigtsd(a, b) }
2406}
2407
2408#[inline]
2412#[target_feature(enable = "sse2")]
2413#[cfg_attr(test, assert_instr(ucomisd))]
2414#[stable(feature = "simd_x86", since = "1.27.0")]
2415pub fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32 {
2416 unsafe { ucomigesd(a, b) }
2417}
2418
2419#[inline]
2423#[target_feature(enable = "sse2")]
2424#[cfg_attr(test, assert_instr(ucomisd))]
2425#[stable(feature = "simd_x86", since = "1.27.0")]
2426pub fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32 {
2427 unsafe { ucomineqsd(a, b) }
2428}
2429
2430#[inline]
2435#[target_feature(enable = "sse2")]
2436#[cfg_attr(test, assert_instr(cvtpd2ps))]
2437#[stable(feature = "simd_x86", since = "1.27.0")]
2438#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2439pub const fn _mm_cvtpd_ps(a: __m128d) -> __m128 {
2440 unsafe {
2441 let r = simd_cast::<_, f32x2>(a.as_f64x2());
2442 let zero = f32x2::ZERO;
2443 transmute::<f32x4, _>(simd_shuffle!(r, zero, [0, 1, 2, 3]))
2444 }
2445}
2446
2447#[inline]
2453#[target_feature(enable = "sse2")]
2454#[cfg_attr(test, assert_instr(cvtps2pd))]
2455#[stable(feature = "simd_x86", since = "1.27.0")]
2456#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2457pub const fn _mm_cvtps_pd(a: __m128) -> __m128d {
2458 unsafe {
2459 let a = a.as_f32x4();
2460 transmute(simd_cast::<f32x2, f64x2>(simd_shuffle!(a, a, [0, 1])))
2461 }
2462}
2463
2464#[inline]
2469#[target_feature(enable = "sse2")]
2470#[cfg_attr(test, assert_instr(cvtpd2dq))]
2471#[stable(feature = "simd_x86", since = "1.27.0")]
2472pub fn _mm_cvtpd_epi32(a: __m128d) -> __m128i {
2473 unsafe { transmute(cvtpd2dq(a)) }
2474}
2475
2476#[inline]
2481#[target_feature(enable = "sse2")]
2482#[cfg_attr(test, assert_instr(cvtsd2si))]
2483#[stable(feature = "simd_x86", since = "1.27.0")]
2484pub fn _mm_cvtsd_si32(a: __m128d) -> i32 {
2485 unsafe { cvtsd2si(a) }
2486}
2487
2488#[inline]
2495#[target_feature(enable = "sse2")]
2496#[cfg_attr(test, assert_instr(cvtsd2ss))]
2497#[stable(feature = "simd_x86", since = "1.27.0")]
2498pub fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 {
2499 unsafe { cvtsd2ss(a, b) }
2500}
2501
2502#[inline]
2506#[target_feature(enable = "sse2")]
2507#[stable(feature = "simd_x86", since = "1.27.0")]
2508#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2509pub const fn _mm_cvtsd_f64(a: __m128d) -> f64 {
2510 unsafe { simd_extract!(a, 0) }
2511}
2512
2513#[inline]
2520#[target_feature(enable = "sse2")]
2521#[cfg_attr(test, assert_instr(cvtss2sd))]
2522#[stable(feature = "simd_x86", since = "1.27.0")]
2523#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2524pub const fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d {
2525 unsafe {
2526 let elt: f32 = simd_extract!(b, 0);
2527 simd_insert!(a, 0, elt as f64)
2528 }
2529}
2530
2531#[inline]
2536#[target_feature(enable = "sse2")]
2537#[cfg_attr(test, assert_instr(cvttpd2dq))]
2538#[stable(feature = "simd_x86", since = "1.27.0")]
2539pub fn _mm_cvttpd_epi32(a: __m128d) -> __m128i {
2540 unsafe { transmute(cvttpd2dq(a)) }
2541}
2542
2543#[inline]
2548#[target_feature(enable = "sse2")]
2549#[cfg_attr(test, assert_instr(cvttsd2si))]
2550#[stable(feature = "simd_x86", since = "1.27.0")]
2551pub fn _mm_cvttsd_si32(a: __m128d) -> i32 {
2552 unsafe { cvttsd2si(a) }
2553}
2554
2555#[inline]
2560#[target_feature(enable = "sse2")]
2561#[cfg_attr(test, assert_instr(cvttps2dq))]
2562#[stable(feature = "simd_x86", since = "1.27.0")]
2563pub fn _mm_cvttps_epi32(a: __m128) -> __m128i {
2564 unsafe { transmute(cvttps2dq(a)) }
2565}
2566
2567#[inline]
2572#[target_feature(enable = "sse2")]
2573#[stable(feature = "simd_x86", since = "1.27.0")]
2574#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2575pub const fn _mm_set_sd(a: f64) -> __m128d {
2576 _mm_set_pd(0.0, a)
2577}
2578
2579#[inline]
2584#[target_feature(enable = "sse2")]
2585#[stable(feature = "simd_x86", since = "1.27.0")]
2586#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2587pub const fn _mm_set1_pd(a: f64) -> __m128d {
2588 _mm_set_pd(a, a)
2589}
2590
2591#[inline]
2596#[target_feature(enable = "sse2")]
2597#[stable(feature = "simd_x86", since = "1.27.0")]
2598#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2599pub const fn _mm_set_pd1(a: f64) -> __m128d {
2600 _mm_set_pd(a, a)
2601}
2602
2603#[inline]
2608#[target_feature(enable = "sse2")]
2609#[stable(feature = "simd_x86", since = "1.27.0")]
2610#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2611pub const fn _mm_set_pd(a: f64, b: f64) -> __m128d {
2612 __m128d([b, a])
2613}
2614
2615#[inline]
2620#[target_feature(enable = "sse2")]
2621#[stable(feature = "simd_x86", since = "1.27.0")]
2622#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2623pub const fn _mm_setr_pd(a: f64, b: f64) -> __m128d {
2624 _mm_set_pd(b, a)
2625}
2626
2627#[inline]
2632#[target_feature(enable = "sse2")]
2633#[cfg_attr(test, assert_instr(xorp))]
2634#[stable(feature = "simd_x86", since = "1.27.0")]
2635#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2636pub const fn _mm_setzero_pd() -> __m128d {
2637 const { unsafe { mem::zeroed() } }
2638}
2639
2640#[inline]
2647#[target_feature(enable = "sse2")]
2648#[cfg_attr(test, assert_instr(movmskpd))]
2649#[stable(feature = "simd_x86", since = "1.27.0")]
2650#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2651pub const fn _mm_movemask_pd(a: __m128d) -> i32 {
2652 unsafe {
2655 let mask: i64x2 = simd_lt(transmute(a), i64x2::ZERO);
2656 simd_bitmask::<i64x2, u8>(mask) as i32
2657 }
2658}
2659
2660#[inline]
2667#[target_feature(enable = "sse2")]
2668#[cfg_attr(
2669 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
2670 assert_instr(movaps)
2671)]
2672#[stable(feature = "simd_x86", since = "1.27.0")]
2673#[allow(clippy::cast_ptr_alignment)]
2674#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2675pub const unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d {
2676 *(mem_addr as *const __m128d)
2677}
2678
2679#[inline]
2684#[target_feature(enable = "sse2")]
2685#[cfg_attr(test, assert_instr(movsd))]
2686#[stable(feature = "simd_x86", since = "1.27.0")]
2687#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2688pub const unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d {
2689 _mm_setr_pd(*mem_addr, 0.)
2690}
2691
2692#[inline]
2698#[target_feature(enable = "sse2")]
2699#[cfg_attr(test, assert_instr(movhps))]
2700#[stable(feature = "simd_x86", since = "1.27.0")]
2701#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2702pub const unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
2703 _mm_setr_pd(simd_extract!(a, 0), *mem_addr)
2704}
2705
2706#[inline]
2712#[target_feature(enable = "sse2")]
2713#[cfg_attr(test, assert_instr(movlps))]
2714#[stable(feature = "simd_x86", since = "1.27.0")]
2715#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2716pub const unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
2717 _mm_setr_pd(*mem_addr, simd_extract!(a, 1))
2718}
2719
2720#[inline]
2736#[target_feature(enable = "sse2")]
2737#[cfg_attr(test, assert_instr(movntpd))]
2738#[stable(feature = "simd_x86", since = "1.27.0")]
2739#[allow(clippy::cast_ptr_alignment)]
2740pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) {
2741 crate::arch::asm!(
2743 vps!("movntpd", ",{a}"),
2744 p = in(reg) mem_addr,
2745 a = in(xmm_reg) a,
2746 options(nostack, preserves_flags),
2747 );
2748}
2749
2750#[inline]
2755#[target_feature(enable = "sse2")]
2756#[cfg_attr(test, assert_instr(movlps))]
2757#[stable(feature = "simd_x86", since = "1.27.0")]
2758#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2759pub const unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) {
2760 *mem_addr = simd_extract!(a, 0)
2761}
2762
2763#[inline]
2769#[target_feature(enable = "sse2")]
2770#[cfg_attr(
2771 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
2772 assert_instr(movaps)
2773)]
2774#[stable(feature = "simd_x86", since = "1.27.0")]
2775#[allow(clippy::cast_ptr_alignment)]
2776#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2777pub const unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) {
2778 *(mem_addr as *mut __m128d) = a;
2779}
2780
2781#[inline]
2787#[target_feature(enable = "sse2")]
2788#[cfg_attr(test, assert_instr(movups))] #[stable(feature = "simd_x86", since = "1.27.0")]
2790#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2791pub const unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) {
2792 mem_addr.cast::<__m128d>().write_unaligned(a);
2793}
2794
2795#[inline]
2801#[target_feature(enable = "sse2")]
2802#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2803#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2804pub const unsafe fn _mm_storeu_si16(mem_addr: *mut u8, a: __m128i) {
2805 ptr::write_unaligned(mem_addr as *mut i16, simd_extract(a.as_i16x8(), 0))
2806}
2807
2808#[inline]
2814#[target_feature(enable = "sse2")]
2815#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2816#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2817pub const unsafe fn _mm_storeu_si32(mem_addr: *mut u8, a: __m128i) {
2818 ptr::write_unaligned(mem_addr as *mut i32, simd_extract(a.as_i32x4(), 0))
2819}
2820
2821#[inline]
2827#[target_feature(enable = "sse2")]
2828#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2829#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2830pub const unsafe fn _mm_storeu_si64(mem_addr: *mut u8, a: __m128i) {
2831 ptr::write_unaligned(mem_addr as *mut i64, simd_extract(a.as_i64x2(), 0))
2832}
2833
2834#[inline]
2840#[target_feature(enable = "sse2")]
2841#[stable(feature = "simd_x86", since = "1.27.0")]
2842#[allow(clippy::cast_ptr_alignment)]
2843#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2844pub const unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) {
2845 let b: __m128d = simd_shuffle!(a, a, [0, 0]);
2846 *(mem_addr as *mut __m128d) = b;
2847}
2848
2849#[inline]
2855#[target_feature(enable = "sse2")]
2856#[stable(feature = "simd_x86", since = "1.27.0")]
2857#[allow(clippy::cast_ptr_alignment)]
2858#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2859pub const unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) {
2860 let b: __m128d = simd_shuffle!(a, a, [0, 0]);
2861 *(mem_addr as *mut __m128d) = b;
2862}
2863
2864#[inline]
2871#[target_feature(enable = "sse2")]
2872#[stable(feature = "simd_x86", since = "1.27.0")]
2873#[allow(clippy::cast_ptr_alignment)]
2874#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2875pub const unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) {
2876 let b: __m128d = simd_shuffle!(a, a, [1, 0]);
2877 *(mem_addr as *mut __m128d) = b;
2878}
2879
2880#[inline]
2885#[target_feature(enable = "sse2")]
2886#[cfg_attr(test, assert_instr(movhps))]
2887#[stable(feature = "simd_x86", since = "1.27.0")]
2888#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2889pub const unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) {
2890 *mem_addr = simd_extract!(a, 1);
2891}
2892
2893#[inline]
2898#[target_feature(enable = "sse2")]
2899#[cfg_attr(test, assert_instr(movlps))]
2900#[stable(feature = "simd_x86", since = "1.27.0")]
2901#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2902pub const unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) {
2903 *mem_addr = simd_extract!(a, 0);
2904}
2905
2906#[inline]
2911#[target_feature(enable = "sse2")]
2912#[stable(feature = "simd_x86", since = "1.27.0")]
2914#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2915pub const unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d {
2916 let d = *mem_addr;
2917 _mm_setr_pd(d, d)
2918}
2919
2920#[inline]
2925#[target_feature(enable = "sse2")]
2926#[stable(feature = "simd_x86", since = "1.27.0")]
2928#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2929pub const unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d {
2930 _mm_load1_pd(mem_addr)
2931}
2932
2933#[inline]
2939#[target_feature(enable = "sse2")]
2940#[cfg_attr(
2941 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
2942 assert_instr(movaps)
2943)]
2944#[stable(feature = "simd_x86", since = "1.27.0")]
2945#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2946pub const unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d {
2947 let a = _mm_load_pd(mem_addr);
2948 simd_shuffle!(a, a, [1, 0])
2949}
2950
2951#[inline]
2957#[target_feature(enable = "sse2")]
2958#[cfg_attr(test, assert_instr(movups))]
2959#[stable(feature = "simd_x86", since = "1.27.0")]
2960#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2961pub const unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d {
2962 let mut dst = _mm_undefined_pd();
2963 ptr::copy_nonoverlapping(
2964 mem_addr as *const u8,
2965 ptr::addr_of_mut!(dst) as *mut u8,
2966 mem::size_of::<__m128d>(),
2967 );
2968 dst
2969}
2970
2971#[inline]
2977#[target_feature(enable = "sse2")]
2978#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2979#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2980pub const unsafe fn _mm_loadu_si16(mem_addr: *const u8) -> __m128i {
2981 transmute(i16x8::new(
2982 ptr::read_unaligned(mem_addr as *const i16),
2983 0,
2984 0,
2985 0,
2986 0,
2987 0,
2988 0,
2989 0,
2990 ))
2991}
2992
2993#[inline]
2999#[target_feature(enable = "sse2")]
3000#[stable(feature = "simd_x86_updates", since = "1.82.0")]
3001#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3002pub const unsafe fn _mm_loadu_si32(mem_addr: *const u8) -> __m128i {
3003 transmute(i32x4::new(
3004 ptr::read_unaligned(mem_addr as *const i32),
3005 0,
3006 0,
3007 0,
3008 ))
3009}
3010
3011#[inline]
3017#[target_feature(enable = "sse2")]
3018#[stable(feature = "simd_x86_mm_loadu_si64", since = "1.46.0")]
3019#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3020pub const unsafe fn _mm_loadu_si64(mem_addr: *const u8) -> __m128i {
3021 transmute(i64x2::new(ptr::read_unaligned(mem_addr as *const i64), 0))
3022}
3023
3024#[inline]
3030#[target_feature(enable = "sse2")]
3031#[cfg_attr(test, assert_instr(shufps, MASK = 2))]
3032#[rustc_legacy_const_generics(2)]
3033#[stable(feature = "simd_x86", since = "1.27.0")]
3034#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3035pub const fn _mm_shuffle_pd<const MASK: i32>(a: __m128d, b: __m128d) -> __m128d {
3036 static_assert_uimm_bits!(MASK, 8);
3037 unsafe { simd_shuffle!(a, b, [MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 2]) }
3038}
3039
3040#[inline]
3046#[target_feature(enable = "sse2")]
3047#[cfg_attr(test, assert_instr(movsd))]
3048#[stable(feature = "simd_x86", since = "1.27.0")]
3049#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3050pub const fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d {
3051 unsafe { _mm_setr_pd(simd_extract!(b, 0), simd_extract!(a, 1)) }
3052}
3053
3054#[inline]
3059#[target_feature(enable = "sse2")]
3060#[stable(feature = "simd_x86", since = "1.27.0")]
3061#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3062pub const fn _mm_castpd_ps(a: __m128d) -> __m128 {
3063 unsafe { transmute(a) }
3064}
3065
3066#[inline]
3071#[target_feature(enable = "sse2")]
3072#[stable(feature = "simd_x86", since = "1.27.0")]
3073#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3074pub const fn _mm_castpd_si128(a: __m128d) -> __m128i {
3075 unsafe { transmute(a) }
3076}
3077
3078#[inline]
3083#[target_feature(enable = "sse2")]
3084#[stable(feature = "simd_x86", since = "1.27.0")]
3085#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3086pub const fn _mm_castps_pd(a: __m128) -> __m128d {
3087 unsafe { transmute(a) }
3088}
3089
3090#[inline]
3095#[target_feature(enable = "sse2")]
3096#[stable(feature = "simd_x86", since = "1.27.0")]
3097#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3098pub const fn _mm_castps_si128(a: __m128) -> __m128i {
3099 unsafe { transmute(a) }
3100}
3101
3102#[inline]
3107#[target_feature(enable = "sse2")]
3108#[stable(feature = "simd_x86", since = "1.27.0")]
3109#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3110pub const fn _mm_castsi128_pd(a: __m128i) -> __m128d {
3111 unsafe { transmute(a) }
3112}
3113
3114#[inline]
3119#[target_feature(enable = "sse2")]
3120#[stable(feature = "simd_x86", since = "1.27.0")]
3121#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3122pub const fn _mm_castsi128_ps(a: __m128i) -> __m128 {
3123 unsafe { transmute(a) }
3124}
3125
3126#[inline]
3133#[target_feature(enable = "sse2")]
3134#[stable(feature = "simd_x86", since = "1.27.0")]
3135#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3136pub const fn _mm_undefined_pd() -> __m128d {
3137 const { unsafe { mem::zeroed() } }
3138}
3139
3140#[inline]
3147#[target_feature(enable = "sse2")]
3148#[stable(feature = "simd_x86", since = "1.27.0")]
3149#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3150pub const fn _mm_undefined_si128() -> __m128i {
3151 const { unsafe { mem::zeroed() } }
3152}
3153
3154#[inline]
3162#[target_feature(enable = "sse2")]
3163#[cfg_attr(test, assert_instr(unpckhpd))]
3164#[stable(feature = "simd_x86", since = "1.27.0")]
3165#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3166pub const fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d {
3167 unsafe { simd_shuffle!(a, b, [1, 3]) }
3168}
3169
3170#[inline]
3178#[target_feature(enable = "sse2")]
3179#[cfg_attr(test, assert_instr(movlhps))]
3180#[stable(feature = "simd_x86", since = "1.27.0")]
3181#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3182pub const fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d {
3183 unsafe { simd_shuffle!(a, b, [0, 2]) }
3184}
3185
3186#[allow(improper_ctypes)]
3187unsafe extern "C" {
3188 #[link_name = "llvm.x86.sse2.pause"]
3189 fn pause();
3190 #[link_name = "llvm.x86.sse2.clflush"]
3191 fn clflush(p: *const u8);
3192 #[link_name = "llvm.x86.sse2.lfence"]
3193 fn lfence();
3194 #[link_name = "llvm.x86.sse2.mfence"]
3195 fn mfence();
3196 #[link_name = "llvm.x86.sse2.pmadd.wd"]
3197 fn pmaddwd(a: i16x8, b: i16x8) -> i32x4;
3198 #[link_name = "llvm.x86.sse2.psad.bw"]
3199 fn psadbw(a: u8x16, b: u8x16) -> u64x2;
3200 #[link_name = "llvm.x86.sse2.psll.w"]
3201 fn psllw(a: i16x8, count: i16x8) -> i16x8;
3202 #[link_name = "llvm.x86.sse2.psll.d"]
3203 fn pslld(a: i32x4, count: i32x4) -> i32x4;
3204 #[link_name = "llvm.x86.sse2.psll.q"]
3205 fn psllq(a: i64x2, count: i64x2) -> i64x2;
3206 #[link_name = "llvm.x86.sse2.psra.w"]
3207 fn psraw(a: i16x8, count: i16x8) -> i16x8;
3208 #[link_name = "llvm.x86.sse2.psra.d"]
3209 fn psrad(a: i32x4, count: i32x4) -> i32x4;
3210 #[link_name = "llvm.x86.sse2.psrl.w"]
3211 fn psrlw(a: i16x8, count: i16x8) -> i16x8;
3212 #[link_name = "llvm.x86.sse2.psrl.d"]
3213 fn psrld(a: i32x4, count: i32x4) -> i32x4;
3214 #[link_name = "llvm.x86.sse2.psrl.q"]
3215 fn psrlq(a: i64x2, count: i64x2) -> i64x2;
3216 #[link_name = "llvm.x86.sse2.cvtps2dq"]
3217 fn cvtps2dq(a: __m128) -> i32x4;
3218 #[link_name = "llvm.x86.sse2.maskmov.dqu"]
3219 fn maskmovdqu(a: i8x16, mask: i8x16, mem_addr: *mut i8);
3220 #[link_name = "llvm.x86.sse2.packsswb.128"]
3221 fn packsswb(a: i16x8, b: i16x8) -> i8x16;
3222 #[link_name = "llvm.x86.sse2.packssdw.128"]
3223 fn packssdw(a: i32x4, b: i32x4) -> i16x8;
3224 #[link_name = "llvm.x86.sse2.packuswb.128"]
3225 fn packuswb(a: i16x8, b: i16x8) -> u8x16;
3226 #[link_name = "llvm.x86.sse2.max.sd"]
3227 fn maxsd(a: __m128d, b: __m128d) -> __m128d;
3228 #[link_name = "llvm.x86.sse2.max.pd"]
3229 fn maxpd(a: __m128d, b: __m128d) -> __m128d;
3230 #[link_name = "llvm.x86.sse2.min.sd"]
3231 fn minsd(a: __m128d, b: __m128d) -> __m128d;
3232 #[link_name = "llvm.x86.sse2.min.pd"]
3233 fn minpd(a: __m128d, b: __m128d) -> __m128d;
3234 #[link_name = "llvm.x86.sse2.cmp.sd"]
3235 fn cmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
3236 #[link_name = "llvm.x86.sse2.cmp.pd"]
3237 fn cmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
3238 #[link_name = "llvm.x86.sse2.comieq.sd"]
3239 fn comieqsd(a: __m128d, b: __m128d) -> i32;
3240 #[link_name = "llvm.x86.sse2.comilt.sd"]
3241 fn comiltsd(a: __m128d, b: __m128d) -> i32;
3242 #[link_name = "llvm.x86.sse2.comile.sd"]
3243 fn comilesd(a: __m128d, b: __m128d) -> i32;
3244 #[link_name = "llvm.x86.sse2.comigt.sd"]
3245 fn comigtsd(a: __m128d, b: __m128d) -> i32;
3246 #[link_name = "llvm.x86.sse2.comige.sd"]
3247 fn comigesd(a: __m128d, b: __m128d) -> i32;
3248 #[link_name = "llvm.x86.sse2.comineq.sd"]
3249 fn comineqsd(a: __m128d, b: __m128d) -> i32;
3250 #[link_name = "llvm.x86.sse2.ucomieq.sd"]
3251 fn ucomieqsd(a: __m128d, b: __m128d) -> i32;
3252 #[link_name = "llvm.x86.sse2.ucomilt.sd"]
3253 fn ucomiltsd(a: __m128d, b: __m128d) -> i32;
3254 #[link_name = "llvm.x86.sse2.ucomile.sd"]
3255 fn ucomilesd(a: __m128d, b: __m128d) -> i32;
3256 #[link_name = "llvm.x86.sse2.ucomigt.sd"]
3257 fn ucomigtsd(a: __m128d, b: __m128d) -> i32;
3258 #[link_name = "llvm.x86.sse2.ucomige.sd"]
3259 fn ucomigesd(a: __m128d, b: __m128d) -> i32;
3260 #[link_name = "llvm.x86.sse2.ucomineq.sd"]
3261 fn ucomineqsd(a: __m128d, b: __m128d) -> i32;
3262 #[link_name = "llvm.x86.sse2.cvtpd2dq"]
3263 fn cvtpd2dq(a: __m128d) -> i32x4;
3264 #[link_name = "llvm.x86.sse2.cvtsd2si"]
3265 fn cvtsd2si(a: __m128d) -> i32;
3266 #[link_name = "llvm.x86.sse2.cvtsd2ss"]
3267 fn cvtsd2ss(a: __m128, b: __m128d) -> __m128;
3268 #[link_name = "llvm.x86.sse2.cvttpd2dq"]
3269 fn cvttpd2dq(a: __m128d) -> i32x4;
3270 #[link_name = "llvm.x86.sse2.cvttsd2si"]
3271 fn cvttsd2si(a: __m128d) -> i32;
3272 #[link_name = "llvm.x86.sse2.cvttps2dq"]
3273 fn cvttps2dq(a: __m128) -> i32x4;
3274}
3275
3276#[cfg(test)]
3277mod tests {
3278 use crate::core_arch::assert_eq_const as assert_eq;
3279 use crate::{
3280 core_arch::{simd::*, x86::*},
3281 hint::black_box,
3282 };
3283 use std::{boxed, f32, f64, mem, ptr};
3284 use stdarch_test::simd_test;
3285
3286 const NAN: f64 = f64::NAN;
3287
3288 #[test]
3289 fn test_mm_pause() {
3290 _mm_pause()
3291 }
3292
3293 #[simd_test(enable = "sse2")]
3294 fn test_mm_clflush() {
3295 let x = 0_u8;
3296 unsafe {
3297 _mm_clflush(ptr::addr_of!(x));
3298 }
3299 }
3300
3301 #[simd_test(enable = "sse2")]
3302 #[cfg_attr(miri, ignore)]
3304 fn test_mm_lfence() {
3305 _mm_lfence();
3306 }
3307
3308 #[simd_test(enable = "sse2")]
3309 #[cfg_attr(miri, ignore)]
3311 fn test_mm_mfence() {
3312 _mm_mfence();
3313 }
3314
3315 #[simd_test(enable = "sse2")]
3316 const fn test_mm_add_epi8() {
3317 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3318 #[rustfmt::skip]
3319 let b = _mm_setr_epi8(
3320 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3321 );
3322 let r = _mm_add_epi8(a, b);
3323 #[rustfmt::skip]
3324 let e = _mm_setr_epi8(
3325 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3326 );
3327 assert_eq_m128i(r, e);
3328 }
3329
3330 #[simd_test(enable = "sse2")]
3331 fn test_mm_add_epi8_overflow() {
3332 let a = _mm_set1_epi8(0x7F);
3333 let b = _mm_set1_epi8(1);
3334 let r = _mm_add_epi8(a, b);
3335 assert_eq_m128i(r, _mm_set1_epi8(-128));
3336 }
3337
3338 #[simd_test(enable = "sse2")]
3339 const fn test_mm_add_epi16() {
3340 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3341 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3342 let r = _mm_add_epi16(a, b);
3343 let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3344 assert_eq_m128i(r, e);
3345 }
3346
3347 #[simd_test(enable = "sse2")]
3348 const fn test_mm_add_epi32() {
3349 let a = _mm_setr_epi32(0, 1, 2, 3);
3350 let b = _mm_setr_epi32(4, 5, 6, 7);
3351 let r = _mm_add_epi32(a, b);
3352 let e = _mm_setr_epi32(4, 6, 8, 10);
3353 assert_eq_m128i(r, e);
3354 }
3355
3356 #[simd_test(enable = "sse2")]
3357 const fn test_mm_add_epi64() {
3358 let a = _mm_setr_epi64x(0, 1);
3359 let b = _mm_setr_epi64x(2, 3);
3360 let r = _mm_add_epi64(a, b);
3361 let e = _mm_setr_epi64x(2, 4);
3362 assert_eq_m128i(r, e);
3363 }
3364
3365 #[simd_test(enable = "sse2")]
3366 const fn test_mm_adds_epi8() {
3367 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3368 #[rustfmt::skip]
3369 let b = _mm_setr_epi8(
3370 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3371 );
3372 let r = _mm_adds_epi8(a, b);
3373 #[rustfmt::skip]
3374 let e = _mm_setr_epi8(
3375 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3376 );
3377 assert_eq_m128i(r, e);
3378 }
3379
3380 #[simd_test(enable = "sse2")]
3381 fn test_mm_adds_epi8_saturate_positive() {
3382 let a = _mm_set1_epi8(0x7F);
3383 let b = _mm_set1_epi8(1);
3384 let r = _mm_adds_epi8(a, b);
3385 assert_eq_m128i(r, a);
3386 }
3387
3388 #[simd_test(enable = "sse2")]
3389 fn test_mm_adds_epi8_saturate_negative() {
3390 let a = _mm_set1_epi8(-0x80);
3391 let b = _mm_set1_epi8(-1);
3392 let r = _mm_adds_epi8(a, b);
3393 assert_eq_m128i(r, a);
3394 }
3395
3396 #[simd_test(enable = "sse2")]
3397 const fn test_mm_adds_epi16() {
3398 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3399 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3400 let r = _mm_adds_epi16(a, b);
3401 let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3402 assert_eq_m128i(r, e);
3403 }
3404
3405 #[simd_test(enable = "sse2")]
3406 fn test_mm_adds_epi16_saturate_positive() {
3407 let a = _mm_set1_epi16(0x7FFF);
3408 let b = _mm_set1_epi16(1);
3409 let r = _mm_adds_epi16(a, b);
3410 assert_eq_m128i(r, a);
3411 }
3412
3413 #[simd_test(enable = "sse2")]
3414 fn test_mm_adds_epi16_saturate_negative() {
3415 let a = _mm_set1_epi16(-0x8000);
3416 let b = _mm_set1_epi16(-1);
3417 let r = _mm_adds_epi16(a, b);
3418 assert_eq_m128i(r, a);
3419 }
3420
3421 #[simd_test(enable = "sse2")]
3422 const fn test_mm_adds_epu8() {
3423 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3424 #[rustfmt::skip]
3425 let b = _mm_setr_epi8(
3426 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3427 );
3428 let r = _mm_adds_epu8(a, b);
3429 #[rustfmt::skip]
3430 let e = _mm_setr_epi8(
3431 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3432 );
3433 assert_eq_m128i(r, e);
3434 }
3435
3436 #[simd_test(enable = "sse2")]
3437 fn test_mm_adds_epu8_saturate() {
3438 let a = _mm_set1_epi8(!0);
3439 let b = _mm_set1_epi8(1);
3440 let r = _mm_adds_epu8(a, b);
3441 assert_eq_m128i(r, a);
3442 }
3443
3444 #[simd_test(enable = "sse2")]
3445 const fn test_mm_adds_epu16() {
3446 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3447 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3448 let r = _mm_adds_epu16(a, b);
3449 let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3450 assert_eq_m128i(r, e);
3451 }
3452
3453 #[simd_test(enable = "sse2")]
3454 fn test_mm_adds_epu16_saturate() {
3455 let a = _mm_set1_epi16(!0);
3456 let b = _mm_set1_epi16(1);
3457 let r = _mm_adds_epu16(a, b);
3458 assert_eq_m128i(r, a);
3459 }
3460
3461 #[simd_test(enable = "sse2")]
3462 const fn test_mm_avg_epu8() {
3463 let (a, b) = (_mm_set1_epi8(3), _mm_set1_epi8(9));
3464 let r = _mm_avg_epu8(a, b);
3465 assert_eq_m128i(r, _mm_set1_epi8(6));
3466 }
3467
3468 #[simd_test(enable = "sse2")]
3469 const fn test_mm_avg_epu16() {
3470 let (a, b) = (_mm_set1_epi16(3), _mm_set1_epi16(9));
3471 let r = _mm_avg_epu16(a, b);
3472 assert_eq_m128i(r, _mm_set1_epi16(6));
3473 }
3474
3475 #[simd_test(enable = "sse2")]
3476 fn test_mm_madd_epi16() {
3477 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
3478 let b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
3479 let r = _mm_madd_epi16(a, b);
3480 let e = _mm_setr_epi32(29, 81, 149, 233);
3481 assert_eq_m128i(r, e);
3482
3483 let a = _mm_setr_epi16(
3486 i16::MAX,
3487 i16::MAX,
3488 i16::MIN,
3489 i16::MIN,
3490 i16::MIN,
3491 i16::MAX,
3492 0,
3493 0,
3494 );
3495 let b = _mm_setr_epi16(
3496 i16::MAX,
3497 i16::MAX,
3498 i16::MIN,
3499 i16::MIN,
3500 i16::MAX,
3501 i16::MIN,
3502 0,
3503 0,
3504 );
3505 let r = _mm_madd_epi16(a, b);
3506 let e = _mm_setr_epi32(0x7FFE0002, i32::MIN, -0x7FFF0000, 0);
3507 assert_eq_m128i(r, e);
3508 }
3509
3510 #[simd_test(enable = "sse2")]
3511 const fn test_mm_max_epi16() {
3512 let a = _mm_set1_epi16(1);
3513 let b = _mm_set1_epi16(-1);
3514 let r = _mm_max_epi16(a, b);
3515 assert_eq_m128i(r, a);
3516 }
3517
3518 #[simd_test(enable = "sse2")]
3519 const fn test_mm_max_epu8() {
3520 let a = _mm_set1_epi8(1);
3521 let b = _mm_set1_epi8(!0);
3522 let r = _mm_max_epu8(a, b);
3523 assert_eq_m128i(r, b);
3524 }
3525
3526 #[simd_test(enable = "sse2")]
3527 const fn test_mm_min_epi16() {
3528 let a = _mm_set1_epi16(1);
3529 let b = _mm_set1_epi16(-1);
3530 let r = _mm_min_epi16(a, b);
3531 assert_eq_m128i(r, b);
3532 }
3533
3534 #[simd_test(enable = "sse2")]
3535 const fn test_mm_min_epu8() {
3536 let a = _mm_set1_epi8(1);
3537 let b = _mm_set1_epi8(!0);
3538 let r = _mm_min_epu8(a, b);
3539 assert_eq_m128i(r, a);
3540 }
3541
3542 #[simd_test(enable = "sse2")]
3543 const fn test_mm_mulhi_epi16() {
3544 let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
3545 let r = _mm_mulhi_epi16(a, b);
3546 assert_eq_m128i(r, _mm_set1_epi16(-16));
3547 }
3548
3549 #[simd_test(enable = "sse2")]
3550 const fn test_mm_mulhi_epu16() {
3551 let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(1001));
3552 let r = _mm_mulhi_epu16(a, b);
3553 assert_eq_m128i(r, _mm_set1_epi16(15));
3554 }
3555
3556 #[simd_test(enable = "sse2")]
3557 const fn test_mm_mullo_epi16() {
3558 let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
3559 let r = _mm_mullo_epi16(a, b);
3560 assert_eq_m128i(r, _mm_set1_epi16(-17960));
3561 }
3562
3563 #[simd_test(enable = "sse2")]
3564 const fn test_mm_mul_epu32() {
3565 let a = _mm_setr_epi64x(1_000_000_000, 1 << 34);
3566 let b = _mm_setr_epi64x(1_000_000_000, 1 << 35);
3567 let r = _mm_mul_epu32(a, b);
3568 let e = _mm_setr_epi64x(1_000_000_000 * 1_000_000_000, 0);
3569 assert_eq_m128i(r, e);
3570 }
3571
3572 #[simd_test(enable = "sse2")]
3573 fn test_mm_sad_epu8() {
3574 #[rustfmt::skip]
3575 let a = _mm_setr_epi8(
3576 255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
3577 1, 2, 3, 4,
3578 155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8,
3579 1, 2, 3, 4,
3580 );
3581 let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2);
3582 let r = _mm_sad_epu8(a, b);
3583 let e = _mm_setr_epi64x(1020, 614);
3584 assert_eq_m128i(r, e);
3585 }
3586
3587 #[simd_test(enable = "sse2")]
3588 const fn test_mm_sub_epi8() {
3589 let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(6));
3590 let r = _mm_sub_epi8(a, b);
3591 assert_eq_m128i(r, _mm_set1_epi8(-1));
3592 }
3593
3594 #[simd_test(enable = "sse2")]
3595 const fn test_mm_sub_epi16() {
3596 let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(6));
3597 let r = _mm_sub_epi16(a, b);
3598 assert_eq_m128i(r, _mm_set1_epi16(-1));
3599 }
3600
3601 #[simd_test(enable = "sse2")]
3602 const fn test_mm_sub_epi32() {
3603 let (a, b) = (_mm_set1_epi32(5), _mm_set1_epi32(6));
3604 let r = _mm_sub_epi32(a, b);
3605 assert_eq_m128i(r, _mm_set1_epi32(-1));
3606 }
3607
3608 #[simd_test(enable = "sse2")]
3609 const fn test_mm_sub_epi64() {
3610 let (a, b) = (_mm_set1_epi64x(5), _mm_set1_epi64x(6));
3611 let r = _mm_sub_epi64(a, b);
3612 assert_eq_m128i(r, _mm_set1_epi64x(-1));
3613 }
3614
3615 #[simd_test(enable = "sse2")]
3616 const fn test_mm_subs_epi8() {
3617 let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
3618 let r = _mm_subs_epi8(a, b);
3619 assert_eq_m128i(r, _mm_set1_epi8(3));
3620 }
3621
3622 #[simd_test(enable = "sse2")]
3623 fn test_mm_subs_epi8_saturate_positive() {
3624 let a = _mm_set1_epi8(0x7F);
3625 let b = _mm_set1_epi8(-1);
3626 let r = _mm_subs_epi8(a, b);
3627 assert_eq_m128i(r, a);
3628 }
3629
3630 #[simd_test(enable = "sse2")]
3631 fn test_mm_subs_epi8_saturate_negative() {
3632 let a = _mm_set1_epi8(-0x80);
3633 let b = _mm_set1_epi8(1);
3634 let r = _mm_subs_epi8(a, b);
3635 assert_eq_m128i(r, a);
3636 }
3637
3638 #[simd_test(enable = "sse2")]
3639 const fn test_mm_subs_epi16() {
3640 let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
3641 let r = _mm_subs_epi16(a, b);
3642 assert_eq_m128i(r, _mm_set1_epi16(3));
3643 }
3644
3645 #[simd_test(enable = "sse2")]
3646 fn test_mm_subs_epi16_saturate_positive() {
3647 let a = _mm_set1_epi16(0x7FFF);
3648 let b = _mm_set1_epi16(-1);
3649 let r = _mm_subs_epi16(a, b);
3650 assert_eq_m128i(r, a);
3651 }
3652
3653 #[simd_test(enable = "sse2")]
3654 fn test_mm_subs_epi16_saturate_negative() {
3655 let a = _mm_set1_epi16(-0x8000);
3656 let b = _mm_set1_epi16(1);
3657 let r = _mm_subs_epi16(a, b);
3658 assert_eq_m128i(r, a);
3659 }
3660
3661 #[simd_test(enable = "sse2")]
3662 const fn test_mm_subs_epu8() {
3663 let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
3664 let r = _mm_subs_epu8(a, b);
3665 assert_eq_m128i(r, _mm_set1_epi8(3));
3666 }
3667
3668 #[simd_test(enable = "sse2")]
3669 fn test_mm_subs_epu8_saturate() {
3670 let a = _mm_set1_epi8(0);
3671 let b = _mm_set1_epi8(1);
3672 let r = _mm_subs_epu8(a, b);
3673 assert_eq_m128i(r, a);
3674 }
3675
3676 #[simd_test(enable = "sse2")]
3677 const fn test_mm_subs_epu16() {
3678 let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
3679 let r = _mm_subs_epu16(a, b);
3680 assert_eq_m128i(r, _mm_set1_epi16(3));
3681 }
3682
3683 #[simd_test(enable = "sse2")]
3684 fn test_mm_subs_epu16_saturate() {
3685 let a = _mm_set1_epi16(0);
3686 let b = _mm_set1_epi16(1);
3687 let r = _mm_subs_epu16(a, b);
3688 assert_eq_m128i(r, a);
3689 }
3690
3691 #[simd_test(enable = "sse2")]
3692 const fn test_mm_slli_si128() {
3693 #[rustfmt::skip]
3694 let a = _mm_setr_epi8(
3695 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3696 );
3697 let r = _mm_slli_si128::<1>(a);
3698 let e = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3699 assert_eq_m128i(r, e);
3700
3701 #[rustfmt::skip]
3702 let a = _mm_setr_epi8(
3703 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3704 );
3705 let r = _mm_slli_si128::<15>(a);
3706 let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
3707 assert_eq_m128i(r, e);
3708
3709 #[rustfmt::skip]
3710 let a = _mm_setr_epi8(
3711 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3712 );
3713 let r = _mm_slli_si128::<16>(a);
3714 assert_eq_m128i(r, _mm_set1_epi8(0));
3715 }
3716
3717 #[simd_test(enable = "sse2")]
3718 const fn test_mm_slli_epi16() {
3719 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3720 let r = _mm_slli_epi16::<4>(a);
3721 assert_eq_m128i(
3722 r,
3723 _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
3724 );
3725 let r = _mm_slli_epi16::<16>(a);
3726 assert_eq_m128i(r, _mm_set1_epi16(0));
3727 }
3728
3729 #[simd_test(enable = "sse2")]
3730 fn test_mm_sll_epi16() {
3731 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3732 let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 4));
3733 assert_eq_m128i(
3734 r,
3735 _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
3736 );
3737 let r = _mm_sll_epi16(a, _mm_set_epi64x(4, 0));
3738 assert_eq_m128i(r, a);
3739 let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 16));
3740 assert_eq_m128i(r, _mm_set1_epi16(0));
3741 let r = _mm_sll_epi16(a, _mm_set_epi64x(0, i64::MAX));
3742 assert_eq_m128i(r, _mm_set1_epi16(0));
3743 }
3744
3745 #[simd_test(enable = "sse2")]
3746 const fn test_mm_slli_epi32() {
3747 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3748 let r = _mm_slli_epi32::<4>(a);
3749 assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
3750 let r = _mm_slli_epi32::<32>(a);
3751 assert_eq_m128i(r, _mm_set1_epi32(0));
3752 }
3753
3754 #[simd_test(enable = "sse2")]
3755 fn test_mm_sll_epi32() {
3756 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3757 let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 4));
3758 assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
3759 let r = _mm_sll_epi32(a, _mm_set_epi64x(4, 0));
3760 assert_eq_m128i(r, a);
3761 let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 32));
3762 assert_eq_m128i(r, _mm_set1_epi32(0));
3763 let r = _mm_sll_epi32(a, _mm_set_epi64x(0, i64::MAX));
3764 assert_eq_m128i(r, _mm_set1_epi32(0));
3765 }
3766
3767 #[simd_test(enable = "sse2")]
3768 const fn test_mm_slli_epi64() {
3769 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3770 let r = _mm_slli_epi64::<4>(a);
3771 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
3772 let r = _mm_slli_epi64::<64>(a);
3773 assert_eq_m128i(r, _mm_set1_epi64x(0));
3774 }
3775
3776 #[simd_test(enable = "sse2")]
3777 fn test_mm_sll_epi64() {
3778 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3779 let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 4));
3780 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
3781 let r = _mm_sll_epi64(a, _mm_set_epi64x(4, 0));
3782 assert_eq_m128i(r, a);
3783 let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 64));
3784 assert_eq_m128i(r, _mm_set1_epi64x(0));
3785 let r = _mm_sll_epi64(a, _mm_set_epi64x(0, i64::MAX));
3786 assert_eq_m128i(r, _mm_set1_epi64x(0));
3787 }
3788
3789 #[simd_test(enable = "sse2")]
3790 const fn test_mm_srai_epi16() {
3791 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3792 let r = _mm_srai_epi16::<4>(a);
3793 assert_eq_m128i(
3794 r,
3795 _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10),
3796 );
3797 let r = _mm_srai_epi16::<16>(a);
3798 assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3799 }
3800
3801 #[simd_test(enable = "sse2")]
3802 fn test_mm_sra_epi16() {
3803 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3804 let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 4));
3805 assert_eq_m128i(
3806 r,
3807 _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10),
3808 );
3809 let r = _mm_sra_epi16(a, _mm_set_epi64x(4, 0));
3810 assert_eq_m128i(r, a);
3811 let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 16));
3812 assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3813 let r = _mm_sra_epi16(a, _mm_set_epi64x(0, i64::MAX));
3814 assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3815 }
3816
3817 #[simd_test(enable = "sse2")]
3818 const fn test_mm_srai_epi32() {
3819 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3820 let r = _mm_srai_epi32::<4>(a);
3821 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
3822 let r = _mm_srai_epi32::<32>(a);
3823 assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3824 }
3825
3826 #[simd_test(enable = "sse2")]
3827 fn test_mm_sra_epi32() {
3828 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3829 let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 4));
3830 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
3831 let r = _mm_sra_epi32(a, _mm_set_epi64x(4, 0));
3832 assert_eq_m128i(r, a);
3833 let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 32));
3834 assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3835 let r = _mm_sra_epi32(a, _mm_set_epi64x(0, i64::MAX));
3836 assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3837 }
3838
3839 #[simd_test(enable = "sse2")]
3840 const fn test_mm_srli_si128() {
3841 #[rustfmt::skip]
3842 let a = _mm_setr_epi8(
3843 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3844 );
3845 let r = _mm_srli_si128::<1>(a);
3846 #[rustfmt::skip]
3847 let e = _mm_setr_epi8(
3848 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0,
3849 );
3850 assert_eq_m128i(r, e);
3851
3852 #[rustfmt::skip]
3853 let a = _mm_setr_epi8(
3854 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3855 );
3856 let r = _mm_srli_si128::<15>(a);
3857 let e = _mm_setr_epi8(16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3858 assert_eq_m128i(r, e);
3859
3860 #[rustfmt::skip]
3861 let a = _mm_setr_epi8(
3862 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3863 );
3864 let r = _mm_srli_si128::<16>(a);
3865 assert_eq_m128i(r, _mm_set1_epi8(0));
3866 }
3867
3868 #[simd_test(enable = "sse2")]
3869 const fn test_mm_srli_epi16() {
3870 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3871 let r = _mm_srli_epi16::<4>(a);
3872 assert_eq_m128i(
3873 r,
3874 _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0),
3875 );
3876 let r = _mm_srli_epi16::<16>(a);
3877 assert_eq_m128i(r, _mm_set1_epi16(0));
3878 }
3879
3880 #[simd_test(enable = "sse2")]
3881 fn test_mm_srl_epi16() {
3882 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3883 let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 4));
3884 assert_eq_m128i(
3885 r,
3886 _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0),
3887 );
3888 let r = _mm_srl_epi16(a, _mm_set_epi64x(4, 0));
3889 assert_eq_m128i(r, a);
3890 let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 16));
3891 assert_eq_m128i(r, _mm_set1_epi16(0));
3892 let r = _mm_srl_epi16(a, _mm_set_epi64x(0, i64::MAX));
3893 assert_eq_m128i(r, _mm_set1_epi16(0));
3894 }
3895
3896 #[simd_test(enable = "sse2")]
3897 const fn test_mm_srli_epi32() {
3898 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3899 let r = _mm_srli_epi32::<4>(a);
3900 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
3901 let r = _mm_srli_epi32::<32>(a);
3902 assert_eq_m128i(r, _mm_set1_epi32(0));
3903 }
3904
3905 #[simd_test(enable = "sse2")]
3906 fn test_mm_srl_epi32() {
3907 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3908 let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 4));
3909 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
3910 let r = _mm_srl_epi32(a, _mm_set_epi64x(4, 0));
3911 assert_eq_m128i(r, a);
3912 let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 32));
3913 assert_eq_m128i(r, _mm_set1_epi32(0));
3914 let r = _mm_srl_epi32(a, _mm_set_epi64x(0, i64::MAX));
3915 assert_eq_m128i(r, _mm_set1_epi32(0));
3916 }
3917
3918 #[simd_test(enable = "sse2")]
3919 const fn test_mm_srli_epi64() {
3920 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3921 let r = _mm_srli_epi64::<4>(a);
3922 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
3923 let r = _mm_srli_epi64::<64>(a);
3924 assert_eq_m128i(r, _mm_set1_epi64x(0));
3925 }
3926
3927 #[simd_test(enable = "sse2")]
3928 fn test_mm_srl_epi64() {
3929 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3930 let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 4));
3931 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
3932 let r = _mm_srl_epi64(a, _mm_set_epi64x(4, 0));
3933 assert_eq_m128i(r, a);
3934 let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 64));
3935 assert_eq_m128i(r, _mm_set1_epi64x(0));
3936 let r = _mm_srl_epi64(a, _mm_set_epi64x(0, i64::MAX));
3937 assert_eq_m128i(r, _mm_set1_epi64x(0));
3938 }
3939
3940 #[simd_test(enable = "sse2")]
3941 const fn test_mm_and_si128() {
3942 let a = _mm_set1_epi8(5);
3943 let b = _mm_set1_epi8(3);
3944 let r = _mm_and_si128(a, b);
3945 assert_eq_m128i(r, _mm_set1_epi8(1));
3946 }
3947
3948 #[simd_test(enable = "sse2")]
3949 const fn test_mm_andnot_si128() {
3950 let a = _mm_set1_epi8(5);
3951 let b = _mm_set1_epi8(3);
3952 let r = _mm_andnot_si128(a, b);
3953 assert_eq_m128i(r, _mm_set1_epi8(2));
3954 }
3955
3956 #[simd_test(enable = "sse2")]
3957 const fn test_mm_or_si128() {
3958 let a = _mm_set1_epi8(5);
3959 let b = _mm_set1_epi8(3);
3960 let r = _mm_or_si128(a, b);
3961 assert_eq_m128i(r, _mm_set1_epi8(7));
3962 }
3963
3964 #[simd_test(enable = "sse2")]
3965 const fn test_mm_xor_si128() {
3966 let a = _mm_set1_epi8(5);
3967 let b = _mm_set1_epi8(3);
3968 let r = _mm_xor_si128(a, b);
3969 assert_eq_m128i(r, _mm_set1_epi8(6));
3970 }
3971
3972 #[simd_test(enable = "sse2")]
3973 const fn test_mm_cmpeq_epi8() {
3974 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3975 let b = _mm_setr_epi8(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
3976 let r = _mm_cmpeq_epi8(a, b);
3977 #[rustfmt::skip]
3978 assert_eq_m128i(
3979 r,
3980 _mm_setr_epi8(
3981 0, 0, 0xFFu8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
3982 )
3983 );
3984 }
3985
3986 #[simd_test(enable = "sse2")]
3987 const fn test_mm_cmpeq_epi16() {
3988 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3989 let b = _mm_setr_epi16(7, 6, 2, 4, 3, 2, 1, 0);
3990 let r = _mm_cmpeq_epi16(a, b);
3991 assert_eq_m128i(r, _mm_setr_epi16(0, 0, !0, 0, 0, 0, 0, 0));
3992 }
3993
3994 #[simd_test(enable = "sse2")]
3995 const fn test_mm_cmpeq_epi32() {
3996 let a = _mm_setr_epi32(0, 1, 2, 3);
3997 let b = _mm_setr_epi32(3, 2, 2, 0);
3998 let r = _mm_cmpeq_epi32(a, b);
3999 assert_eq_m128i(r, _mm_setr_epi32(0, 0, !0, 0));
4000 }
4001
4002 #[simd_test(enable = "sse2")]
4003 const fn test_mm_cmpgt_epi8() {
4004 let a = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
4005 let b = _mm_set1_epi8(0);
4006 let r = _mm_cmpgt_epi8(a, b);
4007 let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
4008 assert_eq_m128i(r, e);
4009 }
4010
4011 #[simd_test(enable = "sse2")]
4012 const fn test_mm_cmpgt_epi16() {
4013 let a = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
4014 let b = _mm_set1_epi16(0);
4015 let r = _mm_cmpgt_epi16(a, b);
4016 let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
4017 assert_eq_m128i(r, e);
4018 }
4019
4020 #[simd_test(enable = "sse2")]
4021 const fn test_mm_cmpgt_epi32() {
4022 let a = _mm_set_epi32(5, 0, 0, 0);
4023 let b = _mm_set1_epi32(0);
4024 let r = _mm_cmpgt_epi32(a, b);
4025 assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
4026 }
4027
4028 #[simd_test(enable = "sse2")]
4029 const fn test_mm_cmplt_epi8() {
4030 let a = _mm_set1_epi8(0);
4031 let b = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
4032 let r = _mm_cmplt_epi8(a, b);
4033 let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
4034 assert_eq_m128i(r, e);
4035 }
4036
4037 #[simd_test(enable = "sse2")]
4038 const fn test_mm_cmplt_epi16() {
4039 let a = _mm_set1_epi16(0);
4040 let b = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
4041 let r = _mm_cmplt_epi16(a, b);
4042 let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
4043 assert_eq_m128i(r, e);
4044 }
4045
4046 #[simd_test(enable = "sse2")]
4047 const fn test_mm_cmplt_epi32() {
4048 let a = _mm_set1_epi32(0);
4049 let b = _mm_set_epi32(5, 0, 0, 0);
4050 let r = _mm_cmplt_epi32(a, b);
4051 assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
4052 }
4053
4054 #[simd_test(enable = "sse2")]
4055 const fn test_mm_cvtepi32_pd() {
4056 let a = _mm_set_epi32(35, 25, 15, 5);
4057 let r = _mm_cvtepi32_pd(a);
4058 assert_eq_m128d(r, _mm_setr_pd(5.0, 15.0));
4059 }
4060
4061 #[simd_test(enable = "sse2")]
4062 const fn test_mm_cvtsi32_sd() {
4063 let a = _mm_set1_pd(3.5);
4064 let r = _mm_cvtsi32_sd(a, 5);
4065 assert_eq_m128d(r, _mm_setr_pd(5.0, 3.5));
4066 }
4067
4068 #[simd_test(enable = "sse2")]
4069 const fn test_mm_cvtepi32_ps() {
4070 let a = _mm_setr_epi32(1, 2, 3, 4);
4071 let r = _mm_cvtepi32_ps(a);
4072 assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0));
4073 }
4074
4075 #[simd_test(enable = "sse2")]
4076 fn test_mm_cvtps_epi32() {
4077 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
4078 let r = _mm_cvtps_epi32(a);
4079 assert_eq_m128i(r, _mm_setr_epi32(1, 2, 3, 4));
4080 }
4081
4082 #[simd_test(enable = "sse2")]
4083 const fn test_mm_cvtsi32_si128() {
4084 let r = _mm_cvtsi32_si128(5);
4085 assert_eq_m128i(r, _mm_setr_epi32(5, 0, 0, 0));
4086 }
4087
4088 #[simd_test(enable = "sse2")]
4089 const fn test_mm_cvtsi128_si32() {
4090 let r = _mm_cvtsi128_si32(_mm_setr_epi32(5, 0, 0, 0));
4091 assert_eq!(r, 5);
4092 }
4093
4094 #[simd_test(enable = "sse2")]
4095 const fn test_mm_set_epi64x() {
4096 let r = _mm_set_epi64x(0, 1);
4097 assert_eq_m128i(r, _mm_setr_epi64x(1, 0));
4098 }
4099
4100 #[simd_test(enable = "sse2")]
4101 const fn test_mm_set_epi32() {
4102 let r = _mm_set_epi32(0, 1, 2, 3);
4103 assert_eq_m128i(r, _mm_setr_epi32(3, 2, 1, 0));
4104 }
4105
4106 #[simd_test(enable = "sse2")]
4107 const fn test_mm_set_epi16() {
4108 let r = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4109 assert_eq_m128i(r, _mm_setr_epi16(7, 6, 5, 4, 3, 2, 1, 0));
4110 }
4111
4112 #[simd_test(enable = "sse2")]
4113 const fn test_mm_set_epi8() {
4114 #[rustfmt::skip]
4115 let r = _mm_set_epi8(
4116 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
4117 );
4118 #[rustfmt::skip]
4119 let e = _mm_setr_epi8(
4120 15, 14, 13, 12, 11, 10, 9, 8,
4121 7, 6, 5, 4, 3, 2, 1, 0,
4122 );
4123 assert_eq_m128i(r, e);
4124 }
4125
4126 #[simd_test(enable = "sse2")]
4127 const fn test_mm_set1_epi64x() {
4128 let r = _mm_set1_epi64x(1);
4129 assert_eq_m128i(r, _mm_set1_epi64x(1));
4130 }
4131
4132 #[simd_test(enable = "sse2")]
4133 const fn test_mm_set1_epi32() {
4134 let r = _mm_set1_epi32(1);
4135 assert_eq_m128i(r, _mm_set1_epi32(1));
4136 }
4137
4138 #[simd_test(enable = "sse2")]
4139 const fn test_mm_set1_epi16() {
4140 let r = _mm_set1_epi16(1);
4141 assert_eq_m128i(r, _mm_set1_epi16(1));
4142 }
4143
4144 #[simd_test(enable = "sse2")]
4145 const fn test_mm_set1_epi8() {
4146 let r = _mm_set1_epi8(1);
4147 assert_eq_m128i(r, _mm_set1_epi8(1));
4148 }
4149
4150 #[simd_test(enable = "sse2")]
4151 const fn test_mm_setr_epi32() {
4152 let r = _mm_setr_epi32(0, 1, 2, 3);
4153 assert_eq_m128i(r, _mm_setr_epi32(0, 1, 2, 3));
4154 }
4155
4156 #[simd_test(enable = "sse2")]
4157 const fn test_mm_setr_epi16() {
4158 let r = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4159 assert_eq_m128i(r, _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7));
4160 }
4161
4162 #[simd_test(enable = "sse2")]
4163 const fn test_mm_setr_epi8() {
4164 #[rustfmt::skip]
4165 let r = _mm_setr_epi8(
4166 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
4167 );
4168 #[rustfmt::skip]
4169 let e = _mm_setr_epi8(
4170 0, 1, 2, 3, 4, 5, 6, 7,
4171 8, 9, 10, 11, 12, 13, 14, 15,
4172 );
4173 assert_eq_m128i(r, e);
4174 }
4175
4176 #[simd_test(enable = "sse2")]
4177 const fn test_mm_setzero_si128() {
4178 let r = _mm_setzero_si128();
4179 assert_eq_m128i(r, _mm_set1_epi64x(0));
4180 }
4181
4182 #[simd_test(enable = "sse2")]
4183 const fn test_mm_loadl_epi64() {
4184 let a = _mm_setr_epi64x(6, 5);
4185 let r = unsafe { _mm_loadl_epi64(ptr::addr_of!(a)) };
4186 assert_eq_m128i(r, _mm_setr_epi64x(6, 0));
4187 }
4188
4189 #[simd_test(enable = "sse2")]
4190 const fn test_mm_load_si128() {
4191 let a = _mm_set_epi64x(5, 6);
4192 let r = unsafe { _mm_load_si128(ptr::addr_of!(a) as *const _) };
4193 assert_eq_m128i(a, r);
4194 }
4195
4196 #[simd_test(enable = "sse2")]
4197 const fn test_mm_loadu_si128() {
4198 let a = _mm_set_epi64x(5, 6);
4199 let r = unsafe { _mm_loadu_si128(ptr::addr_of!(a) as *const _) };
4200 assert_eq_m128i(a, r);
4201 }
4202
4203 #[simd_test(enable = "sse2")]
4204 #[cfg_attr(miri, ignore)]
4207 fn test_mm_maskmoveu_si128() {
4208 let a = _mm_set1_epi8(9);
4209 #[rustfmt::skip]
4210 let mask = _mm_set_epi8(
4211 0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0,
4212 0, 0, 0, 0, 0, 0, 0, 0,
4213 );
4214 let mut r = _mm_set1_epi8(0);
4215 unsafe {
4216 _mm_maskmoveu_si128(a, mask, ptr::addr_of_mut!(r) as *mut i8);
4217 }
4218 _mm_sfence();
4219 let e = _mm_set_epi8(0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
4220 assert_eq_m128i(r, e);
4221 }
4222
4223 #[simd_test(enable = "sse2")]
4224 const fn test_mm_store_si128() {
4225 let a = _mm_set1_epi8(9);
4226 let mut r = _mm_set1_epi8(0);
4227 unsafe {
4228 _mm_store_si128(&mut r, a);
4229 }
4230 assert_eq_m128i(r, a);
4231 }
4232
4233 #[simd_test(enable = "sse2")]
4234 const fn test_mm_storeu_si128() {
4235 let a = _mm_set1_epi8(9);
4236 let mut r = _mm_set1_epi8(0);
4237 unsafe {
4238 _mm_storeu_si128(&mut r, a);
4239 }
4240 assert_eq_m128i(r, a);
4241 }
4242
4243 #[simd_test(enable = "sse2")]
4244 const fn test_mm_storel_epi64() {
4245 let a = _mm_setr_epi64x(2, 9);
4246 let mut r = _mm_set1_epi8(0);
4247 unsafe {
4248 _mm_storel_epi64(&mut r, a);
4249 }
4250 assert_eq_m128i(r, _mm_setr_epi64x(2, 0));
4251 }
4252
4253 #[simd_test(enable = "sse2")]
4254 #[cfg_attr(miri, ignore)]
4257 fn test_mm_stream_si128() {
4258 let a = _mm_setr_epi32(1, 2, 3, 4);
4259 let mut r = _mm_undefined_si128();
4260 unsafe {
4261 _mm_stream_si128(ptr::addr_of_mut!(r), a);
4262 }
4263 _mm_sfence();
4264 assert_eq_m128i(r, a);
4265 }
4266
4267 #[simd_test(enable = "sse2")]
4268 #[cfg_attr(miri, ignore)]
4271 fn test_mm_stream_si32() {
4272 let a: i32 = 7;
4273 let mut mem = boxed::Box::<i32>::new(-1);
4274 unsafe {
4275 _mm_stream_si32(ptr::addr_of_mut!(*mem), a);
4276 }
4277 _mm_sfence();
4278 assert_eq!(a, *mem);
4279 }
4280
4281 #[simd_test(enable = "sse2")]
4282 const fn test_mm_move_epi64() {
4283 let a = _mm_setr_epi64x(5, 6);
4284 let r = _mm_move_epi64(a);
4285 assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
4286 }
4287
4288 #[simd_test(enable = "sse2")]
4289 fn test_mm_packs_epi16() {
4290 let a = _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0);
4291 let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80);
4292 let r = _mm_packs_epi16(a, b);
4293 #[rustfmt::skip]
4294 assert_eq_m128i(
4295 r,
4296 _mm_setr_epi8(
4297 0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F
4298 )
4299 );
4300 }
4301
4302 #[simd_test(enable = "sse2")]
4303 fn test_mm_packs_epi32() {
4304 let a = _mm_setr_epi32(0x8000, -0x8001, 0, 0);
4305 let b = _mm_setr_epi32(0, 0, -0x8001, 0x8000);
4306 let r = _mm_packs_epi32(a, b);
4307 assert_eq_m128i(
4308 r,
4309 _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF),
4310 );
4311 }
4312
4313 #[simd_test(enable = "sse2")]
4314 fn test_mm_packus_epi16() {
4315 let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0);
4316 let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100);
4317 let r = _mm_packus_epi16(a, b);
4318 assert_eq_m128i(
4319 r,
4320 _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0),
4321 );
4322 }
4323
4324 #[simd_test(enable = "sse2")]
4325 const fn test_mm_extract_epi16() {
4326 let a = _mm_setr_epi16(-1, 1, 2, 3, 4, 5, 6, 7);
4327 let r1 = _mm_extract_epi16::<0>(a);
4328 let r2 = _mm_extract_epi16::<3>(a);
4329 assert_eq!(r1, 0xFFFF);
4330 assert_eq!(r2, 3);
4331 }
4332
4333 #[simd_test(enable = "sse2")]
4334 const fn test_mm_insert_epi16() {
4335 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4336 let r = _mm_insert_epi16::<0>(a, 9);
4337 let e = _mm_setr_epi16(9, 1, 2, 3, 4, 5, 6, 7);
4338 assert_eq_m128i(r, e);
4339 }
4340
4341 #[simd_test(enable = "sse2")]
4342 const fn test_mm_movemask_epi8() {
4343 #[rustfmt::skip]
4344 let a = _mm_setr_epi8(
4345 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01,
4346 0b0101, 0b1111_0000u8 as i8, 0, 0,
4347 0, 0b1011_0101u8 as i8, 0b1111_0000u8 as i8, 0b0101,
4348 0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8,
4349 );
4350 let r = _mm_movemask_epi8(a);
4351 assert_eq!(r, 0b10100110_00100101);
4352 }
4353
4354 #[simd_test(enable = "sse2")]
4355 const fn test_mm_shuffle_epi32() {
4356 let a = _mm_setr_epi32(5, 10, 15, 20);
4357 let r = _mm_shuffle_epi32::<0b00_01_01_11>(a);
4358 let e = _mm_setr_epi32(20, 10, 10, 5);
4359 assert_eq_m128i(r, e);
4360 }
4361
4362 #[simd_test(enable = "sse2")]
4363 const fn test_mm_shufflehi_epi16() {
4364 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 10, 15, 20);
4365 let r = _mm_shufflehi_epi16::<0b00_01_01_11>(a);
4366 let e = _mm_setr_epi16(1, 2, 3, 4, 20, 10, 10, 5);
4367 assert_eq_m128i(r, e);
4368 }
4369
4370 #[simd_test(enable = "sse2")]
4371 const fn test_mm_shufflelo_epi16() {
4372 let a = _mm_setr_epi16(5, 10, 15, 20, 1, 2, 3, 4);
4373 let r = _mm_shufflelo_epi16::<0b00_01_01_11>(a);
4374 let e = _mm_setr_epi16(20, 10, 10, 5, 1, 2, 3, 4);
4375 assert_eq_m128i(r, e);
4376 }
4377
4378 #[simd_test(enable = "sse2")]
4379 const fn test_mm_unpackhi_epi8() {
4380 #[rustfmt::skip]
4381 let a = _mm_setr_epi8(
4382 0, 1, 2, 3, 4, 5, 6, 7,
4383 8, 9, 10, 11, 12, 13, 14, 15,
4384 );
4385 #[rustfmt::skip]
4386 let b = _mm_setr_epi8(
4387 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
4388 );
4389 let r = _mm_unpackhi_epi8(a, b);
4390 #[rustfmt::skip]
4391 let e = _mm_setr_epi8(
4392 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31,
4393 );
4394 assert_eq_m128i(r, e);
4395 }
4396
4397 #[simd_test(enable = "sse2")]
4398 const fn test_mm_unpackhi_epi16() {
4399 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4400 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
4401 let r = _mm_unpackhi_epi16(a, b);
4402 let e = _mm_setr_epi16(4, 12, 5, 13, 6, 14, 7, 15);
4403 assert_eq_m128i(r, e);
4404 }
4405
4406 #[simd_test(enable = "sse2")]
4407 const fn test_mm_unpackhi_epi32() {
4408 let a = _mm_setr_epi32(0, 1, 2, 3);
4409 let b = _mm_setr_epi32(4, 5, 6, 7);
4410 let r = _mm_unpackhi_epi32(a, b);
4411 let e = _mm_setr_epi32(2, 6, 3, 7);
4412 assert_eq_m128i(r, e);
4413 }
4414
4415 #[simd_test(enable = "sse2")]
4416 const fn test_mm_unpackhi_epi64() {
4417 let a = _mm_setr_epi64x(0, 1);
4418 let b = _mm_setr_epi64x(2, 3);
4419 let r = _mm_unpackhi_epi64(a, b);
4420 let e = _mm_setr_epi64x(1, 3);
4421 assert_eq_m128i(r, e);
4422 }
4423
4424 #[simd_test(enable = "sse2")]
4425 const fn test_mm_unpacklo_epi8() {
4426 #[rustfmt::skip]
4427 let a = _mm_setr_epi8(
4428 0, 1, 2, 3, 4, 5, 6, 7,
4429 8, 9, 10, 11, 12, 13, 14, 15,
4430 );
4431 #[rustfmt::skip]
4432 let b = _mm_setr_epi8(
4433 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
4434 );
4435 let r = _mm_unpacklo_epi8(a, b);
4436 #[rustfmt::skip]
4437 let e = _mm_setr_epi8(
4438 0, 16, 1, 17, 2, 18, 3, 19,
4439 4, 20, 5, 21, 6, 22, 7, 23,
4440 );
4441 assert_eq_m128i(r, e);
4442 }
4443
4444 #[simd_test(enable = "sse2")]
4445 const fn test_mm_unpacklo_epi16() {
4446 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4447 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
4448 let r = _mm_unpacklo_epi16(a, b);
4449 let e = _mm_setr_epi16(0, 8, 1, 9, 2, 10, 3, 11);
4450 assert_eq_m128i(r, e);
4451 }
4452
4453 #[simd_test(enable = "sse2")]
4454 const fn test_mm_unpacklo_epi32() {
4455 let a = _mm_setr_epi32(0, 1, 2, 3);
4456 let b = _mm_setr_epi32(4, 5, 6, 7);
4457 let r = _mm_unpacklo_epi32(a, b);
4458 let e = _mm_setr_epi32(0, 4, 1, 5);
4459 assert_eq_m128i(r, e);
4460 }
4461
4462 #[simd_test(enable = "sse2")]
4463 const fn test_mm_unpacklo_epi64() {
4464 let a = _mm_setr_epi64x(0, 1);
4465 let b = _mm_setr_epi64x(2, 3);
4466 let r = _mm_unpacklo_epi64(a, b);
4467 let e = _mm_setr_epi64x(0, 2);
4468 assert_eq_m128i(r, e);
4469 }
4470
4471 #[simd_test(enable = "sse2")]
4472 const fn test_mm_add_sd() {
4473 let a = _mm_setr_pd(1.0, 2.0);
4474 let b = _mm_setr_pd(5.0, 10.0);
4475 let r = _mm_add_sd(a, b);
4476 assert_eq_m128d(r, _mm_setr_pd(6.0, 2.0));
4477 }
4478
4479 #[simd_test(enable = "sse2")]
4480 const fn test_mm_add_pd() {
4481 let a = _mm_setr_pd(1.0, 2.0);
4482 let b = _mm_setr_pd(5.0, 10.0);
4483 let r = _mm_add_pd(a, b);
4484 assert_eq_m128d(r, _mm_setr_pd(6.0, 12.0));
4485 }
4486
4487 #[simd_test(enable = "sse2")]
4488 const fn test_mm_div_sd() {
4489 let a = _mm_setr_pd(1.0, 2.0);
4490 let b = _mm_setr_pd(5.0, 10.0);
4491 let r = _mm_div_sd(a, b);
4492 assert_eq_m128d(r, _mm_setr_pd(0.2, 2.0));
4493 }
4494
4495 #[simd_test(enable = "sse2")]
4496 const fn test_mm_div_pd() {
4497 let a = _mm_setr_pd(1.0, 2.0);
4498 let b = _mm_setr_pd(5.0, 10.0);
4499 let r = _mm_div_pd(a, b);
4500 assert_eq_m128d(r, _mm_setr_pd(0.2, 0.2));
4501 }
4502
4503 #[simd_test(enable = "sse2")]
4504 fn test_mm_max_sd() {
4505 let a = _mm_setr_pd(1.0, 2.0);
4506 let b = _mm_setr_pd(5.0, 10.0);
4507 let r = _mm_max_sd(a, b);
4508 assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
4509 }
4510
4511 #[simd_test(enable = "sse2")]
4512 fn test_mm_max_pd() {
4513 let a = _mm_setr_pd(1.0, 2.0);
4514 let b = _mm_setr_pd(5.0, 10.0);
4515 let r = _mm_max_pd(a, b);
4516 assert_eq_m128d(r, _mm_setr_pd(5.0, 10.0));
4517
4518 let a = _mm_setr_pd(-0.0, 0.0);
4520 let b = _mm_setr_pd(0.0, 0.0);
4521 let r1 = _mm_castpd_si128(_mm_max_pd(a, b));
4523 let r2 = _mm_castpd_si128(_mm_max_pd(b, a));
4524 let a = _mm_castpd_si128(a);
4525 let b = _mm_castpd_si128(b);
4526 assert_eq_m128i(r1, b);
4527 assert_eq_m128i(r2, a);
4528 assert_ne!(a.as_u8x16(), b.as_u8x16()); }
4530
4531 #[simd_test(enable = "sse2")]
4532 fn test_mm_min_sd() {
4533 let a = _mm_setr_pd(1.0, 2.0);
4534 let b = _mm_setr_pd(5.0, 10.0);
4535 let r = _mm_min_sd(a, b);
4536 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4537 }
4538
4539 #[simd_test(enable = "sse2")]
4540 fn test_mm_min_pd() {
4541 let a = _mm_setr_pd(1.0, 2.0);
4542 let b = _mm_setr_pd(5.0, 10.0);
4543 let r = _mm_min_pd(a, b);
4544 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4545
4546 let a = _mm_setr_pd(-0.0, 0.0);
4548 let b = _mm_setr_pd(0.0, 0.0);
4549 let r1 = _mm_castpd_si128(_mm_min_pd(a, b));
4551 let r2 = _mm_castpd_si128(_mm_min_pd(b, a));
4552 let a = _mm_castpd_si128(a);
4553 let b = _mm_castpd_si128(b);
4554 assert_eq_m128i(r1, b);
4555 assert_eq_m128i(r2, a);
4556 assert_ne!(a.as_u8x16(), b.as_u8x16()); }
4558
4559 #[simd_test(enable = "sse2")]
4560 const fn test_mm_mul_sd() {
4561 let a = _mm_setr_pd(1.0, 2.0);
4562 let b = _mm_setr_pd(5.0, 10.0);
4563 let r = _mm_mul_sd(a, b);
4564 assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
4565 }
4566
4567 #[simd_test(enable = "sse2")]
4568 const fn test_mm_mul_pd() {
4569 let a = _mm_setr_pd(1.0, 2.0);
4570 let b = _mm_setr_pd(5.0, 10.0);
4571 let r = _mm_mul_pd(a, b);
4572 assert_eq_m128d(r, _mm_setr_pd(5.0, 20.0));
4573 }
4574
4575 #[simd_test(enable = "sse2")]
4576 fn test_mm_sqrt_sd() {
4577 let a = _mm_setr_pd(1.0, 2.0);
4578 let b = _mm_setr_pd(5.0, 10.0);
4579 let r = _mm_sqrt_sd(a, b);
4580 assert_eq_m128d(r, _mm_setr_pd(5.0f64.sqrt(), 2.0));
4581 }
4582
4583 #[simd_test(enable = "sse2")]
4584 fn test_mm_sqrt_pd() {
4585 let r = _mm_sqrt_pd(_mm_setr_pd(1.0, 2.0));
4586 assert_eq_m128d(r, _mm_setr_pd(1.0f64.sqrt(), 2.0f64.sqrt()));
4587 }
4588
4589 #[simd_test(enable = "sse2")]
4590 const fn test_mm_sub_sd() {
4591 let a = _mm_setr_pd(1.0, 2.0);
4592 let b = _mm_setr_pd(5.0, 10.0);
4593 let r = _mm_sub_sd(a, b);
4594 assert_eq_m128d(r, _mm_setr_pd(-4.0, 2.0));
4595 }
4596
4597 #[simd_test(enable = "sse2")]
4598 const fn test_mm_sub_pd() {
4599 let a = _mm_setr_pd(1.0, 2.0);
4600 let b = _mm_setr_pd(5.0, 10.0);
4601 let r = _mm_sub_pd(a, b);
4602 assert_eq_m128d(r, _mm_setr_pd(-4.0, -8.0));
4603 }
4604
4605 #[simd_test(enable = "sse2")]
4606 const fn test_mm_and_pd() {
4607 let a = f64x2::from_bits(u64x2::splat(5)).as_m128d();
4608 let b = f64x2::from_bits(u64x2::splat(3)).as_m128d();
4609 let r = _mm_and_pd(a, b);
4610 let e = f64x2::from_bits(u64x2::splat(1)).as_m128d();
4611 assert_eq_m128d(r, e);
4612 }
4613
4614 #[simd_test(enable = "sse2")]
4615 const fn test_mm_andnot_pd() {
4616 let a = f64x2::from_bits(u64x2::splat(5)).as_m128d();
4617 let b = f64x2::from_bits(u64x2::splat(3)).as_m128d();
4618 let r = _mm_andnot_pd(a, b);
4619 let e = f64x2::from_bits(u64x2::splat(2)).as_m128d();
4620 assert_eq_m128d(r, e);
4621 }
4622
4623 #[simd_test(enable = "sse2")]
4624 const fn test_mm_or_pd() {
4625 let a = f64x2::from_bits(u64x2::splat(5)).as_m128d();
4626 let b = f64x2::from_bits(u64x2::splat(3)).as_m128d();
4627 let r = _mm_or_pd(a, b);
4628 let e = f64x2::from_bits(u64x2::splat(7)).as_m128d();
4629 assert_eq_m128d(r, e);
4630 }
4631
4632 #[simd_test(enable = "sse2")]
4633 const fn test_mm_xor_pd() {
4634 let a = f64x2::from_bits(u64x2::splat(5)).as_m128d();
4635 let b = f64x2::from_bits(u64x2::splat(3)).as_m128d();
4636 let r = _mm_xor_pd(a, b);
4637 let e = f64x2::from_bits(u64x2::splat(6)).as_m128d();
4638 assert_eq_m128d(r, e);
4639 }
4640
4641 #[simd_test(enable = "sse2")]
4642 fn test_mm_cmpeq_sd() {
4643 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4644 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4645 let r = _mm_castpd_si128(_mm_cmpeq_sd(a, b));
4646 assert_eq_m128i(r, e);
4647 }
4648
4649 #[simd_test(enable = "sse2")]
4650 fn test_mm_cmplt_sd() {
4651 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4652 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4653 let r = _mm_castpd_si128(_mm_cmplt_sd(a, b));
4654 assert_eq_m128i(r, e);
4655 }
4656
4657 #[simd_test(enable = "sse2")]
4658 fn test_mm_cmple_sd() {
4659 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4660 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4661 let r = _mm_castpd_si128(_mm_cmple_sd(a, b));
4662 assert_eq_m128i(r, e);
4663 }
4664
4665 #[simd_test(enable = "sse2")]
4666 fn test_mm_cmpgt_sd() {
4667 let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4668 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4669 let r = _mm_castpd_si128(_mm_cmpgt_sd(a, b));
4670 assert_eq_m128i(r, e);
4671 }
4672
4673 #[simd_test(enable = "sse2")]
4674 fn test_mm_cmpge_sd() {
4675 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4676 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4677 let r = _mm_castpd_si128(_mm_cmpge_sd(a, b));
4678 assert_eq_m128i(r, e);
4679 }
4680
4681 #[simd_test(enable = "sse2")]
4682 fn test_mm_cmpord_sd() {
4683 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4684 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4685 let r = _mm_castpd_si128(_mm_cmpord_sd(a, b));
4686 assert_eq_m128i(r, e);
4687 }
4688
4689 #[simd_test(enable = "sse2")]
4690 fn test_mm_cmpunord_sd() {
4691 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4692 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4693 let r = _mm_castpd_si128(_mm_cmpunord_sd(a, b));
4694 assert_eq_m128i(r, e);
4695 }
4696
4697 #[simd_test(enable = "sse2")]
4698 fn test_mm_cmpneq_sd() {
4699 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4700 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4701 let r = _mm_castpd_si128(_mm_cmpneq_sd(a, b));
4702 assert_eq_m128i(r, e);
4703 }
4704
4705 #[simd_test(enable = "sse2")]
4706 fn test_mm_cmpnlt_sd() {
4707 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4708 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4709 let r = _mm_castpd_si128(_mm_cmpnlt_sd(a, b));
4710 assert_eq_m128i(r, e);
4711 }
4712
4713 #[simd_test(enable = "sse2")]
4714 fn test_mm_cmpnle_sd() {
4715 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4716 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4717 let r = _mm_castpd_si128(_mm_cmpnle_sd(a, b));
4718 assert_eq_m128i(r, e);
4719 }
4720
4721 #[simd_test(enable = "sse2")]
4722 fn test_mm_cmpngt_sd() {
4723 let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4724 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4725 let r = _mm_castpd_si128(_mm_cmpngt_sd(a, b));
4726 assert_eq_m128i(r, e);
4727 }
4728
4729 #[simd_test(enable = "sse2")]
4730 fn test_mm_cmpnge_sd() {
4731 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4732 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4733 let r = _mm_castpd_si128(_mm_cmpnge_sd(a, b));
4734 assert_eq_m128i(r, e);
4735 }
4736
4737 #[simd_test(enable = "sse2")]
4738 fn test_mm_cmpeq_pd() {
4739 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4740 let e = _mm_setr_epi64x(!0, 0);
4741 let r = _mm_castpd_si128(_mm_cmpeq_pd(a, b));
4742 assert_eq_m128i(r, e);
4743 }
4744
4745 #[simd_test(enable = "sse2")]
4746 fn test_mm_cmplt_pd() {
4747 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4748 let e = _mm_setr_epi64x(0, !0);
4749 let r = _mm_castpd_si128(_mm_cmplt_pd(a, b));
4750 assert_eq_m128i(r, e);
4751 }
4752
4753 #[simd_test(enable = "sse2")]
4754 fn test_mm_cmple_pd() {
4755 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4756 let e = _mm_setr_epi64x(!0, !0);
4757 let r = _mm_castpd_si128(_mm_cmple_pd(a, b));
4758 assert_eq_m128i(r, e);
4759 }
4760
4761 #[simd_test(enable = "sse2")]
4762 fn test_mm_cmpgt_pd() {
4763 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4764 let e = _mm_setr_epi64x(0, 0);
4765 let r = _mm_castpd_si128(_mm_cmpgt_pd(a, b));
4766 assert_eq_m128i(r, e);
4767 }
4768
4769 #[simd_test(enable = "sse2")]
4770 fn test_mm_cmpge_pd() {
4771 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4772 let e = _mm_setr_epi64x(!0, 0);
4773 let r = _mm_castpd_si128(_mm_cmpge_pd(a, b));
4774 assert_eq_m128i(r, e);
4775 }
4776
4777 #[simd_test(enable = "sse2")]
4778 fn test_mm_cmpord_pd() {
4779 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4780 let e = _mm_setr_epi64x(0, !0);
4781 let r = _mm_castpd_si128(_mm_cmpord_pd(a, b));
4782 assert_eq_m128i(r, e);
4783 }
4784
4785 #[simd_test(enable = "sse2")]
4786 fn test_mm_cmpunord_pd() {
4787 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4788 let e = _mm_setr_epi64x(!0, 0);
4789 let r = _mm_castpd_si128(_mm_cmpunord_pd(a, b));
4790 assert_eq_m128i(r, e);
4791 }
4792
4793 #[simd_test(enable = "sse2")]
4794 fn test_mm_cmpneq_pd() {
4795 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4796 let e = _mm_setr_epi64x(!0, !0);
4797 let r = _mm_castpd_si128(_mm_cmpneq_pd(a, b));
4798 assert_eq_m128i(r, e);
4799 }
4800
4801 #[simd_test(enable = "sse2")]
4802 fn test_mm_cmpnlt_pd() {
4803 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4804 let e = _mm_setr_epi64x(0, 0);
4805 let r = _mm_castpd_si128(_mm_cmpnlt_pd(a, b));
4806 assert_eq_m128i(r, e);
4807 }
4808
4809 #[simd_test(enable = "sse2")]
4810 fn test_mm_cmpnle_pd() {
4811 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4812 let e = _mm_setr_epi64x(0, 0);
4813 let r = _mm_castpd_si128(_mm_cmpnle_pd(a, b));
4814 assert_eq_m128i(r, e);
4815 }
4816
4817 #[simd_test(enable = "sse2")]
4818 fn test_mm_cmpngt_pd() {
4819 let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4820 let e = _mm_setr_epi64x(0, !0);
4821 let r = _mm_castpd_si128(_mm_cmpngt_pd(a, b));
4822 assert_eq_m128i(r, e);
4823 }
4824
4825 #[simd_test(enable = "sse2")]
4826 fn test_mm_cmpnge_pd() {
4827 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4828 let e = _mm_setr_epi64x(0, !0);
4829 let r = _mm_castpd_si128(_mm_cmpnge_pd(a, b));
4830 assert_eq_m128i(r, e);
4831 }
4832
4833 #[simd_test(enable = "sse2")]
4834 fn test_mm_comieq_sd() {
4835 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4836 assert!(_mm_comieq_sd(a, b) != 0);
4837
4838 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(1.0, 3.0));
4839 assert!(_mm_comieq_sd(a, b) == 0);
4840 }
4841
4842 #[simd_test(enable = "sse2")]
4843 fn test_mm_comilt_sd() {
4844 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4845 assert!(_mm_comilt_sd(a, b) == 0);
4846 }
4847
4848 #[simd_test(enable = "sse2")]
4849 fn test_mm_comile_sd() {
4850 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4851 assert!(_mm_comile_sd(a, b) != 0);
4852 }
4853
4854 #[simd_test(enable = "sse2")]
4855 fn test_mm_comigt_sd() {
4856 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4857 assert!(_mm_comigt_sd(a, b) == 0);
4858 }
4859
4860 #[simd_test(enable = "sse2")]
4861 fn test_mm_comige_sd() {
4862 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4863 assert!(_mm_comige_sd(a, b) != 0);
4864 }
4865
4866 #[simd_test(enable = "sse2")]
4867 fn test_mm_comineq_sd() {
4868 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4869 assert!(_mm_comineq_sd(a, b) == 0);
4870 }
4871
4872 #[simd_test(enable = "sse2")]
4873 fn test_mm_ucomieq_sd() {
4874 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4875 assert!(_mm_ucomieq_sd(a, b) != 0);
4876
4877 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(NAN, 3.0));
4878 assert!(_mm_ucomieq_sd(a, b) == 0);
4879 }
4880
4881 #[simd_test(enable = "sse2")]
4882 fn test_mm_ucomilt_sd() {
4883 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4884 assert!(_mm_ucomilt_sd(a, b) == 0);
4885 }
4886
4887 #[simd_test(enable = "sse2")]
4888 fn test_mm_ucomile_sd() {
4889 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4890 assert!(_mm_ucomile_sd(a, b) != 0);
4891 }
4892
4893 #[simd_test(enable = "sse2")]
4894 fn test_mm_ucomigt_sd() {
4895 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4896 assert!(_mm_ucomigt_sd(a, b) == 0);
4897 }
4898
4899 #[simd_test(enable = "sse2")]
4900 fn test_mm_ucomige_sd() {
4901 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4902 assert!(_mm_ucomige_sd(a, b) != 0);
4903 }
4904
4905 #[simd_test(enable = "sse2")]
4906 fn test_mm_ucomineq_sd() {
4907 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4908 assert!(_mm_ucomineq_sd(a, b) == 0);
4909 }
4910
4911 #[simd_test(enable = "sse2")]
4912 const fn test_mm_movemask_pd() {
4913 let r = _mm_movemask_pd(_mm_setr_pd(-1.0, 5.0));
4914 assert_eq!(r, 0b01);
4915
4916 let r = _mm_movemask_pd(_mm_setr_pd(-1.0, -5.0));
4917 assert_eq!(r, 0b11);
4918 }
4919
    /// Backing storage for the load/store tests in this module: four `f64`
    /// values inside a struct whose alignment is forced to 16 bytes.
    #[repr(align(16))]
    struct Memory {
        data: [f64; 4],
    }
4924
4925 #[simd_test(enable = "sse2")]
4926 const fn test_mm_load_pd() {
4927 let mem = Memory {
4928 data: [1.0f64, 2.0, 3.0, 4.0],
4929 };
4930 let vals = &mem.data;
4931 let d = vals.as_ptr();
4932
4933 let r = unsafe { _mm_load_pd(d) };
4934 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4935 }
4936
4937 #[simd_test(enable = "sse2")]
4938 const fn test_mm_load_sd() {
4939 let a = 1.;
4940 let expected = _mm_setr_pd(a, 0.);
4941 let r = unsafe { _mm_load_sd(&a) };
4942 assert_eq_m128d(r, expected);
4943 }
4944
4945 #[simd_test(enable = "sse2")]
4946 const fn test_mm_loadh_pd() {
4947 let a = _mm_setr_pd(1., 2.);
4948 let b = 3.;
4949 let expected = _mm_setr_pd(_mm_cvtsd_f64(a), 3.);
4950 let r = unsafe { _mm_loadh_pd(a, &b) };
4951 assert_eq_m128d(r, expected);
4952 }
4953
4954 #[simd_test(enable = "sse2")]
4955 const fn test_mm_loadl_pd() {
4956 let a = _mm_setr_pd(1., 2.);
4957 let b = 3.;
4958 let expected = _mm_setr_pd(3., get_m128d(a, 1));
4959 let r = unsafe { _mm_loadl_pd(a, &b) };
4960 assert_eq_m128d(r, expected);
4961 }
4962
4963 #[simd_test(enable = "sse2")]
4964 #[cfg_attr(miri, ignore)]
4967 fn test_mm_stream_pd() {
4968 #[repr(align(128))]
4969 struct Memory {
4970 pub data: [f64; 2],
4971 }
4972 let a = _mm_set1_pd(7.0);
4973 let mut mem = Memory { data: [-1.0; 2] };
4974
4975 unsafe {
4976 _mm_stream_pd(ptr::addr_of_mut!(mem.data[0]), a);
4977 }
4978 _mm_sfence();
4979 for i in 0..2 {
4980 assert_eq!(mem.data[i], get_m128d(a, i));
4981 }
4982 }
4983
4984 #[simd_test(enable = "sse2")]
4985 const fn test_mm_store_sd() {
4986 let mut dest = 0.;
4987 let a = _mm_setr_pd(1., 2.);
4988 unsafe {
4989 _mm_store_sd(&mut dest, a);
4990 }
4991 assert_eq!(dest, _mm_cvtsd_f64(a));
4992 }
4993
4994 #[simd_test(enable = "sse2")]
4995 const fn test_mm_store_pd() {
4996 let mut mem = Memory { data: [0.0f64; 4] };
4997 let vals = &mut mem.data;
4998 let a = _mm_setr_pd(1.0, 2.0);
4999 let d = vals.as_mut_ptr();
5000
5001 unsafe {
5002 _mm_store_pd(d, *black_box(&a));
5003 }
5004 assert_eq!(vals[0], 1.0);
5005 assert_eq!(vals[1], 2.0);
5006 }
5007
5008 #[simd_test(enable = "sse2")]
5009 const fn test_mm_storeu_pd() {
5010 let mut mem = Memory { data: [0.0f64; 4] };
5012 let vals = &mut mem.data;
5013 let a = _mm_setr_pd(1.0, 2.0);
5014
5015 unsafe {
5017 let p = vals.as_mut_ptr().offset(1);
5018 _mm_storeu_pd(p, *black_box(&a));
5019 }
5020
5021 assert_eq!(*vals, [0.0, 1.0, 2.0, 0.0]);
5022 }
5023
5024 #[simd_test(enable = "sse2")]
5025 const fn test_mm_storeu_si16() {
5026 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
5027 let mut r = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
5028 unsafe {
5029 _mm_storeu_si16(ptr::addr_of_mut!(r).cast(), a);
5030 }
5031 let e = _mm_setr_epi16(1, 10, 11, 12, 13, 14, 15, 16);
5032 assert_eq_m128i(r, e);
5033 }
5034
5035 #[simd_test(enable = "sse2")]
5036 const fn test_mm_storeu_si32() {
5037 let a = _mm_setr_epi32(1, 2, 3, 4);
5038 let mut r = _mm_setr_epi32(5, 6, 7, 8);
5039 unsafe {
5040 _mm_storeu_si32(ptr::addr_of_mut!(r).cast(), a);
5041 }
5042 let e = _mm_setr_epi32(1, 6, 7, 8);
5043 assert_eq_m128i(r, e);
5044 }
5045
5046 #[simd_test(enable = "sse2")]
5047 const fn test_mm_storeu_si64() {
5048 let a = _mm_setr_epi64x(1, 2);
5049 let mut r = _mm_setr_epi64x(3, 4);
5050 unsafe {
5051 _mm_storeu_si64(ptr::addr_of_mut!(r).cast(), a);
5052 }
5053 let e = _mm_setr_epi64x(1, 4);
5054 assert_eq_m128i(r, e);
5055 }
5056
5057 #[simd_test(enable = "sse2")]
5058 const fn test_mm_store1_pd() {
5059 let mut mem = Memory { data: [0.0f64; 4] };
5060 let vals = &mut mem.data;
5061 let a = _mm_setr_pd(1.0, 2.0);
5062 let d = vals.as_mut_ptr();
5063
5064 unsafe {
5065 _mm_store1_pd(d, *black_box(&a));
5066 }
5067 assert_eq!(vals[0], 1.0);
5068 assert_eq!(vals[1], 1.0);
5069 }
5070
5071 #[simd_test(enable = "sse2")]
5072 const fn test_mm_store_pd1() {
5073 let mut mem = Memory { data: [0.0f64; 4] };
5074 let vals = &mut mem.data;
5075 let a = _mm_setr_pd(1.0, 2.0);
5076 let d = vals.as_mut_ptr();
5077
5078 unsafe {
5079 _mm_store_pd1(d, *black_box(&a));
5080 }
5081 assert_eq!(vals[0], 1.0);
5082 assert_eq!(vals[1], 1.0);
5083 }
5084
5085 #[simd_test(enable = "sse2")]
5086 const fn test_mm_storer_pd() {
5087 let mut mem = Memory { data: [0.0f64; 4] };
5088 let vals = &mut mem.data;
5089 let a = _mm_setr_pd(1.0, 2.0);
5090 let d = vals.as_mut_ptr();
5091
5092 unsafe {
5093 _mm_storer_pd(d, *black_box(&a));
5094 }
5095 assert_eq!(vals[0], 2.0);
5096 assert_eq!(vals[1], 1.0);
5097 }
5098
5099 #[simd_test(enable = "sse2")]
5100 const fn test_mm_storeh_pd() {
5101 let mut dest = 0.;
5102 let a = _mm_setr_pd(1., 2.);
5103 unsafe {
5104 _mm_storeh_pd(&mut dest, a);
5105 }
5106 assert_eq!(dest, get_m128d(a, 1));
5107 }
5108
5109 #[simd_test(enable = "sse2")]
5110 const fn test_mm_storel_pd() {
5111 let mut dest = 0.;
5112 let a = _mm_setr_pd(1., 2.);
5113 unsafe {
5114 _mm_storel_pd(&mut dest, a);
5115 }
5116 assert_eq!(dest, _mm_cvtsd_f64(a));
5117 }
5118
5119 #[simd_test(enable = "sse2")]
5120 const fn test_mm_loadr_pd() {
5121 let mut mem = Memory {
5122 data: [1.0f64, 2.0, 3.0, 4.0],
5123 };
5124 let vals = &mut mem.data;
5125 let d = vals.as_ptr();
5126
5127 let r = unsafe { _mm_loadr_pd(d) };
5128 assert_eq_m128d(r, _mm_setr_pd(2.0, 1.0));
5129 }
5130
5131 #[simd_test(enable = "sse2")]
5132 const fn test_mm_loadu_pd() {
5133 let mut mem = Memory {
5135 data: [1.0f64, 2.0, 3.0, 4.0],
5136 };
5137 let vals = &mut mem.data;
5138
5139 let d = unsafe { vals.as_ptr().offset(1) };
5141
5142 let r = unsafe { _mm_loadu_pd(d) };
5143 let e = _mm_setr_pd(2.0, 3.0);
5144 assert_eq_m128d(r, e);
5145 }
5146
5147 #[simd_test(enable = "sse2")]
5148 const fn test_mm_loadu_si16() {
5149 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
5150 let r = unsafe { _mm_loadu_si16(ptr::addr_of!(a) as *const _) };
5151 assert_eq_m128i(r, _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0));
5152 }
5153
5154 #[simd_test(enable = "sse2")]
5155 const fn test_mm_loadu_si32() {
5156 let a = _mm_setr_epi32(1, 2, 3, 4);
5157 let r = unsafe { _mm_loadu_si32(ptr::addr_of!(a) as *const _) };
5158 assert_eq_m128i(r, _mm_setr_epi32(1, 0, 0, 0));
5159 }
5160
5161 #[simd_test(enable = "sse2")]
5162 const fn test_mm_loadu_si64() {
5163 let a = _mm_setr_epi64x(5, 6);
5164 let r = unsafe { _mm_loadu_si64(ptr::addr_of!(a) as *const _) };
5165 assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
5166 }
5167
5168 #[simd_test(enable = "sse2")]
5169 const fn test_mm_cvtpd_ps() {
5170 let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, 5.0));
5171 assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, 0.0));
5172
5173 let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, -5.0));
5174 assert_eq_m128(r, _mm_setr_ps(-1.0, -5.0, 0.0, 0.0));
5175
5176 let r = _mm_cvtpd_ps(_mm_setr_pd(f64::MAX, f64::MIN));
5177 assert_eq_m128(r, _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0));
5178
5179 let r = _mm_cvtpd_ps(_mm_setr_pd(f32::MAX as f64, f32::MIN as f64));
5180 assert_eq_m128(r, _mm_setr_ps(f32::MAX, f32::MIN, 0.0, 0.0));
5181 }
5182
5183 #[simd_test(enable = "sse2")]
5184 const fn test_mm_cvtps_pd() {
5185 let r = _mm_cvtps_pd(_mm_setr_ps(-1.0, 2.0, -3.0, 5.0));
5186 assert_eq_m128d(r, _mm_setr_pd(-1.0, 2.0));
5187
5188 let r = _mm_cvtps_pd(_mm_setr_ps(
5189 f32::MAX,
5190 f32::INFINITY,
5191 f32::NEG_INFINITY,
5192 f32::MIN,
5193 ));
5194 assert_eq_m128d(r, _mm_setr_pd(f32::MAX as f64, f64::INFINITY));
5195 }
5196
5197 #[simd_test(enable = "sse2")]
5198 fn test_mm_cvtpd_epi32() {
5199 let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, 5.0));
5200 assert_eq_m128i(r, _mm_setr_epi32(-1, 5, 0, 0));
5201
5202 let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, -5.0));
5203 assert_eq_m128i(r, _mm_setr_epi32(-1, -5, 0, 0));
5204
5205 let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::MAX, f64::MIN));
5206 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5207
5208 let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::INFINITY, f64::NEG_INFINITY));
5209 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5210
5211 let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::NAN, f64::NAN));
5212 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5213 }
5214
5215 #[simd_test(enable = "sse2")]
5216 fn test_mm_cvtsd_si32() {
5217 let r = _mm_cvtsd_si32(_mm_setr_pd(-2.0, 5.0));
5218 assert_eq!(r, -2);
5219
5220 let r = _mm_cvtsd_si32(_mm_setr_pd(f64::MAX, f64::MIN));
5221 assert_eq!(r, i32::MIN);
5222
5223 let r = _mm_cvtsd_si32(_mm_setr_pd(f64::NAN, f64::NAN));
5224 assert_eq!(r, i32::MIN);
5225 }
5226
5227 #[simd_test(enable = "sse2")]
5228 fn test_mm_cvtsd_ss() {
5229 let a = _mm_setr_ps(-1.1, -2.2, 3.3, 4.4);
5230 let b = _mm_setr_pd(2.0, -5.0);
5231
5232 let r = _mm_cvtsd_ss(a, b);
5233
5234 assert_eq_m128(r, _mm_setr_ps(2.0, -2.2, 3.3, 4.4));
5235
5236 let a = _mm_setr_ps(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY);
5237 let b = _mm_setr_pd(f64::INFINITY, -5.0);
5238
5239 let r = _mm_cvtsd_ss(a, b);
5240
5241 assert_eq_m128(
5242 r,
5243 _mm_setr_ps(
5244 f32::INFINITY,
5245 f32::NEG_INFINITY,
5246 f32::MAX,
5247 f32::NEG_INFINITY,
5248 ),
5249 );
5250 }
5251
5252 #[simd_test(enable = "sse2")]
5253 const fn test_mm_cvtsd_f64() {
5254 let r = _mm_cvtsd_f64(_mm_setr_pd(-1.1, 2.2));
5255 assert_eq!(r, -1.1);
5256 }
5257
5258 #[simd_test(enable = "sse2")]
5259 const fn test_mm_cvtss_sd() {
5260 let a = _mm_setr_pd(-1.1, 2.2);
5261 let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
5262
5263 let r = _mm_cvtss_sd(a, b);
5264 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.2));
5265
5266 let a = _mm_setr_pd(-1.1, f64::INFINITY);
5267 let b = _mm_setr_ps(f32::NEG_INFINITY, 2.0, 3.0, 4.0);
5268
5269 let r = _mm_cvtss_sd(a, b);
5270 assert_eq_m128d(r, _mm_setr_pd(f64::NEG_INFINITY, f64::INFINITY));
5271 }
5272
5273 #[simd_test(enable = "sse2")]
5274 fn test_mm_cvttpd_epi32() {
5275 let a = _mm_setr_pd(-1.1, 2.2);
5276 let r = _mm_cvttpd_epi32(a);
5277 assert_eq_m128i(r, _mm_setr_epi32(-1, 2, 0, 0));
5278
5279 let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
5280 let r = _mm_cvttpd_epi32(a);
5281 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5282 }
5283
5284 #[simd_test(enable = "sse2")]
5285 fn test_mm_cvttsd_si32() {
5286 let a = _mm_setr_pd(-1.1, 2.2);
5287 let r = _mm_cvttsd_si32(a);
5288 assert_eq!(r, -1);
5289
5290 let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
5291 let r = _mm_cvttsd_si32(a);
5292 assert_eq!(r, i32::MIN);
5293 }
5294
5295 #[simd_test(enable = "sse2")]
5296 fn test_mm_cvttps_epi32() {
5297 let a = _mm_setr_ps(-1.1, 2.2, -3.3, 6.6);
5298 let r = _mm_cvttps_epi32(a);
5299 assert_eq_m128i(r, _mm_setr_epi32(-1, 2, -3, 6));
5300
5301 let a = _mm_setr_ps(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX);
5302 let r = _mm_cvttps_epi32(a);
5303 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, i32::MIN, i32::MIN));
5304 }
5305
5306 #[simd_test(enable = "sse2")]
5307 const fn test_mm_set_sd() {
5308 let r = _mm_set_sd(-1.0_f64);
5309 assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, 0_f64));
5310 }
5311
5312 #[simd_test(enable = "sse2")]
5313 const fn test_mm_set1_pd() {
5314 let r = _mm_set1_pd(-1.0_f64);
5315 assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, -1.0_f64));
5316 }
5317
5318 #[simd_test(enable = "sse2")]
5319 const fn test_mm_set_pd1() {
5320 let r = _mm_set_pd1(-2.0_f64);
5321 assert_eq_m128d(r, _mm_setr_pd(-2.0_f64, -2.0_f64));
5322 }
5323
5324 #[simd_test(enable = "sse2")]
5325 const fn test_mm_set_pd() {
5326 let r = _mm_set_pd(1.0_f64, 5.0_f64);
5327 assert_eq_m128d(r, _mm_setr_pd(5.0_f64, 1.0_f64));
5328 }
5329
5330 #[simd_test(enable = "sse2")]
5331 const fn test_mm_setr_pd() {
5332 let r = _mm_setr_pd(1.0_f64, -5.0_f64);
5333 assert_eq_m128d(r, _mm_setr_pd(1.0_f64, -5.0_f64));
5334 }
5335
5336 #[simd_test(enable = "sse2")]
5337 const fn test_mm_setzero_pd() {
5338 let r = _mm_setzero_pd();
5339 assert_eq_m128d(r, _mm_setr_pd(0_f64, 0_f64));
5340 }
5341
5342 #[simd_test(enable = "sse2")]
5343 const fn test_mm_load1_pd() {
5344 let d = -5.0;
5345 let r = unsafe { _mm_load1_pd(&d) };
5346 assert_eq_m128d(r, _mm_setr_pd(d, d));
5347 }
5348
5349 #[simd_test(enable = "sse2")]
5350 const fn test_mm_load_pd1() {
5351 let d = -5.0;
5352 let r = unsafe { _mm_load_pd1(&d) };
5353 assert_eq_m128d(r, _mm_setr_pd(d, d));
5354 }
5355
5356 #[simd_test(enable = "sse2")]
5357 const fn test_mm_unpackhi_pd() {
5358 let a = _mm_setr_pd(1.0, 2.0);
5359 let b = _mm_setr_pd(3.0, 4.0);
5360 let r = _mm_unpackhi_pd(a, b);
5361 assert_eq_m128d(r, _mm_setr_pd(2.0, 4.0));
5362 }
5363
5364 #[simd_test(enable = "sse2")]
5365 const fn test_mm_unpacklo_pd() {
5366 let a = _mm_setr_pd(1.0, 2.0);
5367 let b = _mm_setr_pd(3.0, 4.0);
5368 let r = _mm_unpacklo_pd(a, b);
5369 assert_eq_m128d(r, _mm_setr_pd(1.0, 3.0));
5370 }
5371
5372 #[simd_test(enable = "sse2")]
5373 const fn test_mm_shuffle_pd() {
5374 let a = _mm_setr_pd(1., 2.);
5375 let b = _mm_setr_pd(3., 4.);
5376 let expected = _mm_setr_pd(1., 3.);
5377 let r = _mm_shuffle_pd::<0b00_00_00_00>(a, b);
5378 assert_eq_m128d(r, expected);
5379 }
5380
5381 #[simd_test(enable = "sse2")]
5382 const fn test_mm_move_sd() {
5383 let a = _mm_setr_pd(1., 2.);
5384 let b = _mm_setr_pd(3., 4.);
5385 let expected = _mm_setr_pd(3., 2.);
5386 let r = _mm_move_sd(a, b);
5387 assert_eq_m128d(r, expected);
5388 }
5389
5390 #[simd_test(enable = "sse2")]
5391 const fn test_mm_castpd_ps() {
5392 let a = _mm_set1_pd(0.);
5393 let expected = _mm_set1_ps(0.);
5394 let r = _mm_castpd_ps(a);
5395 assert_eq_m128(r, expected);
5396 }
5397
5398 #[simd_test(enable = "sse2")]
5399 const fn test_mm_castpd_si128() {
5400 let a = _mm_set1_pd(0.);
5401 let expected = _mm_set1_epi64x(0);
5402 let r = _mm_castpd_si128(a);
5403 assert_eq_m128i(r, expected);
5404 }
5405
5406 #[simd_test(enable = "sse2")]
5407 const fn test_mm_castps_pd() {
5408 let a = _mm_set1_ps(0.);
5409 let expected = _mm_set1_pd(0.);
5410 let r = _mm_castps_pd(a);
5411 assert_eq_m128d(r, expected);
5412 }
5413
5414 #[simd_test(enable = "sse2")]
5415 const fn test_mm_castps_si128() {
5416 let a = _mm_set1_ps(0.);
5417 let expected = _mm_set1_epi32(0);
5418 let r = _mm_castps_si128(a);
5419 assert_eq_m128i(r, expected);
5420 }
5421
5422 #[simd_test(enable = "sse2")]
5423 const fn test_mm_castsi128_pd() {
5424 let a = _mm_set1_epi64x(0);
5425 let expected = _mm_set1_pd(0.);
5426 let r = _mm_castsi128_pd(a);
5427 assert_eq_m128d(r, expected);
5428 }
5429
5430 #[simd_test(enable = "sse2")]
5431 const fn test_mm_castsi128_ps() {
5432 let a = _mm_set1_epi32(0);
5433 let expected = _mm_set1_ps(0.);
5434 let r = _mm_castsi128_ps(a);
5435 assert_eq_m128(r, expected);
5436 }
5437}