1#[cfg(test)]
4use stdarch_test::assert_instr;
5
6use crate::{
7 core_arch::{simd::*, x86::*},
8 intrinsics::simd::*,
9 intrinsics::sqrtf64,
10 mem, ptr,
11};
12
13#[inline]
20#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(pause))]
21#[stable(feature = "simd_x86", since = "1.27.0")]
22pub fn _mm_pause() {
23 unsafe { pause() }
26}
27
28#[inline]
33#[target_feature(enable = "sse2")]
34#[cfg_attr(test, assert_instr(clflush))]
35#[stable(feature = "simd_x86", since = "1.27.0")]
36pub unsafe fn _mm_clflush(p: *const u8) {
37 clflush(p)
38}
39
40#[inline]
49#[target_feature(enable = "sse2")]
50#[cfg_attr(test, assert_instr(lfence))]
51#[stable(feature = "simd_x86", since = "1.27.0")]
52pub fn _mm_lfence() {
53 unsafe { lfence() }
54}
55
56#[inline]
65#[target_feature(enable = "sse2")]
66#[cfg_attr(test, assert_instr(mfence))]
67#[stable(feature = "simd_x86", since = "1.27.0")]
68pub fn _mm_mfence() {
69 unsafe { mfence() }
70}
71
72#[inline]
76#[target_feature(enable = "sse2")]
77#[cfg_attr(test, assert_instr(paddb))]
78#[stable(feature = "simd_x86", since = "1.27.0")]
79#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
80pub const fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i {
81 unsafe { transmute(simd_add(a.as_i8x16(), b.as_i8x16())) }
82}
83
84#[inline]
88#[target_feature(enable = "sse2")]
89#[cfg_attr(test, assert_instr(paddw))]
90#[stable(feature = "simd_x86", since = "1.27.0")]
91#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
92pub const fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i {
93 unsafe { transmute(simd_add(a.as_i16x8(), b.as_i16x8())) }
94}
95
96#[inline]
100#[target_feature(enable = "sse2")]
101#[cfg_attr(test, assert_instr(paddd))]
102#[stable(feature = "simd_x86", since = "1.27.0")]
103#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
104pub const fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i {
105 unsafe { transmute(simd_add(a.as_i32x4(), b.as_i32x4())) }
106}
107
108#[inline]
112#[target_feature(enable = "sse2")]
113#[cfg_attr(test, assert_instr(paddq))]
114#[stable(feature = "simd_x86", since = "1.27.0")]
115#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
116pub const fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i {
117 unsafe { transmute(simd_add(a.as_i64x2(), b.as_i64x2())) }
118}
119
120#[inline]
124#[target_feature(enable = "sse2")]
125#[cfg_attr(test, assert_instr(paddsb))]
126#[stable(feature = "simd_x86", since = "1.27.0")]
127#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
128pub const fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i {
129 unsafe { transmute(simd_saturating_add(a.as_i8x16(), b.as_i8x16())) }
130}
131
132#[inline]
136#[target_feature(enable = "sse2")]
137#[cfg_attr(test, assert_instr(paddsw))]
138#[stable(feature = "simd_x86", since = "1.27.0")]
139#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
140pub const fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i {
141 unsafe { transmute(simd_saturating_add(a.as_i16x8(), b.as_i16x8())) }
142}
143
144#[inline]
148#[target_feature(enable = "sse2")]
149#[cfg_attr(test, assert_instr(paddusb))]
150#[stable(feature = "simd_x86", since = "1.27.0")]
151#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
152pub const fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i {
153 unsafe { transmute(simd_saturating_add(a.as_u8x16(), b.as_u8x16())) }
154}
155
156#[inline]
160#[target_feature(enable = "sse2")]
161#[cfg_attr(test, assert_instr(paddusw))]
162#[stable(feature = "simd_x86", since = "1.27.0")]
163#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
164pub const fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i {
165 unsafe { transmute(simd_saturating_add(a.as_u16x8(), b.as_u16x8())) }
166}
167
168#[inline]
172#[target_feature(enable = "sse2")]
173#[cfg_attr(test, assert_instr(pavgb))]
174#[stable(feature = "simd_x86", since = "1.27.0")]
175#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
176pub const fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i {
177 unsafe {
178 let a = simd_cast::<_, u16x16>(a.as_u8x16());
179 let b = simd_cast::<_, u16x16>(b.as_u8x16());
180 let r = simd_shr(simd_add(simd_add(a, b), u16x16::splat(1)), u16x16::splat(1));
181 transmute(simd_cast::<_, u8x16>(r))
182 }
183}
184
185#[inline]
189#[target_feature(enable = "sse2")]
190#[cfg_attr(test, assert_instr(pavgw))]
191#[stable(feature = "simd_x86", since = "1.27.0")]
192#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
193pub const fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i {
194 unsafe {
195 let a = simd_cast::<_, u32x8>(a.as_u16x8());
196 let b = simd_cast::<_, u32x8>(b.as_u16x8());
197 let r = simd_shr(simd_add(simd_add(a, b), u32x8::splat(1)), u32x8::splat(1));
198 transmute(simd_cast::<_, u16x8>(r))
199 }
200}
201
202#[inline]
210#[target_feature(enable = "sse2")]
211#[cfg_attr(test, assert_instr(pmaddwd))]
212#[stable(feature = "simd_x86", since = "1.27.0")]
213pub fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i {
214 unsafe { transmute(pmaddwd(a.as_i16x8(), b.as_i16x8())) }
227}
228
229#[inline]
234#[target_feature(enable = "sse2")]
235#[cfg_attr(test, assert_instr(pmaxsw))]
236#[stable(feature = "simd_x86", since = "1.27.0")]
237#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
238pub const fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i {
239 unsafe { simd_imax(a.as_i16x8(), b.as_i16x8()).as_m128i() }
240}
241
242#[inline]
247#[target_feature(enable = "sse2")]
248#[cfg_attr(test, assert_instr(pmaxub))]
249#[stable(feature = "simd_x86", since = "1.27.0")]
250#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
251pub const fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i {
252 unsafe { simd_imax(a.as_u8x16(), b.as_u8x16()).as_m128i() }
253}
254
255#[inline]
260#[target_feature(enable = "sse2")]
261#[cfg_attr(test, assert_instr(pminsw))]
262#[stable(feature = "simd_x86", since = "1.27.0")]
263#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
264pub const fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i {
265 unsafe { simd_imin(a.as_i16x8(), b.as_i16x8()).as_m128i() }
266}
267
268#[inline]
273#[target_feature(enable = "sse2")]
274#[cfg_attr(test, assert_instr(pminub))]
275#[stable(feature = "simd_x86", since = "1.27.0")]
276#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
277pub const fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i {
278 unsafe { simd_imin(a.as_u8x16(), b.as_u8x16()).as_m128i() }
279}
280
281#[inline]
288#[target_feature(enable = "sse2")]
289#[cfg_attr(test, assert_instr(pmulhw))]
290#[stable(feature = "simd_x86", since = "1.27.0")]
291#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
292pub const fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i {
293 unsafe {
294 let a = simd_cast::<_, i32x8>(a.as_i16x8());
295 let b = simd_cast::<_, i32x8>(b.as_i16x8());
296 let r = simd_shr(simd_mul(a, b), i32x8::splat(16));
297 transmute(simd_cast::<i32x8, i16x8>(r))
298 }
299}
300
301#[inline]
308#[target_feature(enable = "sse2")]
309#[cfg_attr(test, assert_instr(pmulhuw))]
310#[stable(feature = "simd_x86", since = "1.27.0")]
311#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
312pub const fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i {
313 unsafe {
314 let a = simd_cast::<_, u32x8>(a.as_u16x8());
315 let b = simd_cast::<_, u32x8>(b.as_u16x8());
316 let r = simd_shr(simd_mul(a, b), u32x8::splat(16));
317 transmute(simd_cast::<u32x8, u16x8>(r))
318 }
319}
320
321#[inline]
328#[target_feature(enable = "sse2")]
329#[cfg_attr(test, assert_instr(pmullw))]
330#[stable(feature = "simd_x86", since = "1.27.0")]
331#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
332pub const fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i {
333 unsafe { transmute(simd_mul(a.as_i16x8(), b.as_i16x8())) }
334}
335
336#[inline]
343#[target_feature(enable = "sse2")]
344#[cfg_attr(test, assert_instr(pmuludq))]
345#[stable(feature = "simd_x86", since = "1.27.0")]
346#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
347pub const fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i {
348 unsafe {
349 let a = a.as_u64x2();
350 let b = b.as_u64x2();
351 let mask = u64x2::splat(u32::MAX as u64);
352 transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
353 }
354}
355
356#[inline]
365#[target_feature(enable = "sse2")]
366#[cfg_attr(test, assert_instr(psadbw))]
367#[stable(feature = "simd_x86", since = "1.27.0")]
368pub fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i {
369 unsafe { transmute(psadbw(a.as_u8x16(), b.as_u8x16())) }
370}
371
372#[inline]
376#[target_feature(enable = "sse2")]
377#[cfg_attr(test, assert_instr(psubb))]
378#[stable(feature = "simd_x86", since = "1.27.0")]
379#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
380pub const fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i {
381 unsafe { transmute(simd_sub(a.as_i8x16(), b.as_i8x16())) }
382}
383
384#[inline]
388#[target_feature(enable = "sse2")]
389#[cfg_attr(test, assert_instr(psubw))]
390#[stable(feature = "simd_x86", since = "1.27.0")]
391#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
392pub const fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i {
393 unsafe { transmute(simd_sub(a.as_i16x8(), b.as_i16x8())) }
394}
395
396#[inline]
400#[target_feature(enable = "sse2")]
401#[cfg_attr(test, assert_instr(psubd))]
402#[stable(feature = "simd_x86", since = "1.27.0")]
403#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
404pub const fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i {
405 unsafe { transmute(simd_sub(a.as_i32x4(), b.as_i32x4())) }
406}
407
408#[inline]
412#[target_feature(enable = "sse2")]
413#[cfg_attr(test, assert_instr(psubq))]
414#[stable(feature = "simd_x86", since = "1.27.0")]
415#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
416pub const fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i {
417 unsafe { transmute(simd_sub(a.as_i64x2(), b.as_i64x2())) }
418}
419
420#[inline]
425#[target_feature(enable = "sse2")]
426#[cfg_attr(test, assert_instr(psubsb))]
427#[stable(feature = "simd_x86", since = "1.27.0")]
428#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
429pub const fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i {
430 unsafe { transmute(simd_saturating_sub(a.as_i8x16(), b.as_i8x16())) }
431}
432
433#[inline]
438#[target_feature(enable = "sse2")]
439#[cfg_attr(test, assert_instr(psubsw))]
440#[stable(feature = "simd_x86", since = "1.27.0")]
441#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
442pub const fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i {
443 unsafe { transmute(simd_saturating_sub(a.as_i16x8(), b.as_i16x8())) }
444}
445
446#[inline]
451#[target_feature(enable = "sse2")]
452#[cfg_attr(test, assert_instr(psubusb))]
453#[stable(feature = "simd_x86", since = "1.27.0")]
454#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
455pub const fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i {
456 unsafe { transmute(simd_saturating_sub(a.as_u8x16(), b.as_u8x16())) }
457}
458
459#[inline]
464#[target_feature(enable = "sse2")]
465#[cfg_attr(test, assert_instr(psubusw))]
466#[stable(feature = "simd_x86", since = "1.27.0")]
467#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
468pub const fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i {
469 unsafe { transmute(simd_saturating_sub(a.as_u16x8(), b.as_u16x8())) }
470}
471
472#[inline]
476#[target_feature(enable = "sse2")]
477#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
478#[rustc_legacy_const_generics(1)]
479#[stable(feature = "simd_x86", since = "1.27.0")]
480#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
481pub const fn _mm_slli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
482 static_assert_uimm_bits!(IMM8, 8);
483 unsafe { _mm_slli_si128_impl::<IMM8>(a) }
484}
485
486#[inline]
489#[target_feature(enable = "sse2")]
490#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
491const unsafe fn _mm_slli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
492 const fn mask(shift: i32, i: u32) -> u32 {
493 let shift = shift as u32 & 0xff;
494 if shift > 15 { i } else { 16 - shift + i }
495 }
496 transmute::<i8x16, _>(simd_shuffle!(
497 i8x16::ZERO,
498 a.as_i8x16(),
499 [
500 mask(IMM8, 0),
501 mask(IMM8, 1),
502 mask(IMM8, 2),
503 mask(IMM8, 3),
504 mask(IMM8, 4),
505 mask(IMM8, 5),
506 mask(IMM8, 6),
507 mask(IMM8, 7),
508 mask(IMM8, 8),
509 mask(IMM8, 9),
510 mask(IMM8, 10),
511 mask(IMM8, 11),
512 mask(IMM8, 12),
513 mask(IMM8, 13),
514 mask(IMM8, 14),
515 mask(IMM8, 15),
516 ],
517 ))
518}
519
520#[inline]
524#[target_feature(enable = "sse2")]
525#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
526#[rustc_legacy_const_generics(1)]
527#[stable(feature = "simd_x86", since = "1.27.0")]
528#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
529pub const fn _mm_bslli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
530 unsafe {
531 static_assert_uimm_bits!(IMM8, 8);
532 _mm_slli_si128_impl::<IMM8>(a)
533 }
534}
535
536#[inline]
540#[target_feature(enable = "sse2")]
541#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
542#[rustc_legacy_const_generics(1)]
543#[stable(feature = "simd_x86", since = "1.27.0")]
544#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
545pub const fn _mm_bsrli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
546 unsafe {
547 static_assert_uimm_bits!(IMM8, 8);
548 _mm_srli_si128_impl::<IMM8>(a)
549 }
550}
551
552#[inline]
556#[target_feature(enable = "sse2")]
557#[cfg_attr(test, assert_instr(psllw, IMM8 = 7))]
558#[rustc_legacy_const_generics(1)]
559#[stable(feature = "simd_x86", since = "1.27.0")]
560#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
561pub const fn _mm_slli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
562 static_assert_uimm_bits!(IMM8, 8);
563 unsafe {
564 if IMM8 >= 16 {
565 _mm_setzero_si128()
566 } else {
567 transmute(simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
568 }
569 }
570}
571
572#[inline]
577#[target_feature(enable = "sse2")]
578#[cfg_attr(test, assert_instr(psllw))]
579#[stable(feature = "simd_x86", since = "1.27.0")]
580pub fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i {
581 unsafe { transmute(psllw(a.as_i16x8(), count.as_i16x8())) }
582}
583
584#[inline]
588#[target_feature(enable = "sse2")]
589#[cfg_attr(test, assert_instr(pslld, IMM8 = 7))]
590#[rustc_legacy_const_generics(1)]
591#[stable(feature = "simd_x86", since = "1.27.0")]
592#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
593pub const fn _mm_slli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
594 static_assert_uimm_bits!(IMM8, 8);
595 unsafe {
596 if IMM8 >= 32 {
597 _mm_setzero_si128()
598 } else {
599 transmute(simd_shl(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
600 }
601 }
602}
603
604#[inline]
609#[target_feature(enable = "sse2")]
610#[cfg_attr(test, assert_instr(pslld))]
611#[stable(feature = "simd_x86", since = "1.27.0")]
612pub fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i {
613 unsafe { transmute(pslld(a.as_i32x4(), count.as_i32x4())) }
614}
615
616#[inline]
620#[target_feature(enable = "sse2")]
621#[cfg_attr(test, assert_instr(psllq, IMM8 = 7))]
622#[rustc_legacy_const_generics(1)]
623#[stable(feature = "simd_x86", since = "1.27.0")]
624#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
625pub const fn _mm_slli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
626 static_assert_uimm_bits!(IMM8, 8);
627 unsafe {
628 if IMM8 >= 64 {
629 _mm_setzero_si128()
630 } else {
631 transmute(simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
632 }
633 }
634}
635
636#[inline]
641#[target_feature(enable = "sse2")]
642#[cfg_attr(test, assert_instr(psllq))]
643#[stable(feature = "simd_x86", since = "1.27.0")]
644pub fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i {
645 unsafe { transmute(psllq(a.as_i64x2(), count.as_i64x2())) }
646}
647
648#[inline]
653#[target_feature(enable = "sse2")]
654#[cfg_attr(test, assert_instr(psraw, IMM8 = 1))]
655#[rustc_legacy_const_generics(1)]
656#[stable(feature = "simd_x86", since = "1.27.0")]
657#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
658pub const fn _mm_srai_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
659 static_assert_uimm_bits!(IMM8, 8);
660 unsafe { transmute(simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16))) }
661}
662
663#[inline]
668#[target_feature(enable = "sse2")]
669#[cfg_attr(test, assert_instr(psraw))]
670#[stable(feature = "simd_x86", since = "1.27.0")]
671pub fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i {
672 unsafe { transmute(psraw(a.as_i16x8(), count.as_i16x8())) }
673}
674
675#[inline]
680#[target_feature(enable = "sse2")]
681#[cfg_attr(test, assert_instr(psrad, IMM8 = 1))]
682#[rustc_legacy_const_generics(1)]
683#[stable(feature = "simd_x86", since = "1.27.0")]
684#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
685pub const fn _mm_srai_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
686 static_assert_uimm_bits!(IMM8, 8);
687 unsafe { transmute(simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31)))) }
688}
689
690#[inline]
695#[target_feature(enable = "sse2")]
696#[cfg_attr(test, assert_instr(psrad))]
697#[stable(feature = "simd_x86", since = "1.27.0")]
698pub fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i {
699 unsafe { transmute(psrad(a.as_i32x4(), count.as_i32x4())) }
700}
701
702#[inline]
706#[target_feature(enable = "sse2")]
707#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
708#[rustc_legacy_const_generics(1)]
709#[stable(feature = "simd_x86", since = "1.27.0")]
710#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
711pub const fn _mm_srli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
712 static_assert_uimm_bits!(IMM8, 8);
713 unsafe { _mm_srli_si128_impl::<IMM8>(a) }
714}
715
716#[inline]
719#[target_feature(enable = "sse2")]
720#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
721const unsafe fn _mm_srli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
722 const fn mask(shift: i32, i: u32) -> u32 {
723 if (shift as u32) > 15 {
724 i + 16
725 } else {
726 i + (shift as u32)
727 }
728 }
729 let x: i8x16 = simd_shuffle!(
730 a.as_i8x16(),
731 i8x16::ZERO,
732 [
733 mask(IMM8, 0),
734 mask(IMM8, 1),
735 mask(IMM8, 2),
736 mask(IMM8, 3),
737 mask(IMM8, 4),
738 mask(IMM8, 5),
739 mask(IMM8, 6),
740 mask(IMM8, 7),
741 mask(IMM8, 8),
742 mask(IMM8, 9),
743 mask(IMM8, 10),
744 mask(IMM8, 11),
745 mask(IMM8, 12),
746 mask(IMM8, 13),
747 mask(IMM8, 14),
748 mask(IMM8, 15),
749 ],
750 );
751 transmute(x)
752}
753
754#[inline]
759#[target_feature(enable = "sse2")]
760#[cfg_attr(test, assert_instr(psrlw, IMM8 = 1))]
761#[rustc_legacy_const_generics(1)]
762#[stable(feature = "simd_x86", since = "1.27.0")]
763#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
764pub const fn _mm_srli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
765 static_assert_uimm_bits!(IMM8, 8);
766 unsafe {
767 if IMM8 >= 16 {
768 _mm_setzero_si128()
769 } else {
770 transmute(simd_shr(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
771 }
772 }
773}
774
775#[inline]
780#[target_feature(enable = "sse2")]
781#[cfg_attr(test, assert_instr(psrlw))]
782#[stable(feature = "simd_x86", since = "1.27.0")]
783pub fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i {
784 unsafe { transmute(psrlw(a.as_i16x8(), count.as_i16x8())) }
785}
786
787#[inline]
792#[target_feature(enable = "sse2")]
793#[cfg_attr(test, assert_instr(psrld, IMM8 = 8))]
794#[rustc_legacy_const_generics(1)]
795#[stable(feature = "simd_x86", since = "1.27.0")]
796#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
797pub const fn _mm_srli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
798 static_assert_uimm_bits!(IMM8, 8);
799 unsafe {
800 if IMM8 >= 32 {
801 _mm_setzero_si128()
802 } else {
803 transmute(simd_shr(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
804 }
805 }
806}
807
808#[inline]
813#[target_feature(enable = "sse2")]
814#[cfg_attr(test, assert_instr(psrld))]
815#[stable(feature = "simd_x86", since = "1.27.0")]
816pub fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i {
817 unsafe { transmute(psrld(a.as_i32x4(), count.as_i32x4())) }
818}
819
820#[inline]
825#[target_feature(enable = "sse2")]
826#[cfg_attr(test, assert_instr(psrlq, IMM8 = 1))]
827#[rustc_legacy_const_generics(1)]
828#[stable(feature = "simd_x86", since = "1.27.0")]
829#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
830pub const fn _mm_srli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
831 static_assert_uimm_bits!(IMM8, 8);
832 unsafe {
833 if IMM8 >= 64 {
834 _mm_setzero_si128()
835 } else {
836 transmute(simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
837 }
838 }
839}
840
841#[inline]
846#[target_feature(enable = "sse2")]
847#[cfg_attr(test, assert_instr(psrlq))]
848#[stable(feature = "simd_x86", since = "1.27.0")]
849pub fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i {
850 unsafe { transmute(psrlq(a.as_i64x2(), count.as_i64x2())) }
851}
852
853#[inline]
858#[target_feature(enable = "sse2")]
859#[cfg_attr(test, assert_instr(andps))]
860#[stable(feature = "simd_x86", since = "1.27.0")]
861#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
862pub const fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i {
863 unsafe { simd_and(a, b) }
864}
865
866#[inline]
871#[target_feature(enable = "sse2")]
872#[cfg_attr(test, assert_instr(andnps))]
873#[stable(feature = "simd_x86", since = "1.27.0")]
874#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
875pub const fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i {
876 unsafe { simd_and(simd_xor(_mm_set1_epi8(-1), a), b) }
877}
878
879#[inline]
884#[target_feature(enable = "sse2")]
885#[cfg_attr(test, assert_instr(orps))]
886#[stable(feature = "simd_x86", since = "1.27.0")]
887#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
888pub const fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i {
889 unsafe { simd_or(a, b) }
890}
891
892#[inline]
897#[target_feature(enable = "sse2")]
898#[cfg_attr(test, assert_instr(xorps))]
899#[stable(feature = "simd_x86", since = "1.27.0")]
900#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
901pub const fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i {
902 unsafe { simd_xor(a, b) }
903}
904
905#[inline]
909#[target_feature(enable = "sse2")]
910#[cfg_attr(test, assert_instr(pcmpeqb))]
911#[stable(feature = "simd_x86", since = "1.27.0")]
912#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
913pub const fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i {
914 unsafe { transmute::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16())) }
915}
916
917#[inline]
921#[target_feature(enable = "sse2")]
922#[cfg_attr(test, assert_instr(pcmpeqw))]
923#[stable(feature = "simd_x86", since = "1.27.0")]
924#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
925pub const fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i {
926 unsafe { transmute::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8())) }
927}
928
929#[inline]
933#[target_feature(enable = "sse2")]
934#[cfg_attr(test, assert_instr(pcmpeqd))]
935#[stable(feature = "simd_x86", since = "1.27.0")]
936#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
937pub const fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i {
938 unsafe { transmute::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4())) }
939}
940
941#[inline]
945#[target_feature(enable = "sse2")]
946#[cfg_attr(test, assert_instr(pcmpgtb))]
947#[stable(feature = "simd_x86", since = "1.27.0")]
948#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
949pub const fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i {
950 unsafe { transmute::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16())) }
951}
952
953#[inline]
957#[target_feature(enable = "sse2")]
958#[cfg_attr(test, assert_instr(pcmpgtw))]
959#[stable(feature = "simd_x86", since = "1.27.0")]
960#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
961pub const fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i {
962 unsafe { transmute::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8())) }
963}
964
965#[inline]
969#[target_feature(enable = "sse2")]
970#[cfg_attr(test, assert_instr(pcmpgtd))]
971#[stable(feature = "simd_x86", since = "1.27.0")]
972#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
973pub const fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i {
974 unsafe { transmute::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4())) }
975}
976
977#[inline]
981#[target_feature(enable = "sse2")]
982#[cfg_attr(test, assert_instr(pcmpgtb))]
983#[stable(feature = "simd_x86", since = "1.27.0")]
984#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
985pub const fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i {
986 unsafe { transmute::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16())) }
987}
988
989#[inline]
993#[target_feature(enable = "sse2")]
994#[cfg_attr(test, assert_instr(pcmpgtw))]
995#[stable(feature = "simd_x86", since = "1.27.0")]
996#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
997pub const fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i {
998 unsafe { transmute::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8())) }
999}
1000
1001#[inline]
1005#[target_feature(enable = "sse2")]
1006#[cfg_attr(test, assert_instr(pcmpgtd))]
1007#[stable(feature = "simd_x86", since = "1.27.0")]
1008#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1009pub const fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i {
1010 unsafe { transmute::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4())) }
1011}
1012
1013#[inline]
1018#[target_feature(enable = "sse2")]
1019#[cfg_attr(test, assert_instr(cvtdq2pd))]
1020#[stable(feature = "simd_x86", since = "1.27.0")]
1021#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1022pub const fn _mm_cvtepi32_pd(a: __m128i) -> __m128d {
1023 unsafe {
1024 let a = a.as_i32x4();
1025 simd_cast::<i32x2, __m128d>(simd_shuffle!(a, a, [0, 1]))
1026 }
1027}
1028
1029#[inline]
1034#[target_feature(enable = "sse2")]
1035#[cfg_attr(test, assert_instr(cvtsi2sd))]
1036#[stable(feature = "simd_x86", since = "1.27.0")]
1037#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1038pub const fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d {
1039 unsafe { simd_insert!(a, 0, b as f64) }
1040}
1041
1042#[inline]
1047#[target_feature(enable = "sse2")]
1048#[cfg_attr(test, assert_instr(cvtdq2ps))]
1049#[stable(feature = "simd_x86", since = "1.27.0")]
1050#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1051pub const fn _mm_cvtepi32_ps(a: __m128i) -> __m128 {
1052 unsafe { transmute(simd_cast::<_, f32x4>(a.as_i32x4())) }
1053}
1054
1055#[inline]
1060#[target_feature(enable = "sse2")]
1061#[cfg_attr(test, assert_instr(cvtps2dq))]
1062#[stable(feature = "simd_x86", since = "1.27.0")]
1063pub fn _mm_cvtps_epi32(a: __m128) -> __m128i {
1064 unsafe { transmute(cvtps2dq(a)) }
1065}
1066
1067#[inline]
1072#[target_feature(enable = "sse2")]
1073#[stable(feature = "simd_x86", since = "1.27.0")]
1074#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1075pub const fn _mm_cvtsi32_si128(a: i32) -> __m128i {
1076 unsafe { transmute(i32x4::new(a, 0, 0, 0)) }
1077}
1078
1079#[inline]
1083#[target_feature(enable = "sse2")]
1084#[stable(feature = "simd_x86", since = "1.27.0")]
1085#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1086pub const fn _mm_cvtsi128_si32(a: __m128i) -> i32 {
1087 unsafe { simd_extract!(a.as_i32x4(), 0) }
1088}
1089
1090#[inline]
1095#[target_feature(enable = "sse2")]
1096#[stable(feature = "simd_x86", since = "1.27.0")]
1098#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1099pub const fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i {
1100 unsafe { transmute(i64x2::new(e0, e1)) }
1101}
1102
1103#[inline]
1107#[target_feature(enable = "sse2")]
1108#[stable(feature = "simd_x86", since = "1.27.0")]
1110#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1111pub const fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
1112 unsafe { transmute(i32x4::new(e0, e1, e2, e3)) }
1113}
1114
1115#[inline]
1119#[target_feature(enable = "sse2")]
1120#[stable(feature = "simd_x86", since = "1.27.0")]
1122#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1123pub const fn _mm_set_epi16(
1124 e7: i16,
1125 e6: i16,
1126 e5: i16,
1127 e4: i16,
1128 e3: i16,
1129 e2: i16,
1130 e1: i16,
1131 e0: i16,
1132) -> __m128i {
1133 unsafe { transmute(i16x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) }
1134}
1135
1136#[inline]
1140#[target_feature(enable = "sse2")]
1141#[stable(feature = "simd_x86", since = "1.27.0")]
1143#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1144pub const fn _mm_set_epi8(
1145 e15: i8,
1146 e14: i8,
1147 e13: i8,
1148 e12: i8,
1149 e11: i8,
1150 e10: i8,
1151 e9: i8,
1152 e8: i8,
1153 e7: i8,
1154 e6: i8,
1155 e5: i8,
1156 e4: i8,
1157 e3: i8,
1158 e2: i8,
1159 e1: i8,
1160 e0: i8,
1161) -> __m128i {
1162 unsafe {
1163 #[rustfmt::skip]
1164 transmute(i8x16::new(
1165 e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
1166 ))
1167 }
1168}
1169
1170#[inline]
1174#[target_feature(enable = "sse2")]
1175#[stable(feature = "simd_x86", since = "1.27.0")]
1177#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1178pub const fn _mm_set1_epi64x(a: i64) -> __m128i {
1179 _mm_set_epi64x(a, a)
1180}
1181
1182#[inline]
1186#[target_feature(enable = "sse2")]
1187#[stable(feature = "simd_x86", since = "1.27.0")]
1189#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1190pub const fn _mm_set1_epi32(a: i32) -> __m128i {
1191 _mm_set_epi32(a, a, a, a)
1192}
1193
1194#[inline]
1198#[target_feature(enable = "sse2")]
1199#[stable(feature = "simd_x86", since = "1.27.0")]
1201#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1202pub const fn _mm_set1_epi16(a: i16) -> __m128i {
1203 _mm_set_epi16(a, a, a, a, a, a, a, a)
1204}
1205
1206#[inline]
1210#[target_feature(enable = "sse2")]
1211#[stable(feature = "simd_x86", since = "1.27.0")]
1213#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1214pub const fn _mm_set1_epi8(a: i8) -> __m128i {
1215 _mm_set_epi8(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
1216}
1217
1218#[inline]
1222#[target_feature(enable = "sse2")]
1223#[stable(feature = "simd_x86", since = "1.27.0")]
1225#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1226pub const fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
1227 _mm_set_epi32(e0, e1, e2, e3)
1228}
1229
1230#[inline]
1234#[target_feature(enable = "sse2")]
1235#[stable(feature = "simd_x86", since = "1.27.0")]
1237#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1238pub const fn _mm_setr_epi16(
1239 e7: i16,
1240 e6: i16,
1241 e5: i16,
1242 e4: i16,
1243 e3: i16,
1244 e2: i16,
1245 e1: i16,
1246 e0: i16,
1247) -> __m128i {
1248 _mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7)
1249}
1250
/// Sets sixteen packed 8-bit integers with the arguments supplied in reversed order.
///
/// Forwards to `_mm_set_epi8` with the argument list flipped (`e0` first, `e15` last), so
/// the first argument of this function ends up where `_mm_set_epi8` puts its last argument.
///
/// NOTE(review): presumably that is the lowest lane — confirm against `_mm_set_epi8`,
/// which is defined outside this excerpt.
1251#[inline]
1255#[target_feature(enable = "sse2")]
1256#[stable(feature = "simd_x86", since = "1.27.0")]
1258#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1259pub const fn _mm_setr_epi8(
1260 e15: i8,
1261 e14: i8,
1262 e13: i8,
1263 e12: i8,
1264 e11: i8,
1265 e10: i8,
1266 e9: i8,
1267 e8: i8,
1268 e7: i8,
1269 e6: i8,
1270 e5: i8,
1271 e4: i8,
1272 e3: i8,
1273 e2: i8,
1274 e1: i8,
1275 e0: i8,
1276) -> __m128i {
 // `rustfmt::skip` keeps the sixteen forwarded arguments on a single row.
1277 #[rustfmt::skip]
1278 _mm_set_epi8(
1279 e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
1280 )
1281}
1282
/// Returns a `__m128i` whose bits are all zero.
///
/// The value is produced by `mem::zeroed()` inside an inline `const` block, so it is
/// computed at compile time. The test attribute expects this to lower to `xorps`.
1283#[inline]
1287#[target_feature(enable = "sse2")]
1288#[cfg_attr(test, assert_instr(xorps))]
1289#[stable(feature = "simd_x86", since = "1.27.0")]
1290#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1291pub const fn _mm_setzero_si128() -> __m128i {
 // Relies on the all-zero bit pattern being a valid `__m128i`.
1292 const { unsafe { mem::zeroed() } }
1293}
1294
/// Loads a 64-bit integer from `mem_addr` and combines it with a zero into a vector.
///
/// The pointer is reinterpreted as `*const i64` and read with `ptr::read_unaligned`; the
/// loaded value is passed as the second argument of `_mm_set_epi64x`, with `0` as the first.
///
/// NOTE(review): presumably this places the loaded value in the low 64 bits and zeroes the
/// high 64 bits (assuming `_mm_set_epi64x(high, low)`) — confirm against `_mm_set_epi64x`.
///
/// # Safety
///
/// `mem_addr` must be valid for reading 8 bytes. No alignment is required because the read
/// is unaligned.
1295#[inline]
1299#[target_feature(enable = "sse2")]
1300#[stable(feature = "simd_x86", since = "1.27.0")]
1301#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1302pub const unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i {
1303 _mm_set_epi64x(0, ptr::read_unaligned(mem_addr as *const i64))
1304}
1305
/// Loads a full `__m128i` from memory by dereferencing `mem_addr` directly.
///
/// The test attribute expects `movaps`, except on 32-bit x86 MSVC targets where the
/// instruction check is disabled.
///
/// # Safety
///
/// `mem_addr` must be valid for reads of a whole `__m128i` and aligned as `__m128i`
/// requires, because this is a plain (aligned) pointer dereference.
1306#[inline]
1312#[target_feature(enable = "sse2")]
1313#[cfg_attr(
1314 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
1315 assert_instr(movaps)
1316)]
1317#[stable(feature = "simd_x86", since = "1.27.0")]
1318#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1319pub const unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i {
1320 *mem_addr
1321}
1322
/// Loads a full `__m128i` from memory without requiring any particular alignment.
///
/// The bytes are copied one-for-one (`size_of::<__m128i>()` of them) from `mem_addr` into a
/// local vector, which is then returned. The test attribute expects `movups`.
///
/// # Safety
///
/// `mem_addr` must be valid for reading `size_of::<__m128i>()` bytes.
1323#[inline]
1329#[target_feature(enable = "sse2")]
1330#[cfg_attr(test, assert_instr(movups))]
1331#[stable(feature = "simd_x86", since = "1.27.0")]
1332#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1333pub const unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
 // Placeholder destination; every byte of it is overwritten by the copy below.
1334 let mut dst: __m128i = _mm_undefined_si128();
 // Byte-wise copy, so the source pointer only needs `u8` alignment.
1335 ptr::copy_nonoverlapping(
1336 mem_addr as *const u8,
1337 ptr::addr_of_mut!(dst) as *mut u8,
1338 mem::size_of::<__m128i>(),
1339 );
1340 dst
1341}
1342
/// Conditionally stores bytes of `a` to `mem_addr` under control of `mask`.
///
/// Thin wrapper: views `a` and `mask` as sixteen `i8` lanes and hands them, together with
/// `mem_addr`, to the `maskmovdqu` binding. The test attribute expects `maskmovdqu`.
///
/// NOTE(review): the binding is declared elsewhere. Per the Intel description of
/// `maskmovdqu`, presumably a byte is written only when the high bit of the matching mask
/// byte is set, and the store is non-temporal — confirm against the Intel Intrinsics Guide.
///
/// # Safety
///
/// NOTE(review): presumably `mem_addr` must be valid for writing every byte selected by
/// `mask` — confirm the exact contract with the binding's documentation.
1343#[inline]
1363#[target_feature(enable = "sse2")]
1364#[cfg_attr(test, assert_instr(maskmovdqu))]
1365#[stable(feature = "simd_x86", since = "1.27.0")]
1366pub unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8) {
1367 maskmovdqu(a.as_i8x16(), mask.as_i8x16(), mem_addr)
1368}
1369
/// Stores the full vector `a` to memory by assigning through `mem_addr`.
///
/// The test attribute expects `movaps`, except on 32-bit x86 MSVC targets where the
/// instruction check is disabled.
///
/// # Safety
///
/// `mem_addr` must be valid for writes of a whole `__m128i` and aligned as `__m128i`
/// requires, because this is a plain (aligned) pointer write.
1370#[inline]
1376#[target_feature(enable = "sse2")]
1377#[cfg_attr(
1378 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
1379 assert_instr(movaps)
1380)]
1381#[stable(feature = "simd_x86", since = "1.27.0")]
1382#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1383pub const unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) {
1384 *mem_addr = a;
1385}
1386
/// Stores the full vector `a` to memory without requiring any particular alignment.
///
/// Uses `write_unaligned` on `mem_addr`. The test attribute expects `movups`.
///
/// # Safety
///
/// `mem_addr` must be valid for writing a whole `__m128i`.
1387#[inline]
1393#[target_feature(enable = "sse2")]
1394#[cfg_attr(test, assert_instr(movups))] #[stable(feature = "simd_x86", since = "1.27.0")]
1396#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1397pub const unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) {
1398 mem_addr.write_unaligned(a);
1399}
1400
/// Stores the first 8 bytes of `a` to `mem_addr`.
///
/// The copy is byte-wise from the start of `a`'s in-memory representation; on
/// little-endian x86 these are the low 64 bits of the vector.
///
/// # Safety
///
/// `mem_addr` must be valid for writing 8 bytes. No alignment is required because the copy
/// is done through `u8` pointers.
1401#[inline]
1407#[target_feature(enable = "sse2")]
1408#[stable(feature = "simd_x86", since = "1.27.0")]
1409#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1410pub const unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) {
1411 ptr::copy_nonoverlapping(ptr::addr_of!(a) as *const u8, mem_addr as *mut u8, 8);
1412}
1413
/// Stores the vector `a` to `mem_addr` using the `movntdq` instruction.
///
/// Emitted as inline assembly: `mem_addr` is passed in a general-purpose register (`p`) and
/// `a` in an XMM register. The asm options are `nostack, preserves_flags`; since neither
/// `nomem` nor `readonly` is given, the compiler treats the block as accessing memory.
///
/// NOTE(review): `movntdq` is a non-temporal store per the Intel SDM, so it presumably has
/// weaker ordering than ordinary stores and needs a store fence before other threads rely on
/// the data — confirm against the upstream documentation for this intrinsic.
///
/// # Safety
///
/// NOTE(review): presumably `mem_addr` must be valid for a 16-byte write and 16-byte
/// aligned, as `movntdq` requires — confirm against the Intel SDM.
1414#[inline]
1429#[target_feature(enable = "sse2")]
1430#[cfg_attr(test, assert_instr(movntdq))]
1431#[stable(feature = "simd_x86", since = "1.27.0")]
1432pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) {
1433 crate::arch::asm!(
 // `vps!` is a crate macro defined elsewhere; presumably it builds the
 // "<mnemonic> [p]<suffix>" template string — TODO confirm.
1435 vps!("movntdq", ",{a}"),
1436 p = in(reg) mem_addr,
1437 a = in(xmm_reg) a,
1438 options(nostack, preserves_flags),
1439 );
1440}
1441
/// Stores the 32-bit integer `a` to `mem_addr` using the `movnti` instruction.
///
/// Emitted as inline assembly: both `mem_addr` and `a` are passed in general-purpose
/// registers. The asm options are `nostack, preserves_flags`; since neither `nomem` nor
/// `readonly` is given, the compiler treats the block as accessing memory.
///
/// NOTE(review): `movnti` is a non-temporal store per the Intel SDM, so it presumably has
/// weaker ordering than ordinary stores and needs a store fence before other threads rely on
/// the data — confirm against the upstream documentation for this intrinsic.
///
/// # Safety
///
/// NOTE(review): presumably `mem_addr` must be valid for a 4-byte write — confirm.
1442#[inline]
1457#[target_feature(enable = "sse2")]
1458#[cfg_attr(test, assert_instr(movnti))]
1459#[stable(feature = "simd_x86", since = "1.27.0")]
1460pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) {
1461 crate::arch::asm!(
 // The `:e` template modifier selects the 32-bit name of the register holding `a`.
1463 vps!("movnti", ",{a:e}"), p = in(reg) mem_addr,
1465 a = in(reg) a,
1466 options(nostack, preserves_flags),
1467 );
1468}
1469
/// Returns a vector whose low 64-bit lane is the low lane of `a` and whose high lane is zero.
///
/// Implemented as a two-input shuffle of `a` (as `i64x2`) with an all-zero `i64x2`.
/// The `movq` instruction check only runs on `x86_64`.
1470#[inline]
1475#[target_feature(enable = "sse2")]
1476#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movq))]
1478#[stable(feature = "simd_x86", since = "1.27.0")]
1479#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1480pub const fn _mm_move_epi64(a: __m128i) -> __m128i {
1481 unsafe {
 // Index 0 is lane 0 of `a`; index 2 is lane 0 of the zero vector.
1482 let r: i64x2 = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 2]);
1483 transmute(r)
1484 }
1485}
1486
/// Packs the 16-bit lanes of `a` and `b` into one vector via the `packsswb` binding.
///
/// Both inputs are viewed as `i16x8`; the binding's result is transmuted to `__m128i`.
///
/// NOTE(review): the binding is declared elsewhere. Per the Intel description of
/// `packsswb`, presumably each 16-bit value is narrowed to 8 bits with signed saturation,
/// `a` filling the low half and `b` the high half — confirm against the Intel docs.
1487#[inline]
1492#[target_feature(enable = "sse2")]
1493#[cfg_attr(test, assert_instr(packsswb))]
1494#[stable(feature = "simd_x86", since = "1.27.0")]
1495pub fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i {
1496 unsafe { transmute(packsswb(a.as_i16x8(), b.as_i16x8())) }
1497}
1498
/// Packs the 32-bit lanes of `a` and `b` into one vector via the `packssdw` binding.
///
/// Both inputs are viewed as `i32x4`; the binding's result is transmuted to `__m128i`.
///
/// NOTE(review): the binding is declared elsewhere. Per the Intel description of
/// `packssdw`, presumably each 32-bit value is narrowed to 16 bits with signed saturation,
/// `a` filling the low half and `b` the high half — confirm against the Intel docs.
1499#[inline]
1504#[target_feature(enable = "sse2")]
1505#[cfg_attr(test, assert_instr(packssdw))]
1506#[stable(feature = "simd_x86", since = "1.27.0")]
1507pub fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i {
1508 unsafe { transmute(packssdw(a.as_i32x4(), b.as_i32x4())) }
1509}
1510
/// Packs the 16-bit lanes of `a` and `b` into one vector via the `packuswb` binding.
///
/// Both inputs are viewed as `i16x8`; the binding's result is transmuted to `__m128i`.
///
/// NOTE(review): the binding is declared elsewhere. Per the Intel description of
/// `packuswb`, presumably each signed 16-bit value is narrowed to 8 bits with unsigned
/// saturation, `a` filling the low half and `b` the high half — confirm against the Intel docs.
1511#[inline]
1516#[target_feature(enable = "sse2")]
1517#[cfg_attr(test, assert_instr(packuswb))]
1518#[stable(feature = "simd_x86", since = "1.27.0")]
1519pub fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i {
1520 unsafe { transmute(packuswb(a.as_i16x8(), b.as_i16x8())) }
1521}
1522
/// Returns 16-bit lane number `IMM8` of `a`, zero-extended to `i32`.
///
/// The lane is read from `a` viewed as `u16x8`, so the `u16 -> i32` cast never produces a
/// negative value. `IMM8` is passed as a legacy const generic in argument position 1.
1523#[inline]
1527#[target_feature(enable = "sse2")]
1528#[cfg_attr(test, assert_instr(pextrw, IMM8 = 7))]
1529#[rustc_legacy_const_generics(1)]
1530#[stable(feature = "simd_x86", since = "1.27.0")]
1531#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1532pub const fn _mm_extract_epi16<const IMM8: i32>(a: __m128i) -> i32 {
 // Presumably a compile-time check that `IMM8` fits in 3 unsigned bits (0..=7) — the
 // macro is defined elsewhere; confirm.
1533 static_assert_uimm_bits!(IMM8, 3);
1534 unsafe { simd_extract!(a.as_u16x8(), IMM8 as u32, u16) as i32 }
1535}
1536
/// Returns a copy of `a` with 16-bit lane number `IMM8` replaced by `i`.
///
/// Only the low 16 bits of `i` are used (`i as i16` truncates). `IMM8` is passed as a
/// legacy const generic in argument position 2.
1537#[inline]
1541#[target_feature(enable = "sse2")]
1542#[cfg_attr(test, assert_instr(pinsrw, IMM8 = 7))]
1543#[rustc_legacy_const_generics(2)]
1544#[stable(feature = "simd_x86", since = "1.27.0")]
1545#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1546pub const fn _mm_insert_epi16<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
 // Presumably a compile-time check that `IMM8` fits in 3 unsigned bits (0..=7) — the
 // macro is defined elsewhere; confirm.
1547 static_assert_uimm_bits!(IMM8, 3);
1548 unsafe { transmute(simd_insert!(a.as_i16x8(), IMM8 as u32, i as i16)) }
1549}
1550
/// Collects the sign bit of every 8-bit lane of `a` into an integer mask.
///
/// Each lane is compared against zero as a signed byte (`lane < 0`, i.e. sign bit set) and
/// the sixteen comparison results are condensed into a `u16` bitmask, one bit per lane.
/// The mask is widened through `u32`, so the upper 16 bits of the returned `i32` are zero.
1551#[inline]
1555#[target_feature(enable = "sse2")]
1556#[cfg_attr(test, assert_instr(pmovmskb))]
1557#[stable(feature = "simd_x86", since = "1.27.0")]
1558#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1559pub const fn _mm_movemask_epi8(a: __m128i) -> i32 {
1560 unsafe {
1561 let z = i8x16::ZERO;
 // A signed byte is negative exactly when its most significant bit is set.
1562 let m: i8x16 = simd_lt(a.as_i8x16(), z);
 // `u16 -> u32 -> i32` zero-extends instead of sign-extending the mask.
1563 simd_bitmask::<_, u16>(m) as u32 as i32
1564 }
1565}
1566
/// Rearranges the four 32-bit lanes of `a` according to the control byte `IMM8`.
///
/// Result lane `k` (0..=3) is source lane `(IMM8 >> (2 * k)) & 0b11` of `a`. Both shuffle
/// inputs are `a`, and every index is below 4, so only the first input is ever selected.
/// `IMM8` is passed as a legacy const generic in argument position 1.
1567#[inline]
1571#[target_feature(enable = "sse2")]
1572#[cfg_attr(test, assert_instr(pshufd, IMM8 = 9))]
1573#[rustc_legacy_const_generics(1)]
1574#[stable(feature = "simd_x86", since = "1.27.0")]
1575#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1576pub const fn _mm_shuffle_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
 // Presumably a compile-time check that `IMM8` fits in 8 unsigned bits — the macro is
 // defined elsewhere; confirm.
1577 static_assert_uimm_bits!(IMM8, 8);
1578 unsafe {
1579 let a = a.as_i32x4();
1580 let x: i32x4 = simd_shuffle!(
1581 a,
1582 a,
1583 [
1584 IMM8 as u32 & 0b11,
1585 (IMM8 as u32 >> 2) & 0b11,
1586 (IMM8 as u32 >> 4) & 0b11,
1587 (IMM8 as u32 >> 6) & 0b11,
1588 ],
1589 );
1590 transmute(x)
1591 }
1592}
1593
/// Rearranges the upper four 16-bit lanes of `a` according to `IMM8`; the lower four lanes
/// are copied unchanged.
///
/// Result lanes 0..=3 are lanes 0..=3 of `a`. Result lane `4 + k` (k in 0..=3) is source
/// lane `4 + ((IMM8 >> (2 * k)) & 0b11)`, so the selection stays within the upper half.
/// `IMM8` is passed as a legacy const generic in argument position 1.
1594#[inline]
1602#[target_feature(enable = "sse2")]
1603#[cfg_attr(test, assert_instr(pshufhw, IMM8 = 9))]
1604#[rustc_legacy_const_generics(1)]
1605#[stable(feature = "simd_x86", since = "1.27.0")]
1606#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1607pub const fn _mm_shufflehi_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
 // Presumably a compile-time check that `IMM8` fits in 8 unsigned bits — the macro is
 // defined elsewhere; confirm.
1608 static_assert_uimm_bits!(IMM8, 8);
1609 unsafe {
1610 let a = a.as_i16x8();
1611 let x: i16x8 = simd_shuffle!(
1612 a,
1613 a,
1614 [
1615 0,
1616 1,
1617 2,
1618 3,
1619 (IMM8 as u32 & 0b11) + 4,
1620 ((IMM8 as u32 >> 2) & 0b11) + 4,
1621 ((IMM8 as u32 >> 4) & 0b11) + 4,
1622 ((IMM8 as u32 >> 6) & 0b11) + 4,
1623 ],
1624 );
1625 transmute(x)
1626 }
1627}
1628
/// Rearranges the lower four 16-bit lanes of `a` according to `IMM8`; the upper four lanes
/// are copied unchanged.
///
/// Result lane `k` (k in 0..=3) is source lane `(IMM8 >> (2 * k)) & 0b11`, so the selection
/// stays within the lower half. Result lanes 4..=7 are lanes 4..=7 of `a`.
/// `IMM8` is passed as a legacy const generic in argument position 1.
1629#[inline]
1637#[target_feature(enable = "sse2")]
1638#[cfg_attr(test, assert_instr(pshuflw, IMM8 = 9))]
1639#[rustc_legacy_const_generics(1)]
1640#[stable(feature = "simd_x86", since = "1.27.0")]
1641#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1642pub const fn _mm_shufflelo_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
 // Presumably a compile-time check that `IMM8` fits in 8 unsigned bits — the macro is
 // defined elsewhere; confirm.
1643 static_assert_uimm_bits!(IMM8, 8);
1644 unsafe {
1645 let a = a.as_i16x8();
1646 let x: i16x8 = simd_shuffle!(
1647 a,
1648 a,
1649 [
1650 IMM8 as u32 & 0b11,
1651 (IMM8 as u32 >> 2) & 0b11,
1652 (IMM8 as u32 >> 4) & 0b11,
1653 (IMM8 as u32 >> 6) & 0b11,
1654 4,
1655 5,
1656 6,
1657 7,
1658 ],
1659 );
1660 transmute(x)
1661 }
1662}
1663
/// Interleaves the upper eight 8-bit lanes of `a` and `b`.
///
/// The result is `a[8], b[8], a[9], b[9], ..., a[15], b[15]`: shuffle indices 8..=15 pick
/// from `a`, indices 24..=31 pick lanes 8..=15 of `b`.
1664#[inline]
1668#[target_feature(enable = "sse2")]
1669#[cfg_attr(test, assert_instr(punpckhbw))]
1670#[stable(feature = "simd_x86", since = "1.27.0")]
1671#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1672pub const fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i {
1673 unsafe {
1674 transmute::<i8x16, _>(simd_shuffle!(
1675 a.as_i8x16(),
1676 b.as_i8x16(),
1677 [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31],
1678 ))
1679 }
1680}
1681
/// Interleaves the upper four 16-bit lanes of `a` and `b`.
///
/// The result is `a[4], b[4], a[5], b[5], a[6], b[6], a[7], b[7]`: shuffle indices 4..=7
/// pick from `a`, indices 12..=15 pick lanes 4..=7 of `b`.
1682#[inline]
1686#[target_feature(enable = "sse2")]
1687#[cfg_attr(test, assert_instr(punpckhwd))]
1688#[stable(feature = "simd_x86", since = "1.27.0")]
1689#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1690pub const fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i {
1691 unsafe {
1692 let x = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]);
1693 transmute::<i16x8, _>(x)
1694 }
1695}
1696
/// Interleaves the upper two 32-bit lanes of `a` and `b`.
///
/// The result is `a[2], b[2], a[3], b[3]` (shuffle indices 6 and 7 are lanes 2 and 3 of
/// `b`). The test attribute expects the float-domain instruction `unpckhps`.
1697#[inline]
1701#[target_feature(enable = "sse2")]
1702#[cfg_attr(test, assert_instr(unpckhps))]
1703#[stable(feature = "simd_x86", since = "1.27.0")]
1704#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1705pub const fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i {
1706 unsafe { transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7])) }
1707}
1708
/// Combines the upper 64-bit lanes of `a` and `b`.
///
/// The result is `a[1], b[1]` (shuffle index 3 is lane 1 of `b`). The test attribute
/// expects the float-domain instruction `unpckhpd`.
1709#[inline]
1713#[target_feature(enable = "sse2")]
1714#[cfg_attr(test, assert_instr(unpckhpd))]
1715#[stable(feature = "simd_x86", since = "1.27.0")]
1716#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1717pub const fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i {
1718 unsafe { transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [1, 3])) }
1719}
1720
/// Interleaves the lower eight 8-bit lanes of `a` and `b`.
///
/// The result is `a[0], b[0], a[1], b[1], ..., a[7], b[7]`: shuffle indices 0..=7 pick
/// from `a`, indices 16..=23 pick lanes 0..=7 of `b`.
1721#[inline]
1725#[target_feature(enable = "sse2")]
1726#[cfg_attr(test, assert_instr(punpcklbw))]
1727#[stable(feature = "simd_x86", since = "1.27.0")]
1728#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1729pub const fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i {
1730 unsafe {
1731 transmute::<i8x16, _>(simd_shuffle!(
1732 a.as_i8x16(),
1733 b.as_i8x16(),
1734 [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23],
1735 ))
1736 }
1737}
1738
/// Interleaves the lower four 16-bit lanes of `a` and `b`.
///
/// The result is `a[0], b[0], a[1], b[1], a[2], b[2], a[3], b[3]`: shuffle indices 0..=3
/// pick from `a`, indices 8..=11 pick lanes 0..=3 of `b`.
1739#[inline]
1743#[target_feature(enable = "sse2")]
1744#[cfg_attr(test, assert_instr(punpcklwd))]
1745#[stable(feature = "simd_x86", since = "1.27.0")]
1746#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1747pub const fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i {
1748 unsafe {
1749 let x = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]);
1750 transmute::<i16x8, _>(x)
1751 }
1752}
1753
/// Interleaves the lower two 32-bit lanes of `a` and `b`.
///
/// The result is `a[0], b[0], a[1], b[1]` (shuffle indices 4 and 5 are lanes 0 and 1 of
/// `b`). The test attribute expects the float-domain instruction `unpcklps`.
1754#[inline]
1758#[target_feature(enable = "sse2")]
1759#[cfg_attr(test, assert_instr(unpcklps))]
1760#[stable(feature = "simd_x86", since = "1.27.0")]
1761#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1762pub const fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i {
1763 unsafe { transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5])) }
1764}
1765
/// Combines the lower 64-bit lanes of `a` and `b`.
///
/// The result is `a[0], b[0]` (shuffle index 2 is lane 0 of `b`). The test attribute
/// expects the float-domain instruction `movlhps`.
1766#[inline]
1770#[target_feature(enable = "sse2")]
1771#[cfg_attr(test, assert_instr(movlhps))]
1772#[stable(feature = "simd_x86", since = "1.27.0")]
1773#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1774pub const fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i {
1775 unsafe { transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [0, 2])) }
1776}
1777
/// Adds the lowest `f64` lanes of `a` and `b`; the upper lane of the result is copied
/// from `a`.
///
/// Lane 0 of `a` is replaced with `a[0] + b[0]`, where each operand is obtained through
/// `_mm_cvtsd_f64`.
1778#[inline]
1783#[target_feature(enable = "sse2")]
1784#[cfg_attr(test, assert_instr(addsd))]
1785#[stable(feature = "simd_x86", since = "1.27.0")]
1786#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1787pub const fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d {
1788 unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) + _mm_cvtsd_f64(b)) }
1789}
1790
/// Adds the `f64` lanes of `a` and `b` element-wise (`simd_add`).
1791#[inline]
1796#[target_feature(enable = "sse2")]
1797#[cfg_attr(test, assert_instr(addpd))]
1798#[stable(feature = "simd_x86", since = "1.27.0")]
1799#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1800pub const fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d {
1801 unsafe { simd_add(a, b) }
1802}
1803
/// Divides the lowest `f64` lane of `a` by the lowest lane of `b`; the upper lane of the
/// result is copied from `a`.
///
/// Lane 0 of `a` is replaced with `a[0] / b[0]`, where each operand is obtained through
/// `_mm_cvtsd_f64`.
1804#[inline]
1809#[target_feature(enable = "sse2")]
1810#[cfg_attr(test, assert_instr(divsd))]
1811#[stable(feature = "simd_x86", since = "1.27.0")]
1812#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1813pub const fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d {
1814 unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) / _mm_cvtsd_f64(b)) }
1815}
1816
/// Divides the `f64` lanes of `a` by the corresponding lanes of `b` (`simd_div`).
1817#[inline]
1822#[target_feature(enable = "sse2")]
1823#[cfg_attr(test, assert_instr(divpd))]
1824#[stable(feature = "simd_x86", since = "1.27.0")]
1825#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1826pub const fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d {
1827 unsafe { simd_div(a, b) }
1828}
1829
/// Computes a maximum on the lowest `f64` lanes of `a` and `b` via the `maxsd` binding.
///
/// NOTE(review): the binding is declared elsewhere. Per the Intel description of `maxsd`,
/// presumably lane 0 is the larger of `a[0]` and `b[0]` and lane 1 is copied from `a`; the
/// instruction's NaN and signed-zero behaviour is operand-order dependent — confirm against
/// the Intel docs before relying on it.
1830#[inline]
1835#[target_feature(enable = "sse2")]
1836#[cfg_attr(test, assert_instr(maxsd))]
1837#[stable(feature = "simd_x86", since = "1.27.0")]
1838pub fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d {
1839 unsafe { maxsd(a, b) }
1840}
1841
/// Computes a lane-wise maximum of `a` and `b` via the `maxpd` binding.
///
/// NOTE(review): the binding is declared elsewhere. The instruction's NaN and signed-zero
/// behaviour is operand-order dependent per the Intel description of `maxpd` — confirm
/// against the Intel docs before relying on it.
1842#[inline]
1847#[target_feature(enable = "sse2")]
1848#[cfg_attr(test, assert_instr(maxpd))]
1849#[stable(feature = "simd_x86", since = "1.27.0")]
1850pub fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d {
1851 unsafe { maxpd(a, b) }
1852}
1853
/// Computes a minimum on the lowest `f64` lanes of `a` and `b` via the `minsd` binding.
///
/// NOTE(review): the binding is declared elsewhere. Per the Intel description of `minsd`,
/// presumably lane 0 is the smaller of `a[0]` and `b[0]` and lane 1 is copied from `a`; the
/// instruction's NaN and signed-zero behaviour is operand-order dependent — confirm against
/// the Intel docs before relying on it.
1854#[inline]
1859#[target_feature(enable = "sse2")]
1860#[cfg_attr(test, assert_instr(minsd))]
1861#[stable(feature = "simd_x86", since = "1.27.0")]
1862pub fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d {
1863 unsafe { minsd(a, b) }
1864}
1865
/// Computes a lane-wise minimum of `a` and `b` via the `minpd` binding.
///
/// NOTE(review): the binding is declared elsewhere. The instruction's NaN and signed-zero
/// behaviour is operand-order dependent per the Intel description of `minpd` — confirm
/// against the Intel docs before relying on it.
1866#[inline]
1871#[target_feature(enable = "sse2")]
1872#[cfg_attr(test, assert_instr(minpd))]
1873#[stable(feature = "simd_x86", since = "1.27.0")]
1874pub fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d {
1875 unsafe { minpd(a, b) }
1876}
1877
/// Multiplies the lowest `f64` lanes of `a` and `b`; the upper lane of the result is
/// copied from `a`.
///
/// Lane 0 of `a` is replaced with `a[0] * b[0]`, where each operand is obtained through
/// `_mm_cvtsd_f64`.
1878#[inline]
1883#[target_feature(enable = "sse2")]
1884#[cfg_attr(test, assert_instr(mulsd))]
1885#[stable(feature = "simd_x86", since = "1.27.0")]
1886#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1887pub const fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d {
1888 unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) * _mm_cvtsd_f64(b)) }
1889}
1890
/// Multiplies the `f64` lanes of `a` and `b` element-wise (`simd_mul`).
1891#[inline]
1896#[target_feature(enable = "sse2")]
1897#[cfg_attr(test, assert_instr(mulpd))]
1898#[stable(feature = "simd_x86", since = "1.27.0")]
1899#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1900pub const fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d {
1901 unsafe { simd_mul(a, b) }
1902}
1903
/// Returns `a` with its lowest lane replaced by the square root of the lowest lane of `b`.
///
/// Note the asymmetry: the square root is taken of `b[0]` (via `sqrtf64`), while the upper
/// lane of the result comes from `a`.
1904#[inline]
1909#[target_feature(enable = "sse2")]
1910#[cfg_attr(test, assert_instr(sqrtsd))]
1911#[stable(feature = "simd_x86", since = "1.27.0")]
1912pub fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d {
1913 unsafe { simd_insert!(a, 0, sqrtf64(_mm_cvtsd_f64(b))) }
1914}
1915
/// Computes the square root of each `f64` lane of `a` (`simd_fsqrt`).
1916#[inline]
1920#[target_feature(enable = "sse2")]
1921#[cfg_attr(test, assert_instr(sqrtpd))]
1922#[stable(feature = "simd_x86", since = "1.27.0")]
1923pub fn _mm_sqrt_pd(a: __m128d) -> __m128d {
1924 unsafe { simd_fsqrt(a) }
1925}
1926
/// Subtracts the lowest `f64` lane of `b` from the lowest lane of `a`; the upper lane of
/// the result is copied from `a`.
///
/// Lane 0 of `a` is replaced with `a[0] - b[0]`, where each operand is obtained through
/// `_mm_cvtsd_f64`.
1927#[inline]
1932#[target_feature(enable = "sse2")]
1933#[cfg_attr(test, assert_instr(subsd))]
1934#[stable(feature = "simd_x86", since = "1.27.0")]
1935#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1936pub const fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d {
1937 unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) - _mm_cvtsd_f64(b)) }
1938}
1939
/// Subtracts the `f64` lanes of `b` from the corresponding lanes of `a` (`simd_sub`).
1940#[inline]
1945#[target_feature(enable = "sse2")]
1946#[cfg_attr(test, assert_instr(subpd))]
1947#[stable(feature = "simd_x86", since = "1.27.0")]
1948#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1949pub const fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d {
1950 unsafe { simd_sub(a, b) }
1951}
1952
/// Computes the bitwise AND of the raw bits of `a` and `b`.
///
/// Both operands are reinterpreted as `__m128i`, combined with `_mm_and_si128`, and the
/// result is reinterpreted back as `__m128d`. The test attribute expects `andps`.
1953#[inline]
1958#[target_feature(enable = "sse2")]
1959#[cfg_attr(test, assert_instr(andps))]
1960#[stable(feature = "simd_x86", since = "1.27.0")]
1961#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1962pub const fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d {
1963 unsafe {
 // Pure bit reinterpretation; no numeric conversion takes place.
1964 let a: __m128i = transmute(a);
1965 let b: __m128i = transmute(b);
1966 transmute(_mm_and_si128(a, b))
1967 }
1968}
1969
/// Computes a bitwise AND-NOT of the raw bits of `a` and `b`.
///
/// Both operands are reinterpreted as `__m128i`, combined with `_mm_andnot_si128(a, b)`,
/// and the result is reinterpreted back as `__m128d`. The test attribute expects `andnps`.
///
/// NOTE(review): presumably the result is `(!a) & b` (the first operand is the one that is
/// inverted) — confirm against `_mm_andnot_si128`, which is defined outside this excerpt.
1970#[inline]
1974#[target_feature(enable = "sse2")]
1975#[cfg_attr(test, assert_instr(andnps))]
1976#[stable(feature = "simd_x86", since = "1.27.0")]
1977#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1978pub const fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d {
1979 unsafe {
 // Pure bit reinterpretation; no numeric conversion takes place.
1980 let a: __m128i = transmute(a);
1981 let b: __m128i = transmute(b);
1982 transmute(_mm_andnot_si128(a, b))
1983 }
1984}
1985
/// Computes the bitwise OR of the raw bits of `a` and `b`.
///
/// Both operands are reinterpreted as `__m128i`, combined with `_mm_or_si128`, and the
/// result is reinterpreted back as `__m128d`. The test attribute expects `orps`.
1986#[inline]
1990#[target_feature(enable = "sse2")]
1991#[cfg_attr(test, assert_instr(orps))]
1992#[stable(feature = "simd_x86", since = "1.27.0")]
1993#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1994pub const fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d {
1995 unsafe {
 // Pure bit reinterpretation; no numeric conversion takes place.
1996 let a: __m128i = transmute(a);
1997 let b: __m128i = transmute(b);
1998 transmute(_mm_or_si128(a, b))
1999 }
2000}
2001
/// Computes the bitwise XOR of the raw bits of `a` and `b`.
///
/// Both operands are reinterpreted as `__m128i`, combined with `_mm_xor_si128`, and the
/// result is reinterpreted back as `__m128d`. The test attribute expects `xorps`.
2002#[inline]
2006#[target_feature(enable = "sse2")]
2007#[cfg_attr(test, assert_instr(xorps))]
2008#[stable(feature = "simd_x86", since = "1.27.0")]
2009#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2010pub const fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d {
2011 unsafe {
 // Pure bit reinterpretation; no numeric conversion takes place.
2012 let a: __m128i = transmute(a);
2013 let b: __m128i = transmute(b);
2014 transmute(_mm_xor_si128(a, b))
2015 }
2016}
2017
/// Compares the lowest `f64` lanes of `a` and `b` for equality.
///
/// Calls the `cmpsd` binding with predicate immediate `0`, which the test attribute expects
/// to assemble as `cmpeqsd`.
///
/// NOTE(review): `cmpsd` is declared elsewhere. Presumably lane 0 of the result is an
/// all-ones mask when the predicate holds (all-zeros otherwise) and lane 1 is copied from
/// `a` — confirm against the Intel docs.
2018#[inline]
2023#[target_feature(enable = "sse2")]
2024#[cfg_attr(test, assert_instr(cmpeqsd))]
2025#[stable(feature = "simd_x86", since = "1.27.0")]
2026pub fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d {
2027 unsafe { cmpsd(a, b, 0) }
2028}
2029
/// Compares the lowest `f64` lanes of `a` and `b` for "less than".
///
/// Calls the `cmpsd` binding with predicate immediate `1`, which the test attribute expects
/// to assemble as `cmpltsd`.
///
/// NOTE(review): `cmpsd` is declared elsewhere. Presumably lane 0 of the result is an
/// all-ones mask when the predicate holds (all-zeros otherwise) and lane 1 is copied from
/// `a` — confirm against the Intel docs.
2030#[inline]
2035#[target_feature(enable = "sse2")]
2036#[cfg_attr(test, assert_instr(cmpltsd))]
2037#[stable(feature = "simd_x86", since = "1.27.0")]
2038pub fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d {
2039 unsafe { cmpsd(a, b, 1) }
2040}
2041
/// Compares the lowest `f64` lanes of `a` and `b` for "less than or equal".
///
/// Calls the `cmpsd` binding with predicate immediate `2`, which the test attribute expects
/// to assemble as `cmplesd`.
///
/// NOTE(review): `cmpsd` is declared elsewhere. Presumably lane 0 of the result is an
/// all-ones mask when the predicate holds (all-zeros otherwise) and lane 1 is copied from
/// `a` — confirm against the Intel docs.
2042#[inline]
2047#[target_feature(enable = "sse2")]
2048#[cfg_attr(test, assert_instr(cmplesd))]
2049#[stable(feature = "simd_x86", since = "1.27.0")]
2050pub fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d {
2051 unsafe { cmpsd(a, b, 2) }
2052}
2053
/// Compares the lowest `f64` lanes of `a` and `b` for "greater than".
///
/// Implemented as `_mm_cmplt_sd(b, a)` (operands swapped), after which lane 1 of that
/// result is overwritten with lane 1 of `a`, so the upper lane of the returned vector
/// always comes from `a`.
2054#[inline]
2059#[target_feature(enable = "sse2")]
2060#[cfg_attr(test, assert_instr(cmpltsd))]
2061#[stable(feature = "simd_x86", since = "1.27.0")]
2062pub fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d {
2063 unsafe { simd_insert!(_mm_cmplt_sd(b, a), 1, simd_extract!(a, 1, f64)) }
2064}
2065
/// Compares the lowest `f64` lanes of `a` and `b` for "greater than or equal".
///
/// Implemented as `_mm_cmple_sd(b, a)` (operands swapped), after which lane 1 of that
/// result is overwritten with lane 1 of `a`, so the upper lane of the returned vector
/// always comes from `a`.
2066#[inline]
2071#[target_feature(enable = "sse2")]
2072#[cfg_attr(test, assert_instr(cmplesd))]
2073#[stable(feature = "simd_x86", since = "1.27.0")]
2074pub fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d {
2075 unsafe { simd_insert!(_mm_cmple_sd(b, a), 1, simd_extract!(a, 1, f64)) }
2076}
2077
/// Performs the "ordered" comparison on the lowest `f64` lanes of `a` and `b`.
///
/// Calls the `cmpsd` binding with predicate immediate `7`, which the test attribute expects
/// to assemble as `cmpordsd`.
///
/// NOTE(review): `cmpsd` is declared elsewhere. Presumably lane 0 of the result is an
/// all-ones mask when neither input lane is NaN (all-zeros otherwise) and lane 1 is copied
/// from `a` — confirm against the Intel docs.
2078#[inline]
2085#[target_feature(enable = "sse2")]
2086#[cfg_attr(test, assert_instr(cmpordsd))]
2087#[stable(feature = "simd_x86", since = "1.27.0")]
2088pub fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d {
2089 unsafe { cmpsd(a, b, 7) }
2090}
2091
/// Performs the "unordered" comparison on the lowest `f64` lanes of `a` and `b`.
///
/// Calls the `cmpsd` binding with predicate immediate `3`, which the test attribute expects
/// to assemble as `cmpunordsd`.
///
/// NOTE(review): `cmpsd` is declared elsewhere. Presumably lane 0 of the result is an
/// all-ones mask when either input lane is NaN (all-zeros otherwise) and lane 1 is copied
/// from `a` — confirm against the Intel docs.
2092#[inline]
2098#[target_feature(enable = "sse2")]
2099#[cfg_attr(test, assert_instr(cmpunordsd))]
2100#[stable(feature = "simd_x86", since = "1.27.0")]
2101pub fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d {
2102 unsafe { cmpsd(a, b, 3) }
2103}
2104
/// Compares the lowest `f64` lanes of `a` and `b` for inequality.
///
/// Calls the `cmpsd` binding with predicate immediate `4`, which the test attribute expects
/// to assemble as `cmpneqsd`.
///
/// NOTE(review): `cmpsd` is declared elsewhere. Presumably lane 0 of the result is an
/// all-ones mask when the predicate holds (all-zeros otherwise) and lane 1 is copied from
/// `a` — confirm against the Intel docs.
2105#[inline]
2110#[target_feature(enable = "sse2")]
2111#[cfg_attr(test, assert_instr(cmpneqsd))]
2112#[stable(feature = "simd_x86", since = "1.27.0")]
2113pub fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d {
2114 unsafe { cmpsd(a, b, 4) }
2115}
2116
/// Compares the lowest `f64` lanes of `a` and `b` for "not less than".
///
/// Calls the `cmpsd` binding with predicate immediate `5`, which the test attribute expects
/// to assemble as `cmpnltsd`.
///
/// NOTE(review): `cmpsd` is declared elsewhere. Presumably lane 0 of the result is an
/// all-ones mask when the predicate holds (all-zeros otherwise) and lane 1 is copied from
/// `a` — confirm against the Intel docs.
2117#[inline]
2122#[target_feature(enable = "sse2")]
2123#[cfg_attr(test, assert_instr(cmpnltsd))]
2124#[stable(feature = "simd_x86", since = "1.27.0")]
2125pub fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d {
2126 unsafe { cmpsd(a, b, 5) }
2127}
2128
/// Compares the lowest `f64` lanes of `a` and `b` for "not less than or equal".
///
/// Calls the `cmpsd` binding with predicate immediate `6`, which the test attribute expects
/// to assemble as `cmpnlesd`.
///
/// NOTE(review): `cmpsd` is declared elsewhere. Presumably lane 0 of the result is an
/// all-ones mask when the predicate holds (all-zeros otherwise) and lane 1 is copied from
/// `a` — confirm against the Intel docs.
2129#[inline]
2134#[target_feature(enable = "sse2")]
2135#[cfg_attr(test, assert_instr(cmpnlesd))]
2136#[stable(feature = "simd_x86", since = "1.27.0")]
2137pub fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d {
2138 unsafe { cmpsd(a, b, 6) }
2139}
2140
/// Compares the lowest `f64` lanes of `a` and `b` for "not greater than".
///
/// Implemented as `_mm_cmpnlt_sd(b, a)` (operands swapped), after which lane 1 of that
/// result is overwritten with lane 1 of `a`, so the upper lane of the returned vector
/// always comes from `a`.
2141#[inline]
2146#[target_feature(enable = "sse2")]
2147#[cfg_attr(test, assert_instr(cmpnltsd))]
2148#[stable(feature = "simd_x86", since = "1.27.0")]
2149pub fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d {
2150 unsafe { simd_insert!(_mm_cmpnlt_sd(b, a), 1, simd_extract!(a, 1, f64)) }
2151}
2152
/// Compares the lowest `f64` lanes of `a` and `b` for "not greater than or equal".
///
/// Implemented as `_mm_cmpnle_sd(b, a)` (operands swapped), after which lane 1 of that
/// result is overwritten with lane 1 of `a`, so the upper lane of the returned vector
/// always comes from `a`.
2153#[inline]
2158#[target_feature(enable = "sse2")]
2159#[cfg_attr(test, assert_instr(cmpnlesd))]
2160#[stable(feature = "simd_x86", since = "1.27.0")]
2161pub fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d {
2162 unsafe { simd_insert!(_mm_cmpnle_sd(b, a), 1, simd_extract!(a, 1, f64)) }
2163}
2164
/// Compares the `f64` lanes of `a` and `b` for equality.
///
/// Calls the `cmppd` binding with predicate immediate `0`, which the test attribute expects
/// to assemble as `cmpeqpd`.
///
/// NOTE(review): `cmppd` is declared elsewhere. Presumably each result lane is an all-ones
/// mask when the predicate holds for that lane and all-zeros otherwise — confirm against
/// the Intel docs.
2165#[inline]
2169#[target_feature(enable = "sse2")]
2170#[cfg_attr(test, assert_instr(cmpeqpd))]
2171#[stable(feature = "simd_x86", since = "1.27.0")]
2172pub fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d {
2173 unsafe { cmppd(a, b, 0) }
2174}
2175
/// Compares the `f64` lanes of `a` and `b` for "less than".
///
/// Calls the `cmppd` binding with predicate immediate `1`, which the test attribute expects
/// to assemble as `cmpltpd`.
///
/// NOTE(review): `cmppd` is declared elsewhere. Presumably each result lane is an all-ones
/// mask when the predicate holds for that lane and all-zeros otherwise — confirm against
/// the Intel docs.
2176#[inline]
2180#[target_feature(enable = "sse2")]
2181#[cfg_attr(test, assert_instr(cmpltpd))]
2182#[stable(feature = "simd_x86", since = "1.27.0")]
2183pub fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d {
2184 unsafe { cmppd(a, b, 1) }
2185}
2186
/// Compares the `f64` lanes of `a` and `b` for "less than or equal".
///
/// Calls the `cmppd` binding with predicate immediate `2`, which the test attribute expects
/// to assemble as `cmplepd`.
///
/// NOTE(review): `cmppd` is declared elsewhere. Presumably each result lane is an all-ones
/// mask when the predicate holds for that lane and all-zeros otherwise — confirm against
/// the Intel docs.
2187#[inline]
2191#[target_feature(enable = "sse2")]
2192#[cfg_attr(test, assert_instr(cmplepd))]
2193#[stable(feature = "simd_x86", since = "1.27.0")]
2194pub fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d {
2195 unsafe { cmppd(a, b, 2) }
2196}
2197
/// Compares the `f64` lanes of `a` and `b` for "greater than".
///
/// Implemented by swapping the operands of the "less than" comparison:
/// `_mm_cmplt_pd(b, a)`. Hence the expected instruction is `cmpltpd`.
2198#[inline]
2202#[target_feature(enable = "sse2")]
2203#[cfg_attr(test, assert_instr(cmpltpd))]
2204#[stable(feature = "simd_x86", since = "1.27.0")]
2205pub fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d {
2206 _mm_cmplt_pd(b, a)
2207}
2208
/// Compares the `f64` lanes of `a` and `b` for "greater than or equal".
///
/// Implemented by swapping the operands of the "less than or equal" comparison:
/// `_mm_cmple_pd(b, a)`. Hence the expected instruction is `cmplepd`.
2209#[inline]
2213#[target_feature(enable = "sse2")]
2214#[cfg_attr(test, assert_instr(cmplepd))]
2215#[stable(feature = "simd_x86", since = "1.27.0")]
2216pub fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d {
2217 _mm_cmple_pd(b, a)
2218}
2219
/// Performs the "ordered" comparison on the `f64` lanes of `a` and `b`.
///
/// Calls the `cmppd` binding with predicate immediate `7`, which the test attribute expects
/// to assemble as `cmpordpd`.
///
/// NOTE(review): `cmppd` is declared elsewhere. Presumably each result lane is an all-ones
/// mask when neither input lane is NaN and all-zeros otherwise — confirm against the Intel
/// docs.
2220#[inline]
2224#[target_feature(enable = "sse2")]
2225#[cfg_attr(test, assert_instr(cmpordpd))]
2226#[stable(feature = "simd_x86", since = "1.27.0")]
2227pub fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d {
2228 unsafe { cmppd(a, b, 7) }
2229}
2230
/// Performs the "unordered" comparison on the `f64` lanes of `a` and `b`.
///
/// Calls the `cmppd` binding with predicate immediate `3`, which the test attribute expects
/// to assemble as `cmpunordpd`.
///
/// NOTE(review): `cmppd` is declared elsewhere. Presumably each result lane is an all-ones
/// mask when either input lane is NaN and all-zeros otherwise — confirm against the Intel
/// docs.
2231#[inline]
2235#[target_feature(enable = "sse2")]
2236#[cfg_attr(test, assert_instr(cmpunordpd))]
2237#[stable(feature = "simd_x86", since = "1.27.0")]
2238pub fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d {
2239 unsafe { cmppd(a, b, 3) }
2240}
2241
/// Compares the `f64` lanes of `a` and `b` for inequality.
///
/// Calls the `cmppd` binding with predicate immediate `4`, which the test attribute expects
/// to assemble as `cmpneqpd`.
///
/// NOTE(review): `cmppd` is declared elsewhere. Presumably each result lane is an all-ones
/// mask when the predicate holds for that lane and all-zeros otherwise — confirm against
/// the Intel docs.
2242#[inline]
2246#[target_feature(enable = "sse2")]
2247#[cfg_attr(test, assert_instr(cmpneqpd))]
2248#[stable(feature = "simd_x86", since = "1.27.0")]
2249pub fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d {
2250 unsafe { cmppd(a, b, 4) }
2251}
2252
/// Compares the `f64` lanes of `a` and `b` for "not less than".
///
/// Calls the `cmppd` binding with predicate immediate `5`, which the test attribute expects
/// to assemble as `cmpnltpd`.
///
/// NOTE(review): `cmppd` is declared elsewhere. Presumably each result lane is an all-ones
/// mask when the predicate holds for that lane and all-zeros otherwise — confirm against
/// the Intel docs.
2253#[inline]
2257#[target_feature(enable = "sse2")]
2258#[cfg_attr(test, assert_instr(cmpnltpd))]
2259#[stable(feature = "simd_x86", since = "1.27.0")]
2260pub fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d {
2261 unsafe { cmppd(a, b, 5) }
2262}
2263
/// Compares the `f64` lanes of `a` and `b` for "not less than or equal".
///
/// Calls the `cmppd` binding with predicate immediate `6`, which the test attribute expects
/// to assemble as `cmpnlepd`.
///
/// NOTE(review): `cmppd` is declared elsewhere. Presumably each result lane is an all-ones
/// mask when the predicate holds for that lane and all-zeros otherwise — confirm against
/// the Intel docs.
2264#[inline]
2268#[target_feature(enable = "sse2")]
2269#[cfg_attr(test, assert_instr(cmpnlepd))]
2270#[stable(feature = "simd_x86", since = "1.27.0")]
2271pub fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d {
2272 unsafe { cmppd(a, b, 6) }
2273}
2274
/// Compares the `f64` lanes of `a` and `b` for "not greater than".
///
/// Implemented by swapping the operands of the "not less than" comparison:
/// `_mm_cmpnlt_pd(b, a)`. Hence the expected instruction is `cmpnltpd`.
2275#[inline]
2279#[target_feature(enable = "sse2")]
2280#[cfg_attr(test, assert_instr(cmpnltpd))]
2281#[stable(feature = "simd_x86", since = "1.27.0")]
2282pub fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d {
2283 _mm_cmpnlt_pd(b, a)
2284}
2285
/// Compares the `f64` lanes of `a` and `b` for "not greater than or equal".
///
/// Implemented by swapping the operands of the "not less than or equal" comparison:
/// `_mm_cmpnle_pd(b, a)`. Hence the expected instruction is `cmpnlepd`.
2286#[inline]
2291#[target_feature(enable = "sse2")]
2292#[cfg_attr(test, assert_instr(cmpnlepd))]
2293#[stable(feature = "simd_x86", since = "1.27.0")]
2294pub fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d {
2295 _mm_cmpnle_pd(b, a)
2296}
2297
/// Compares `a` and `b` for equality through the `comieqsd` binding and returns its `i32`
/// result. The test attribute expects a `comisd` instruction.
///
/// NOTE(review): the binding is declared elsewhere. Presumably it compares only the lowest
/// `f64` lanes and yields `1` when the predicate holds and `0` otherwise — confirm against
/// the Intel docs (including NaN behaviour).
2298#[inline]
2302#[target_feature(enable = "sse2")]
2303#[cfg_attr(test, assert_instr(comisd))]
2304#[stable(feature = "simd_x86", since = "1.27.0")]
2305pub fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32 {
2306 unsafe { comieqsd(a, b) }
2307}
2308
/// Compares `a` and `b` for "less than" through the `comiltsd` binding and returns its
/// `i32` result. The test attribute expects a `comisd` instruction.
///
/// NOTE(review): the binding is declared elsewhere. Presumably it compares only the lowest
/// `f64` lanes and yields `1` when the predicate holds and `0` otherwise — confirm against
/// the Intel docs (including NaN behaviour).
2309#[inline]
2313#[target_feature(enable = "sse2")]
2314#[cfg_attr(test, assert_instr(comisd))]
2315#[stable(feature = "simd_x86", since = "1.27.0")]
2316pub fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32 {
2317 unsafe { comiltsd(a, b) }
2318}
2319
/// Compares `a` and `b` for "less than or equal" through the `comilesd` binding and returns
/// its `i32` result. The test attribute expects a `comisd` instruction.
///
/// NOTE(review): the binding is declared elsewhere. Presumably it compares only the lowest
/// `f64` lanes and yields `1` when the predicate holds and `0` otherwise — confirm against
/// the Intel docs (including NaN behaviour).
2320#[inline]
2324#[target_feature(enable = "sse2")]
2325#[cfg_attr(test, assert_instr(comisd))]
2326#[stable(feature = "simd_x86", since = "1.27.0")]
2327pub fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32 {
2328 unsafe { comilesd(a, b) }
2329}
2330
/// Compares `a` and `b` for "greater than" through the `comigtsd` binding and returns its
/// `i32` result. The test attribute expects a `comisd` instruction.
///
/// NOTE(review): the binding is declared elsewhere. Presumably it compares only the lowest
/// `f64` lanes and yields `1` when the predicate holds and `0` otherwise — confirm against
/// the Intel docs (including NaN behaviour).
2331#[inline]
2335#[target_feature(enable = "sse2")]
2336#[cfg_attr(test, assert_instr(comisd))]
2337#[stable(feature = "simd_x86", since = "1.27.0")]
2338pub fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32 {
2339 unsafe { comigtsd(a, b) }
2340}
2341
/// Compares `a` and `b` for "greater than or equal" through the `comigesd` binding and
/// returns its `i32` result. The test attribute expects a `comisd` instruction.
///
/// NOTE(review): the binding is declared elsewhere. Presumably it compares only the lowest
/// `f64` lanes and yields `1` when the predicate holds and `0` otherwise — confirm against
/// the Intel docs (including NaN behaviour).
2342#[inline]
2346#[target_feature(enable = "sse2")]
2347#[cfg_attr(test, assert_instr(comisd))]
2348#[stable(feature = "simd_x86", since = "1.27.0")]
2349pub fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32 {
2350 unsafe { comigesd(a, b) }
2351}
2352
/// Compares `a` and `b` for inequality through the `comineqsd` binding and returns its
/// `i32` result. The test attribute expects a `comisd` instruction.
///
/// NOTE(review): the binding is declared elsewhere. Presumably it compares only the lowest
/// `f64` lanes and yields `1` when the predicate holds and `0` otherwise — confirm against
/// the Intel docs (including NaN behaviour).
2353#[inline]
2357#[target_feature(enable = "sse2")]
2358#[cfg_attr(test, assert_instr(comisd))]
2359#[stable(feature = "simd_x86", since = "1.27.0")]
2360pub fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32 {
2361 unsafe { comineqsd(a, b) }
2362}
2363
/// Compares `a` and `b` for equality through the `ucomieqsd` binding and returns its `i32`
/// result. The test attribute expects a `ucomisd` instruction.
///
/// NOTE(review): the binding is declared elsewhere. Presumably it compares only the lowest
/// `f64` lanes and yields `1` when the predicate holds and `0` otherwise; how it differs
/// from the `comi*` variant (presumably exception signalling on quiet NaNs) is not visible
/// here — confirm against the Intel docs.
2364#[inline]
2368#[target_feature(enable = "sse2")]
2369#[cfg_attr(test, assert_instr(ucomisd))]
2370#[stable(feature = "simd_x86", since = "1.27.0")]
2371pub fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32 {
2372 unsafe { ucomieqsd(a, b) }
2373}
2374
/// Compares `a` and `b` for "less than" through the `ucomiltsd` binding and returns its
/// `i32` result. The test attribute expects a `ucomisd` instruction.
///
/// NOTE(review): the binding is declared elsewhere. Presumably it compares only the lowest
/// `f64` lanes and yields `1` when the predicate holds and `0` otherwise; how it differs
/// from the `comi*` variant (presumably exception signalling on quiet NaNs) is not visible
/// here — confirm against the Intel docs.
2375#[inline]
2379#[target_feature(enable = "sse2")]
2380#[cfg_attr(test, assert_instr(ucomisd))]
2381#[stable(feature = "simd_x86", since = "1.27.0")]
2382pub fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32 {
2383 unsafe { ucomiltsd(a, b) }
2384}
2385
/// Compares `a` and `b` for "less than or equal" through the `ucomilesd` binding and
/// returns its `i32` result. The test attribute expects a `ucomisd` instruction.
///
/// NOTE(review): the binding is declared elsewhere. Presumably it compares only the lowest
/// `f64` lanes and yields `1` when the predicate holds and `0` otherwise; how it differs
/// from the `comi*` variant (presumably exception signalling on quiet NaNs) is not visible
/// here — confirm against the Intel docs.
2386#[inline]
2390#[target_feature(enable = "sse2")]
2391#[cfg_attr(test, assert_instr(ucomisd))]
2392#[stable(feature = "simd_x86", since = "1.27.0")]
2393pub fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32 {
2394 unsafe { ucomilesd(a, b) }
2395}
2396
/// Compares `a` and `b` for "greater than" through the `ucomigtsd` binding and returns its
/// `i32` result. The test attribute expects a `ucomisd` instruction.
///
/// NOTE(review): the binding is declared elsewhere. Presumably it compares only the lowest
/// `f64` lanes and yields `1` when the predicate holds and `0` otherwise; how it differs
/// from the `comi*` variant (presumably exception signalling on quiet NaNs) is not visible
/// here — confirm against the Intel docs.
2397#[inline]
2401#[target_feature(enable = "sse2")]
2402#[cfg_attr(test, assert_instr(ucomisd))]
2403#[stable(feature = "simd_x86", since = "1.27.0")]
2404pub fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32 {
2405 unsafe { ucomigtsd(a, b) }
2406}
2407
/// Compares `a` and `b` for "greater than or equal" through the `ucomigesd` binding and
/// returns its `i32` result. The test attribute expects a `ucomisd` instruction.
///
/// NOTE(review): the binding is declared elsewhere. Presumably it compares only the lowest
/// `f64` lanes and yields `1` when the predicate holds and `0` otherwise; how it differs
/// from the `comi*` variant (presumably exception signalling on quiet NaNs) is not visible
/// here — confirm against the Intel docs.
2408#[inline]
2412#[target_feature(enable = "sse2")]
2413#[cfg_attr(test, assert_instr(ucomisd))]
2414#[stable(feature = "simd_x86", since = "1.27.0")]
2415pub fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32 {
2416 unsafe { ucomigesd(a, b) }
2417}
2418
/// Compares `a` and `b` for inequality through the `ucomineqsd` binding and returns its
/// `i32` result. The test attribute expects a `ucomisd` instruction.
///
/// NOTE(review): the binding is declared elsewhere. Presumably it compares only the lowest
/// `f64` lanes and yields `1` when the predicate holds and `0` otherwise; how it differs
/// from the `comi*` variant (presumably exception signalling on quiet NaNs) is not visible
/// here — confirm against the Intel docs.
2419#[inline]
2423#[target_feature(enable = "sse2")]
2424#[cfg_attr(test, assert_instr(ucomisd))]
2425#[stable(feature = "simd_x86", since = "1.27.0")]
2426pub fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32 {
2427 unsafe { ucomineqsd(a, b) }
2428}
2429
/// Converts the two `f64` lanes of `a` to `f32` and returns them in the lower two lanes of
/// a four-lane `f32` vector; the upper two lanes are zero.
2430#[inline]
2435#[target_feature(enable = "sse2")]
2436#[cfg_attr(test, assert_instr(cvtpd2ps))]
2437#[stable(feature = "simd_x86", since = "1.27.0")]
2438#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2439pub const fn _mm_cvtpd_ps(a: __m128d) -> __m128 {
2440 unsafe {
2441 let r = simd_cast::<_, f32x2>(a.as_f64x2());
2442 let zero = f32x2::ZERO;
 // Indices 0 and 1 take the converted lanes; 2 and 3 take both lanes of `zero`.
2443 transmute::<f32x4, _>(simd_shuffle!(r, zero, [0, 1, 2, 3]))
2444 }
2445}
2446
/// Converts the lower two `f32` lanes of `a` to `f64`.
///
/// Lanes 0 and 1 of `a` are selected with a shuffle and widened with `simd_cast`; lanes 2
/// and 3 of `a` are ignored.
2447#[inline]
2453#[target_feature(enable = "sse2")]
2454#[cfg_attr(test, assert_instr(cvtps2pd))]
2455#[stable(feature = "simd_x86", since = "1.27.0")]
2456#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2457pub const fn _mm_cvtps_pd(a: __m128) -> __m128d {
2458 unsafe {
2459 let a = a.as_f32x4();
2460 transmute(simd_cast::<f32x2, f64x2>(simd_shuffle!(a, a, [0, 1])))
2461 }
2462}
2463
/// Converts the `f64` lanes of `a` to 32-bit integers via the `cvtpd2dq` binding; the
/// binding's result is transmuted to `__m128i`.
///
/// NOTE(review): the binding is declared elsewhere. Per the Intel description of
/// `cvtpd2dq`, presumably the two results occupy the lower two 32-bit lanes, the upper two
/// lanes are zero, and rounding follows the current MXCSR rounding mode — confirm.
2464#[inline]
2469#[target_feature(enable = "sse2")]
2470#[cfg_attr(test, assert_instr(cvtpd2dq))]
2471#[stable(feature = "simd_x86", since = "1.27.0")]
2472pub fn _mm_cvtpd_epi32(a: __m128d) -> __m128i {
2473 unsafe { transmute(cvtpd2dq(a)) }
2474}
2475
/// Converts a `f64` from `a` to a 32-bit integer via the `cvtsd2si` binding.
///
/// NOTE(review): the binding is declared elsewhere. Per the Intel description of
/// `cvtsd2si`, presumably the lowest lane is converted using the current MXCSR rounding
/// mode — confirm.
2476#[inline]
2481#[target_feature(enable = "sse2")]
2482#[cfg_attr(test, assert_instr(cvtsd2si))]
2483#[stable(feature = "simd_x86", since = "1.27.0")]
2484pub fn _mm_cvtsd_si32(a: __m128d) -> i32 {
2485 unsafe { cvtsd2si(a) }
2486}
2487
/// Converts a `f64` from `b` to `f32` and merges it into `a` via the `cvtsd2ss` binding.
///
/// NOTE(review): the binding is declared elsewhere. Per the Intel description of
/// `cvtsd2ss`, presumably the lowest lane of `b` is converted and written to lane 0 of the
/// result while lanes 1..=3 are copied from `a` — confirm.
2488#[inline]
2495#[target_feature(enable = "sse2")]
2496#[cfg_attr(test, assert_instr(cvtsd2ss))]
2497#[stable(feature = "simd_x86", since = "1.27.0")]
2498pub fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 {
2499 unsafe { cvtsd2ss(a, b) }
2500}
2501
/// Returns the lowest `f64` lane (lane 0) of `a`.
2502#[inline]
2506#[target_feature(enable = "sse2")]
2507#[stable(feature = "simd_x86", since = "1.27.0")]
2508#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2509pub const fn _mm_cvtsd_f64(a: __m128d) -> f64 {
2510 unsafe { simd_extract!(a, 0) }
2511}
2512
/// Widens the lowest `f32` lane of `b` to `f64` and returns `a` with its lane 0 replaced by
/// that value; lane 1 of the result is copied from `a`.
2513#[inline]
2520#[target_feature(enable = "sse2")]
2521#[cfg_attr(test, assert_instr(cvtss2sd))]
2522#[stable(feature = "simd_x86", since = "1.27.0")]
2523#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2524pub const fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d {
2525 unsafe {
2526 let elt: f32 = simd_extract!(b, 0);
2527 simd_insert!(a, 0, elt as f64)
2528 }
2529}
2530
/// Converts the `f64` lanes of `a` to 32-bit integers via the `cvttpd2dq` binding; the
/// binding's result is transmuted to `__m128i`.
///
/// NOTE(review): the binding is declared elsewhere. Per the Intel description of
/// `cvttpd2dq`, presumably the conversion truncates toward zero, with the two results in
/// the lower two 32-bit lanes and zeros above — confirm.
2531#[inline]
2536#[target_feature(enable = "sse2")]
2537#[cfg_attr(test, assert_instr(cvttpd2dq))]
2538#[stable(feature = "simd_x86", since = "1.27.0")]
2539pub fn _mm_cvttpd_epi32(a: __m128d) -> __m128i {
2540 unsafe { transmute(cvttpd2dq(a)) }
2541}
2542
/// Converts a `f64` from `a` to a 32-bit integer via the `cvttsd2si` binding.
///
/// NOTE(review): the binding is declared elsewhere. Per the Intel description of
/// `cvttsd2si`, presumably the lowest lane is converted with truncation toward zero —
/// confirm.
2543#[inline]
2548#[target_feature(enable = "sse2")]
2549#[cfg_attr(test, assert_instr(cvttsd2si))]
2550#[stable(feature = "simd_x86", since = "1.27.0")]
2551pub fn _mm_cvttsd_si32(a: __m128d) -> i32 {
2552 unsafe { cvttsd2si(a) }
2553}
2554
/// Converts the `f32` lanes of `a` to 32-bit integers via the `cvttps2dq` binding; the
/// binding's result is transmuted to `__m128i`.
///
/// NOTE(review): the binding is declared elsewhere. Per the Intel description of
/// `cvttps2dq`, presumably all four lanes are converted with truncation toward zero —
/// confirm.
2555#[inline]
2560#[target_feature(enable = "sse2")]
2561#[cfg_attr(test, assert_instr(cvttps2dq))]
2562#[stable(feature = "simd_x86", since = "1.27.0")]
2563pub fn _mm_cvttps_epi32(a: __m128) -> __m128i {
2564 unsafe { transmute(cvttps2dq(a)) }
2565}
2566
/// Builds a vector from `a` and `0.0` by calling `_mm_set_pd(0.0, a)`.
///
/// Since `_mm_set_pd` stores its second argument in lane 0, the result holds `a` in the
/// lowest lane and `0.0` in the upper lane.
2567#[inline]
2572#[target_feature(enable = "sse2")]
2573#[stable(feature = "simd_x86", since = "1.27.0")]
2574#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2575pub const fn _mm_set_sd(a: f64) -> __m128d {
2576 _mm_set_pd(0.0, a)
2577}
2578
/// Builds a vector in which both `f64` lanes hold `a` (forwards to `_mm_set_pd(a, a)`).
2579#[inline]
2584#[target_feature(enable = "sse2")]
2585#[stable(feature = "simd_x86", since = "1.27.0")]
2586#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2587pub const fn _mm_set1_pd(a: f64) -> __m128d {
2588 _mm_set_pd(a, a)
2589}
2590
/// Builds a vector in which both `f64` lanes hold `a` (forwards to `_mm_set_pd(a, a)`).
///
/// The body is identical to that of `_mm_set1_pd`.
2591#[inline]
2596#[target_feature(enable = "sse2")]
2597#[stable(feature = "simd_x86", since = "1.27.0")]
2598#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2599pub const fn _mm_set_pd1(a: f64) -> __m128d {
2600 _mm_set_pd(a, a)
2601}
2602
/// Builds a vector from two `f64` values, given highest lane first.
///
/// The backing array is `[b, a]`, so `b` is stored in lane 0 (lowest) and `a` in lane 1.
2603#[inline]
2608#[target_feature(enable = "sse2")]
2609#[stable(feature = "simd_x86", since = "1.27.0")]
2610#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2611pub const fn _mm_set_pd(a: f64, b: f64) -> __m128d {
2612 __m128d([b, a])
2613}
2614
/// Builds a vector from two `f64` values, given lowest lane first.
///
/// Forwards to `_mm_set_pd(b, a)`; because `_mm_set_pd` stores its second argument in
/// lane 0, `a` ends up in lane 0 (lowest) and `b` in lane 1.
2615#[inline]
2620#[target_feature(enable = "sse2")]
2621#[stable(feature = "simd_x86", since = "1.27.0")]
2622#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2623pub const fn _mm_setr_pd(a: f64, b: f64) -> __m128d {
2624 _mm_set_pd(b, a)
2625}
2626
2627#[inline]
2632#[target_feature(enable = "sse2")]
2633#[cfg_attr(test, assert_instr(xorp))]
2634#[stable(feature = "simd_x86", since = "1.27.0")]
2635#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2636pub const fn _mm_setzero_pd() -> __m128d {
2637 const { unsafe { mem::zeroed() } }
2638}
2639
2640#[inline]
2647#[target_feature(enable = "sse2")]
2648#[cfg_attr(test, assert_instr(movmskpd))]
2649#[stable(feature = "simd_x86", since = "1.27.0")]
2650#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2651pub const fn _mm_movemask_pd(a: __m128d) -> i32 {
2652 unsafe {
2655 let mask: i64x2 = simd_lt(transmute(a), i64x2::ZERO);
2656 simd_bitmask::<i64x2, u8>(mask) as i32
2657 }
2658}
2659
2660#[inline]
2667#[target_feature(enable = "sse2")]
2668#[cfg_attr(
2669 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
2670 assert_instr(movaps)
2671)]
2672#[stable(feature = "simd_x86", since = "1.27.0")]
2673#[allow(clippy::cast_ptr_alignment)]
2674#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2675pub const unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d {
2676 *(mem_addr as *const __m128d)
2677}
2678
2679#[inline]
2684#[target_feature(enable = "sse2")]
2685#[cfg_attr(test, assert_instr(movsd))]
2686#[stable(feature = "simd_x86", since = "1.27.0")]
2687#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2688pub const unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d {
2689 _mm_setr_pd(*mem_addr, 0.)
2690}
2691
2692#[inline]
2698#[target_feature(enable = "sse2")]
2699#[cfg_attr(test, assert_instr(movhps))]
2700#[stable(feature = "simd_x86", since = "1.27.0")]
2701#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2702pub const unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
2703 _mm_setr_pd(simd_extract!(a, 0), *mem_addr)
2704}
2705
2706#[inline]
2712#[target_feature(enable = "sse2")]
2713#[cfg_attr(test, assert_instr(movlps))]
2714#[stable(feature = "simd_x86", since = "1.27.0")]
2715#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2716pub const unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
2717 _mm_setr_pd(*mem_addr, simd_extract!(a, 1))
2718}
2719
/// Stores the 128-bit vector `a` to `mem_addr` with the `movntpd` instruction,
/// emitted through inline assembly.
///
/// # Safety
///
/// `mem_addr` is handed to the instruction as the destination address, so it
/// must be valid for a 16-byte write.
/// NOTE(review): `movntpd` is expected to require a 16-byte-aligned address and
/// to be a non-temporal store — confirm against Intel's documentation before
/// relying on ordering with respect to other stores.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movntpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) {
    crate::arch::asm!(
        // `vps!` is defined elsewhere; presumably it expands to the mnemonic
        // followed by a memory operand built from `{p}` — TODO confirm.
        vps!("movntpd", ",{a}"),
        // Destination address in a general-purpose register.
        p = in(reg) mem_addr,
        // Source vector in an XMM register.
        a = in(xmm_reg) a,
        // The asm does not touch the stack and leaves the flags untouched.
        options(nostack, preserves_flags),
    );
}
2749
2750#[inline]
2755#[target_feature(enable = "sse2")]
2756#[cfg_attr(test, assert_instr(movlps))]
2757#[stable(feature = "simd_x86", since = "1.27.0")]
2758#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2759pub const unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) {
2760 *mem_addr = simd_extract!(a, 0)
2761}
2762
2763#[inline]
2769#[target_feature(enable = "sse2")]
2770#[cfg_attr(
2771 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
2772 assert_instr(movaps)
2773)]
2774#[stable(feature = "simd_x86", since = "1.27.0")]
2775#[allow(clippy::cast_ptr_alignment)]
2776#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2777pub const unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) {
2778 *(mem_addr as *mut __m128d) = a;
2779}
2780
2781#[inline]
2787#[target_feature(enable = "sse2")]
2788#[cfg_attr(test, assert_instr(movups))] #[stable(feature = "simd_x86", since = "1.27.0")]
2790#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2791pub const unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) {
2792 mem_addr.cast::<__m128d>().write_unaligned(a);
2793}
2794
2795#[inline]
2801#[target_feature(enable = "sse2")]
2802#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2803#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2804pub const unsafe fn _mm_storeu_si16(mem_addr: *mut u8, a: __m128i) {
2805 ptr::write_unaligned(mem_addr as *mut i16, simd_extract(a.as_i16x8(), 0))
2806}
2807
2808#[inline]
2814#[target_feature(enable = "sse2")]
2815#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2816#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2817pub const unsafe fn _mm_storeu_si32(mem_addr: *mut u8, a: __m128i) {
2818 ptr::write_unaligned(mem_addr as *mut i32, simd_extract(a.as_i32x4(), 0))
2819}
2820
2821#[inline]
2827#[target_feature(enable = "sse2")]
2828#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2829#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2830pub const unsafe fn _mm_storeu_si64(mem_addr: *mut u8, a: __m128i) {
2831 ptr::write_unaligned(mem_addr as *mut i64, simd_extract(a.as_i64x2(), 0))
2832}
2833
2834#[inline]
2840#[target_feature(enable = "sse2")]
2841#[stable(feature = "simd_x86", since = "1.27.0")]
2842#[allow(clippy::cast_ptr_alignment)]
2843#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2844pub const unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) {
2845 let b: __m128d = simd_shuffle!(a, a, [0, 0]);
2846 *(mem_addr as *mut __m128d) = b;
2847}
2848
2849#[inline]
2855#[target_feature(enable = "sse2")]
2856#[stable(feature = "simd_x86", since = "1.27.0")]
2857#[allow(clippy::cast_ptr_alignment)]
2858#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2859pub const unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) {
2860 let b: __m128d = simd_shuffle!(a, a, [0, 0]);
2861 *(mem_addr as *mut __m128d) = b;
2862}
2863
2864#[inline]
2871#[target_feature(enable = "sse2")]
2872#[stable(feature = "simd_x86", since = "1.27.0")]
2873#[allow(clippy::cast_ptr_alignment)]
2874#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2875pub const unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) {
2876 let b: __m128d = simd_shuffle!(a, a, [1, 0]);
2877 *(mem_addr as *mut __m128d) = b;
2878}
2879
2880#[inline]
2885#[target_feature(enable = "sse2")]
2886#[cfg_attr(test, assert_instr(movhps))]
2887#[stable(feature = "simd_x86", since = "1.27.0")]
2888#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2889pub const unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) {
2890 *mem_addr = simd_extract!(a, 1);
2891}
2892
2893#[inline]
2898#[target_feature(enable = "sse2")]
2899#[cfg_attr(test, assert_instr(movlps))]
2900#[stable(feature = "simd_x86", since = "1.27.0")]
2901#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2902pub const unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) {
2903 *mem_addr = simd_extract!(a, 0);
2904}
2905
2906#[inline]
2911#[target_feature(enable = "sse2")]
2912#[stable(feature = "simd_x86", since = "1.27.0")]
2914#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2915pub const unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d {
2916 let d = *mem_addr;
2917 _mm_setr_pd(d, d)
2918}
2919
2920#[inline]
2925#[target_feature(enable = "sse2")]
2926#[stable(feature = "simd_x86", since = "1.27.0")]
2928#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2929pub const unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d {
2930 _mm_load1_pd(mem_addr)
2931}
2932
2933#[inline]
2939#[target_feature(enable = "sse2")]
2940#[cfg_attr(
2941 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
2942 assert_instr(movaps)
2943)]
2944#[stable(feature = "simd_x86", since = "1.27.0")]
2945#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2946pub const unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d {
2947 let a = _mm_load_pd(mem_addr);
2948 simd_shuffle!(a, a, [1, 0])
2949}
2950
2951#[inline]
2957#[target_feature(enable = "sse2")]
2958#[cfg_attr(test, assert_instr(movups))]
2959#[stable(feature = "simd_x86", since = "1.27.0")]
2960#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2961pub const unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d {
2962 let mut dst = _mm_undefined_pd();
2963 ptr::copy_nonoverlapping(
2964 mem_addr as *const u8,
2965 ptr::addr_of_mut!(dst) as *mut u8,
2966 mem::size_of::<__m128d>(),
2967 );
2968 dst
2969}
2970
2971#[inline]
2977#[target_feature(enable = "sse2")]
2978#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2979#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2980pub const unsafe fn _mm_loadu_si16(mem_addr: *const u8) -> __m128i {
2981 transmute(i16x8::new(
2982 ptr::read_unaligned(mem_addr as *const i16),
2983 0,
2984 0,
2985 0,
2986 0,
2987 0,
2988 0,
2989 0,
2990 ))
2991}
2992
2993#[inline]
2999#[target_feature(enable = "sse2")]
3000#[stable(feature = "simd_x86_updates", since = "1.82.0")]
3001#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3002pub const unsafe fn _mm_loadu_si32(mem_addr: *const u8) -> __m128i {
3003 transmute(i32x4::new(
3004 ptr::read_unaligned(mem_addr as *const i32),
3005 0,
3006 0,
3007 0,
3008 ))
3009}
3010
3011#[inline]
3017#[target_feature(enable = "sse2")]
3018#[stable(feature = "simd_x86_mm_loadu_si64", since = "1.46.0")]
3019#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3020pub const unsafe fn _mm_loadu_si64(mem_addr: *const u8) -> __m128i {
3021 transmute(i64x2::new(ptr::read_unaligned(mem_addr as *const i64), 0))
3022}
3023
3024#[inline]
3030#[target_feature(enable = "sse2")]
3031#[cfg_attr(test, assert_instr(shufps, MASK = 2))]
3032#[rustc_legacy_const_generics(2)]
3033#[stable(feature = "simd_x86", since = "1.27.0")]
3034#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3035pub const fn _mm_shuffle_pd<const MASK: i32>(a: __m128d, b: __m128d) -> __m128d {
3036 static_assert_uimm_bits!(MASK, 8);
3037 unsafe { simd_shuffle!(a, b, [MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 2]) }
3038}
3039
3040#[inline]
3046#[target_feature(enable = "sse2")]
3047#[cfg_attr(test, assert_instr(movsd))]
3048#[stable(feature = "simd_x86", since = "1.27.0")]
3049#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3050pub const fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d {
3051 unsafe { _mm_setr_pd(simd_extract!(b, 0), simd_extract!(a, 1)) }
3052}
3053
3054#[inline]
3059#[target_feature(enable = "sse2")]
3060#[stable(feature = "simd_x86", since = "1.27.0")]
3061#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3062pub const fn _mm_castpd_ps(a: __m128d) -> __m128 {
3063 unsafe { transmute(a) }
3064}
3065
3066#[inline]
3071#[target_feature(enable = "sse2")]
3072#[stable(feature = "simd_x86", since = "1.27.0")]
3073#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3074pub const fn _mm_castpd_si128(a: __m128d) -> __m128i {
3075 unsafe { transmute(a) }
3076}
3077
3078#[inline]
3083#[target_feature(enable = "sse2")]
3084#[stable(feature = "simd_x86", since = "1.27.0")]
3085#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3086pub const fn _mm_castps_pd(a: __m128) -> __m128d {
3087 unsafe { transmute(a) }
3088}
3089
3090#[inline]
3095#[target_feature(enable = "sse2")]
3096#[stable(feature = "simd_x86", since = "1.27.0")]
3097#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3098pub const fn _mm_castps_si128(a: __m128) -> __m128i {
3099 unsafe { transmute(a) }
3100}
3101
3102#[inline]
3107#[target_feature(enable = "sse2")]
3108#[stable(feature = "simd_x86", since = "1.27.0")]
3109#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3110pub const fn _mm_castsi128_pd(a: __m128i) -> __m128d {
3111 unsafe { transmute(a) }
3112}
3113
3114#[inline]
3119#[target_feature(enable = "sse2")]
3120#[stable(feature = "simd_x86", since = "1.27.0")]
3121#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3122pub const fn _mm_castsi128_ps(a: __m128i) -> __m128 {
3123 unsafe { transmute(a) }
3124}
3125
3126#[inline]
3133#[target_feature(enable = "sse2")]
3134#[stable(feature = "simd_x86", since = "1.27.0")]
3135#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3136pub const fn _mm_undefined_pd() -> __m128d {
3137 const { unsafe { mem::zeroed() } }
3138}
3139
3140#[inline]
3147#[target_feature(enable = "sse2")]
3148#[stable(feature = "simd_x86", since = "1.27.0")]
3149#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3150pub const fn _mm_undefined_si128() -> __m128i {
3151 const { unsafe { mem::zeroed() } }
3152}
3153
/// Combines the upper lanes of the two inputs: result lane 0 is lane 1 of `a`
/// and result lane 1 is lane 1 of `b`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(unpckhpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d {
    // Shuffle indices address the concatenation of `a` (0, 1) and `b` (2, 3),
    // so 1 is the high lane of `a` and 3 is the high lane of `b`.
    unsafe { simd_shuffle!(a, b, [1, 3]) }
}
3169
/// Combines the lower lanes of the two inputs: result lane 0 is lane 0 of `a`
/// and result lane 1 is lane 0 of `b`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movlhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d {
    // Shuffle indices address the concatenation of `a` (0, 1) and `b` (2, 3),
    // so 0 is the low lane of `a` and 2 is the low lane of `b`.
    unsafe { simd_shuffle!(a, b, [0, 2]) }
}
3185
// Declarations of the LLVM SSE2 intrinsics that the wrappers in this module
// call. Each `#[link_name]` binds the short Rust name to the LLVM intrinsic of
// that name, so the strings must match LLVM's spelling exactly.
// NOTE(review): `improper_ctypes` is presumably allowed because the SIMD
// vector types used in these signatures are not FFI-safe by the lint's rules —
// confirm.
#[allow(improper_ctypes)]
unsafe extern "C" {
    // Spin-loop hint, cache-line flush and memory fences.
    #[link_name = "llvm.x86.sse2.pause"]
    fn pause();
    #[link_name = "llvm.x86.sse2.clflush"]
    fn clflush(p: *const u8);
    #[link_name = "llvm.x86.sse2.lfence"]
    fn lfence();
    #[link_name = "llvm.x86.sse2.mfence"]
    fn mfence();
    // Integer arithmetic helpers.
    #[link_name = "llvm.x86.sse2.pmadd.wd"]
    fn pmaddwd(a: i16x8, b: i16x8) -> i32x4;
    #[link_name = "llvm.x86.sse2.psad.bw"]
    fn psadbw(a: u8x16, b: u8x16) -> u64x2;
    // Shifts whose count is supplied in a vector operand.
    #[link_name = "llvm.x86.sse2.psll.w"]
    fn psllw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.psll.d"]
    fn pslld(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.psll.q"]
    fn psllq(a: i64x2, count: i64x2) -> i64x2;
    #[link_name = "llvm.x86.sse2.psra.w"]
    fn psraw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.psra.d"]
    fn psrad(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.psrl.w"]
    fn psrlw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.psrl.d"]
    fn psrld(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.psrl.q"]
    fn psrlq(a: i64x2, count: i64x2) -> i64x2;
    // Float-to-integer conversion, masked store and narrowing packs.
    #[link_name = "llvm.x86.sse2.cvtps2dq"]
    fn cvtps2dq(a: __m128) -> i32x4;
    #[link_name = "llvm.x86.sse2.maskmov.dqu"]
    fn maskmovdqu(a: i8x16, mask: i8x16, mem_addr: *mut i8);
    #[link_name = "llvm.x86.sse2.packsswb.128"]
    fn packsswb(a: i16x8, b: i16x8) -> i8x16;
    #[link_name = "llvm.x86.sse2.packssdw.128"]
    fn packssdw(a: i32x4, b: i32x4) -> i16x8;
    #[link_name = "llvm.x86.sse2.packuswb.128"]
    fn packuswb(a: i16x8, b: i16x8) -> u8x16;
    // Double-precision min/max (scalar `sd` and packed `pd` forms).
    #[link_name = "llvm.x86.sse2.max.sd"]
    fn maxsd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.max.pd"]
    fn maxpd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.min.sd"]
    fn minsd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.min.pd"]
    fn minpd(a: __m128d, b: __m128d) -> __m128d;
    // Double-precision compares; `imm8` selects the comparison predicate.
    #[link_name = "llvm.x86.sse2.cmp.sd"]
    fn cmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
    #[link_name = "llvm.x86.sse2.cmp.pd"]
    fn cmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
    // Scalar `comisd`-family compares returning an integer result.
    #[link_name = "llvm.x86.sse2.comieq.sd"]
    fn comieqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comilt.sd"]
    fn comiltsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comile.sd"]
    fn comilesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comigt.sd"]
    fn comigtsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comige.sd"]
    fn comigesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comineq.sd"]
    fn comineqsd(a: __m128d, b: __m128d) -> i32;
    // Scalar `ucomisd`-family compares returning an integer result.
    #[link_name = "llvm.x86.sse2.ucomieq.sd"]
    fn ucomieqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomilt.sd"]
    fn ucomiltsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomile.sd"]
    fn ucomilesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomigt.sd"]
    fn ucomigtsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomige.sd"]
    fn ucomigesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomineq.sd"]
    fn ucomineqsd(a: __m128d, b: __m128d) -> i32;
    // Double/single-precision conversions, including the truncating
    // (`cvtt*`) variants.
    #[link_name = "llvm.x86.sse2.cvtpd2dq"]
    fn cvtpd2dq(a: __m128d) -> i32x4;
    #[link_name = "llvm.x86.sse2.cvtsd2si"]
    fn cvtsd2si(a: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.cvtsd2ss"]
    fn cvtsd2ss(a: __m128, b: __m128d) -> __m128;
    #[link_name = "llvm.x86.sse2.cvttpd2dq"]
    fn cvttpd2dq(a: __m128d) -> i32x4;
    #[link_name = "llvm.x86.sse2.cvttsd2si"]
    fn cvttsd2si(a: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.cvttps2dq"]
    fn cvttps2dq(a: __m128) -> i32x4;
}
3275
3276#[cfg(test)]
3277mod tests {
3278 use crate::core_arch::assert_eq_const as assert_eq;
3279 use crate::{
3280 core_arch::{simd::*, x86::*},
3281 hint::black_box,
3282 };
3283 use std::{
3284 boxed, f32, f64,
3285 mem::{self, transmute},
3286 ptr,
3287 };
3288 use stdarch_test::simd_test;
3289
    // Local shorthand for `f64::NAN`.
    const NAN: f64 = f64::NAN;
3291
    // Smoke test: `_mm_pause` is called from a plain `#[test]` and only has to
    // return.
    #[test]
    fn test_mm_pause() {
        _mm_pause()
    }
3296
3297 #[simd_test(enable = "sse2")]
3298 unsafe fn test_mm_clflush() {
3299 let x = 0_u8;
3300 _mm_clflush(ptr::addr_of!(x));
3301 }
3302
    // Smoke test: the fence only has to execute. Skipped when running under
    // Miri.
    #[simd_test(enable = "sse2")]
    #[cfg_attr(miri, ignore)]
    fn test_mm_lfence() {
        _mm_lfence();
    }
3309
    // Smoke test: the fence only has to execute. Skipped when running under
    // Miri.
    #[simd_test(enable = "sse2")]
    #[cfg_attr(miri, ignore)]
    fn test_mm_mfence() {
        _mm_mfence();
    }
3316
3317 #[simd_test(enable = "sse2")]
3318 const fn test_mm_add_epi8() {
3319 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3320 #[rustfmt::skip]
3321 let b = _mm_setr_epi8(
3322 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3323 );
3324 let r = _mm_add_epi8(a, b);
3325 #[rustfmt::skip]
3326 let e = _mm_setr_epi8(
3327 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3328 );
3329 assert_eq_m128i(r, e);
3330 }
3331
3332 #[simd_test(enable = "sse2")]
3333 fn test_mm_add_epi8_overflow() {
3334 let a = _mm_set1_epi8(0x7F);
3335 let b = _mm_set1_epi8(1);
3336 let r = _mm_add_epi8(a, b);
3337 assert_eq_m128i(r, _mm_set1_epi8(-128));
3338 }
3339
3340 #[simd_test(enable = "sse2")]
3341 const fn test_mm_add_epi16() {
3342 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3343 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3344 let r = _mm_add_epi16(a, b);
3345 let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3346 assert_eq_m128i(r, e);
3347 }
3348
3349 #[simd_test(enable = "sse2")]
3350 const fn test_mm_add_epi32() {
3351 let a = _mm_setr_epi32(0, 1, 2, 3);
3352 let b = _mm_setr_epi32(4, 5, 6, 7);
3353 let r = _mm_add_epi32(a, b);
3354 let e = _mm_setr_epi32(4, 6, 8, 10);
3355 assert_eq_m128i(r, e);
3356 }
3357
3358 #[simd_test(enable = "sse2")]
3359 const fn test_mm_add_epi64() {
3360 let a = _mm_setr_epi64x(0, 1);
3361 let b = _mm_setr_epi64x(2, 3);
3362 let r = _mm_add_epi64(a, b);
3363 let e = _mm_setr_epi64x(2, 4);
3364 assert_eq_m128i(r, e);
3365 }
3366
3367 #[simd_test(enable = "sse2")]
3368 const fn test_mm_adds_epi8() {
3369 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3370 #[rustfmt::skip]
3371 let b = _mm_setr_epi8(
3372 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3373 );
3374 let r = _mm_adds_epi8(a, b);
3375 #[rustfmt::skip]
3376 let e = _mm_setr_epi8(
3377 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3378 );
3379 assert_eq_m128i(r, e);
3380 }
3381
3382 #[simd_test(enable = "sse2")]
3383 fn test_mm_adds_epi8_saturate_positive() {
3384 let a = _mm_set1_epi8(0x7F);
3385 let b = _mm_set1_epi8(1);
3386 let r = _mm_adds_epi8(a, b);
3387 assert_eq_m128i(r, a);
3388 }
3389
3390 #[simd_test(enable = "sse2")]
3391 fn test_mm_adds_epi8_saturate_negative() {
3392 let a = _mm_set1_epi8(-0x80);
3393 let b = _mm_set1_epi8(-1);
3394 let r = _mm_adds_epi8(a, b);
3395 assert_eq_m128i(r, a);
3396 }
3397
3398 #[simd_test(enable = "sse2")]
3399 const fn test_mm_adds_epi16() {
3400 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3401 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3402 let r = _mm_adds_epi16(a, b);
3403 let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3404 assert_eq_m128i(r, e);
3405 }
3406
3407 #[simd_test(enable = "sse2")]
3408 fn test_mm_adds_epi16_saturate_positive() {
3409 let a = _mm_set1_epi16(0x7FFF);
3410 let b = _mm_set1_epi16(1);
3411 let r = _mm_adds_epi16(a, b);
3412 assert_eq_m128i(r, a);
3413 }
3414
3415 #[simd_test(enable = "sse2")]
3416 fn test_mm_adds_epi16_saturate_negative() {
3417 let a = _mm_set1_epi16(-0x8000);
3418 let b = _mm_set1_epi16(-1);
3419 let r = _mm_adds_epi16(a, b);
3420 assert_eq_m128i(r, a);
3421 }
3422
3423 #[simd_test(enable = "sse2")]
3424 const fn test_mm_adds_epu8() {
3425 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3426 #[rustfmt::skip]
3427 let b = _mm_setr_epi8(
3428 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3429 );
3430 let r = _mm_adds_epu8(a, b);
3431 #[rustfmt::skip]
3432 let e = _mm_setr_epi8(
3433 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3434 );
3435 assert_eq_m128i(r, e);
3436 }
3437
3438 #[simd_test(enable = "sse2")]
3439 fn test_mm_adds_epu8_saturate() {
3440 let a = _mm_set1_epi8(!0);
3441 let b = _mm_set1_epi8(1);
3442 let r = _mm_adds_epu8(a, b);
3443 assert_eq_m128i(r, a);
3444 }
3445
3446 #[simd_test(enable = "sse2")]
3447 const fn test_mm_adds_epu16() {
3448 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3449 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3450 let r = _mm_adds_epu16(a, b);
3451 let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3452 assert_eq_m128i(r, e);
3453 }
3454
3455 #[simd_test(enable = "sse2")]
3456 fn test_mm_adds_epu16_saturate() {
3457 let a = _mm_set1_epi16(!0);
3458 let b = _mm_set1_epi16(1);
3459 let r = _mm_adds_epu16(a, b);
3460 assert_eq_m128i(r, a);
3461 }
3462
3463 #[simd_test(enable = "sse2")]
3464 const fn test_mm_avg_epu8() {
3465 let (a, b) = (_mm_set1_epi8(3), _mm_set1_epi8(9));
3466 let r = _mm_avg_epu8(a, b);
3467 assert_eq_m128i(r, _mm_set1_epi8(6));
3468 }
3469
3470 #[simd_test(enable = "sse2")]
3471 const fn test_mm_avg_epu16() {
3472 let (a, b) = (_mm_set1_epi16(3), _mm_set1_epi16(9));
3473 let r = _mm_avg_epu16(a, b);
3474 assert_eq_m128i(r, _mm_set1_epi16(6));
3475 }
3476
3477 #[simd_test(enable = "sse2")]
3478 fn test_mm_madd_epi16() {
3479 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
3480 let b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
3481 let r = _mm_madd_epi16(a, b);
3482 let e = _mm_setr_epi32(29, 81, 149, 233);
3483 assert_eq_m128i(r, e);
3484
3485 let a = _mm_setr_epi16(
3488 i16::MAX,
3489 i16::MAX,
3490 i16::MIN,
3491 i16::MIN,
3492 i16::MIN,
3493 i16::MAX,
3494 0,
3495 0,
3496 );
3497 let b = _mm_setr_epi16(
3498 i16::MAX,
3499 i16::MAX,
3500 i16::MIN,
3501 i16::MIN,
3502 i16::MAX,
3503 i16::MIN,
3504 0,
3505 0,
3506 );
3507 let r = _mm_madd_epi16(a, b);
3508 let e = _mm_setr_epi32(0x7FFE0002, i32::MIN, -0x7FFF0000, 0);
3509 assert_eq_m128i(r, e);
3510 }
3511
3512 #[simd_test(enable = "sse2")]
3513 const fn test_mm_max_epi16() {
3514 let a = _mm_set1_epi16(1);
3515 let b = _mm_set1_epi16(-1);
3516 let r = _mm_max_epi16(a, b);
3517 assert_eq_m128i(r, a);
3518 }
3519
3520 #[simd_test(enable = "sse2")]
3521 const fn test_mm_max_epu8() {
3522 let a = _mm_set1_epi8(1);
3523 let b = _mm_set1_epi8(!0);
3524 let r = _mm_max_epu8(a, b);
3525 assert_eq_m128i(r, b);
3526 }
3527
3528 #[simd_test(enable = "sse2")]
3529 const fn test_mm_min_epi16() {
3530 let a = _mm_set1_epi16(1);
3531 let b = _mm_set1_epi16(-1);
3532 let r = _mm_min_epi16(a, b);
3533 assert_eq_m128i(r, b);
3534 }
3535
3536 #[simd_test(enable = "sse2")]
3537 const fn test_mm_min_epu8() {
3538 let a = _mm_set1_epi8(1);
3539 let b = _mm_set1_epi8(!0);
3540 let r = _mm_min_epu8(a, b);
3541 assert_eq_m128i(r, a);
3542 }
3543
3544 #[simd_test(enable = "sse2")]
3545 const fn test_mm_mulhi_epi16() {
3546 let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
3547 let r = _mm_mulhi_epi16(a, b);
3548 assert_eq_m128i(r, _mm_set1_epi16(-16));
3549 }
3550
3551 #[simd_test(enable = "sse2")]
3552 const fn test_mm_mulhi_epu16() {
3553 let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(1001));
3554 let r = _mm_mulhi_epu16(a, b);
3555 assert_eq_m128i(r, _mm_set1_epi16(15));
3556 }
3557
3558 #[simd_test(enable = "sse2")]
3559 const fn test_mm_mullo_epi16() {
3560 let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
3561 let r = _mm_mullo_epi16(a, b);
3562 assert_eq_m128i(r, _mm_set1_epi16(-17960));
3563 }
3564
3565 #[simd_test(enable = "sse2")]
3566 const fn test_mm_mul_epu32() {
3567 let a = _mm_setr_epi64x(1_000_000_000, 1 << 34);
3568 let b = _mm_setr_epi64x(1_000_000_000, 1 << 35);
3569 let r = _mm_mul_epu32(a, b);
3570 let e = _mm_setr_epi64x(1_000_000_000 * 1_000_000_000, 0);
3571 assert_eq_m128i(r, e);
3572 }
3573
3574 #[simd_test(enable = "sse2")]
3575 fn test_mm_sad_epu8() {
3576 #[rustfmt::skip]
3577 let a = _mm_setr_epi8(
3578 255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
3579 1, 2, 3, 4,
3580 155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8,
3581 1, 2, 3, 4,
3582 );
3583 let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2);
3584 let r = _mm_sad_epu8(a, b);
3585 let e = _mm_setr_epi64x(1020, 614);
3586 assert_eq_m128i(r, e);
3587 }
3588
3589 #[simd_test(enable = "sse2")]
3590 const fn test_mm_sub_epi8() {
3591 let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(6));
3592 let r = _mm_sub_epi8(a, b);
3593 assert_eq_m128i(r, _mm_set1_epi8(-1));
3594 }
3595
3596 #[simd_test(enable = "sse2")]
3597 const fn test_mm_sub_epi16() {
3598 let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(6));
3599 let r = _mm_sub_epi16(a, b);
3600 assert_eq_m128i(r, _mm_set1_epi16(-1));
3601 }
3602
3603 #[simd_test(enable = "sse2")]
3604 const fn test_mm_sub_epi32() {
3605 let (a, b) = (_mm_set1_epi32(5), _mm_set1_epi32(6));
3606 let r = _mm_sub_epi32(a, b);
3607 assert_eq_m128i(r, _mm_set1_epi32(-1));
3608 }
3609
3610 #[simd_test(enable = "sse2")]
3611 const fn test_mm_sub_epi64() {
3612 let (a, b) = (_mm_set1_epi64x(5), _mm_set1_epi64x(6));
3613 let r = _mm_sub_epi64(a, b);
3614 assert_eq_m128i(r, _mm_set1_epi64x(-1));
3615 }
3616
3617 #[simd_test(enable = "sse2")]
3618 const fn test_mm_subs_epi8() {
3619 let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
3620 let r = _mm_subs_epi8(a, b);
3621 assert_eq_m128i(r, _mm_set1_epi8(3));
3622 }
3623
3624 #[simd_test(enable = "sse2")]
3625 fn test_mm_subs_epi8_saturate_positive() {
3626 let a = _mm_set1_epi8(0x7F);
3627 let b = _mm_set1_epi8(-1);
3628 let r = _mm_subs_epi8(a, b);
3629 assert_eq_m128i(r, a);
3630 }
3631
3632 #[simd_test(enable = "sse2")]
3633 fn test_mm_subs_epi8_saturate_negative() {
3634 let a = _mm_set1_epi8(-0x80);
3635 let b = _mm_set1_epi8(1);
3636 let r = _mm_subs_epi8(a, b);
3637 assert_eq_m128i(r, a);
3638 }
3639
3640 #[simd_test(enable = "sse2")]
3641 const fn test_mm_subs_epi16() {
3642 let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
3643 let r = _mm_subs_epi16(a, b);
3644 assert_eq_m128i(r, _mm_set1_epi16(3));
3645 }
3646
3647 #[simd_test(enable = "sse2")]
3648 fn test_mm_subs_epi16_saturate_positive() {
3649 let a = _mm_set1_epi16(0x7FFF);
3650 let b = _mm_set1_epi16(-1);
3651 let r = _mm_subs_epi16(a, b);
3652 assert_eq_m128i(r, a);
3653 }
3654
3655 #[simd_test(enable = "sse2")]
3656 fn test_mm_subs_epi16_saturate_negative() {
3657 let a = _mm_set1_epi16(-0x8000);
3658 let b = _mm_set1_epi16(1);
3659 let r = _mm_subs_epi16(a, b);
3660 assert_eq_m128i(r, a);
3661 }
3662
3663 #[simd_test(enable = "sse2")]
3664 const fn test_mm_subs_epu8() {
3665 let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
3666 let r = _mm_subs_epu8(a, b);
3667 assert_eq_m128i(r, _mm_set1_epi8(3));
3668 }
3669
3670 #[simd_test(enable = "sse2")]
3671 fn test_mm_subs_epu8_saturate() {
3672 let a = _mm_set1_epi8(0);
3673 let b = _mm_set1_epi8(1);
3674 let r = _mm_subs_epu8(a, b);
3675 assert_eq_m128i(r, a);
3676 }
3677
3678 #[simd_test(enable = "sse2")]
3679 const fn test_mm_subs_epu16() {
3680 let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
3681 let r = _mm_subs_epu16(a, b);
3682 assert_eq_m128i(r, _mm_set1_epi16(3));
3683 }
3684
3685 #[simd_test(enable = "sse2")]
3686 fn test_mm_subs_epu16_saturate() {
3687 let a = _mm_set1_epi16(0);
3688 let b = _mm_set1_epi16(1);
3689 let r = _mm_subs_epu16(a, b);
3690 assert_eq_m128i(r, a);
3691 }
3692
3693 #[simd_test(enable = "sse2")]
3694 const fn test_mm_slli_si128() {
3695 #[rustfmt::skip]
3696 let a = _mm_setr_epi8(
3697 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3698 );
3699 let r = _mm_slli_si128::<1>(a);
3700 let e = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3701 assert_eq_m128i(r, e);
3702
3703 #[rustfmt::skip]
3704 let a = _mm_setr_epi8(
3705 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3706 );
3707 let r = _mm_slli_si128::<15>(a);
3708 let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
3709 assert_eq_m128i(r, e);
3710
3711 #[rustfmt::skip]
3712 let a = _mm_setr_epi8(
3713 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3714 );
3715 let r = _mm_slli_si128::<16>(a);
3716 assert_eq_m128i(r, _mm_set1_epi8(0));
3717 }
3718
3719 #[simd_test(enable = "sse2")]
3720 const fn test_mm_slli_epi16() {
3721 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3722 let r = _mm_slli_epi16::<4>(a);
3723 assert_eq_m128i(
3724 r,
3725 _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
3726 );
3727 let r = _mm_slli_epi16::<16>(a);
3728 assert_eq_m128i(r, _mm_set1_epi16(0));
3729 }
3730
3731 #[simd_test(enable = "sse2")]
3732 unsafe fn test_mm_sll_epi16() {
3733 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3734 let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 4));
3735 assert_eq_m128i(
3736 r,
3737 _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
3738 );
3739 let r = _mm_sll_epi16(a, _mm_set_epi64x(4, 0));
3740 assert_eq_m128i(r, a);
3741 let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 16));
3742 assert_eq_m128i(r, _mm_set1_epi16(0));
3743 let r = _mm_sll_epi16(a, _mm_set_epi64x(0, i64::MAX));
3744 assert_eq_m128i(r, _mm_set1_epi16(0));
3745 }
3746
3747 #[simd_test(enable = "sse2")]
3748 const fn test_mm_slli_epi32() {
3749 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3750 let r = _mm_slli_epi32::<4>(a);
3751 assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
3752 let r = _mm_slli_epi32::<32>(a);
3753 assert_eq_m128i(r, _mm_set1_epi32(0));
3754 }
3755
3756 #[simd_test(enable = "sse2")]
3757 fn test_mm_sll_epi32() {
3758 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3759 let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 4));
3760 assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
3761 let r = _mm_sll_epi32(a, _mm_set_epi64x(4, 0));
3762 assert_eq_m128i(r, a);
3763 let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 32));
3764 assert_eq_m128i(r, _mm_set1_epi32(0));
3765 let r = _mm_sll_epi32(a, _mm_set_epi64x(0, i64::MAX));
3766 assert_eq_m128i(r, _mm_set1_epi32(0));
3767 }
3768
3769 #[simd_test(enable = "sse2")]
3770 const fn test_mm_slli_epi64() {
3771 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3772 let r = _mm_slli_epi64::<4>(a);
3773 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
3774 let r = _mm_slli_epi64::<64>(a);
3775 assert_eq_m128i(r, _mm_set1_epi64x(0));
3776 }
3777
3778 #[simd_test(enable = "sse2")]
3779 fn test_mm_sll_epi64() {
3780 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3781 let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 4));
3782 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
3783 let r = _mm_sll_epi64(a, _mm_set_epi64x(4, 0));
3784 assert_eq_m128i(r, a);
3785 let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 64));
3786 assert_eq_m128i(r, _mm_set1_epi64x(0));
3787 let r = _mm_sll_epi64(a, _mm_set_epi64x(0, i64::MAX));
3788 assert_eq_m128i(r, _mm_set1_epi64x(0));
3789 }
3790
3791 #[simd_test(enable = "sse2")]
3792 const fn test_mm_srai_epi16() {
3793 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3794 let r = _mm_srai_epi16::<4>(a);
3795 assert_eq_m128i(
3796 r,
3797 _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10),
3798 );
3799 let r = _mm_srai_epi16::<16>(a);
3800 assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3801 }
3802
3803 #[simd_test(enable = "sse2")]
3804 fn test_mm_sra_epi16() {
3805 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3806 let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 4));
3807 assert_eq_m128i(
3808 r,
3809 _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10),
3810 );
3811 let r = _mm_sra_epi16(a, _mm_set_epi64x(4, 0));
3812 assert_eq_m128i(r, a);
3813 let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 16));
3814 assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3815 let r = _mm_sra_epi16(a, _mm_set_epi64x(0, i64::MAX));
3816 assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3817 }
3818
3819 #[simd_test(enable = "sse2")]
3820 const fn test_mm_srai_epi32() {
3821 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3822 let r = _mm_srai_epi32::<4>(a);
3823 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
3824 let r = _mm_srai_epi32::<32>(a);
3825 assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3826 }
3827
3828 #[simd_test(enable = "sse2")]
3829 fn test_mm_sra_epi32() {
3830 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3831 let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 4));
3832 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
3833 let r = _mm_sra_epi32(a, _mm_set_epi64x(4, 0));
3834 assert_eq_m128i(r, a);
3835 let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 32));
3836 assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3837 let r = _mm_sra_epi32(a, _mm_set_epi64x(0, i64::MAX));
3838 assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3839 }
3840
3841 #[simd_test(enable = "sse2")]
3842 const fn test_mm_srli_si128() {
3843 #[rustfmt::skip]
3844 let a = _mm_setr_epi8(
3845 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3846 );
3847 let r = _mm_srli_si128::<1>(a);
3848 #[rustfmt::skip]
3849 let e = _mm_setr_epi8(
3850 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0,
3851 );
3852 assert_eq_m128i(r, e);
3853
3854 #[rustfmt::skip]
3855 let a = _mm_setr_epi8(
3856 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3857 );
3858 let r = _mm_srli_si128::<15>(a);
3859 let e = _mm_setr_epi8(16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3860 assert_eq_m128i(r, e);
3861
3862 #[rustfmt::skip]
3863 let a = _mm_setr_epi8(
3864 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3865 );
3866 let r = _mm_srli_si128::<16>(a);
3867 assert_eq_m128i(r, _mm_set1_epi8(0));
3868 }
3869
3870 #[simd_test(enable = "sse2")]
3871 const fn test_mm_srli_epi16() {
3872 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3873 let r = _mm_srli_epi16::<4>(a);
3874 assert_eq_m128i(
3875 r,
3876 _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0),
3877 );
3878 let r = _mm_srli_epi16::<16>(a);
3879 assert_eq_m128i(r, _mm_set1_epi16(0));
3880 }
3881
3882 #[simd_test(enable = "sse2")]
3883 fn test_mm_srl_epi16() {
3884 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3885 let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 4));
3886 assert_eq_m128i(
3887 r,
3888 _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0),
3889 );
3890 let r = _mm_srl_epi16(a, _mm_set_epi64x(4, 0));
3891 assert_eq_m128i(r, a);
3892 let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 16));
3893 assert_eq_m128i(r, _mm_set1_epi16(0));
3894 let r = _mm_srl_epi16(a, _mm_set_epi64x(0, i64::MAX));
3895 assert_eq_m128i(r, _mm_set1_epi16(0));
3896 }
3897
3898 #[simd_test(enable = "sse2")]
3899 const fn test_mm_srli_epi32() {
3900 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3901 let r = _mm_srli_epi32::<4>(a);
3902 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
3903 let r = _mm_srli_epi32::<32>(a);
3904 assert_eq_m128i(r, _mm_set1_epi32(0));
3905 }
3906
3907 #[simd_test(enable = "sse2")]
3908 fn test_mm_srl_epi32() {
3909 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3910 let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 4));
3911 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
3912 let r = _mm_srl_epi32(a, _mm_set_epi64x(4, 0));
3913 assert_eq_m128i(r, a);
3914 let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 32));
3915 assert_eq_m128i(r, _mm_set1_epi32(0));
3916 let r = _mm_srl_epi32(a, _mm_set_epi64x(0, i64::MAX));
3917 assert_eq_m128i(r, _mm_set1_epi32(0));
3918 }
3919
3920 #[simd_test(enable = "sse2")]
3921 const fn test_mm_srli_epi64() {
3922 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3923 let r = _mm_srli_epi64::<4>(a);
3924 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
3925 let r = _mm_srli_epi64::<64>(a);
3926 assert_eq_m128i(r, _mm_set1_epi64x(0));
3927 }
3928
3929 #[simd_test(enable = "sse2")]
3930 fn test_mm_srl_epi64() {
3931 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3932 let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 4));
3933 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
3934 let r = _mm_srl_epi64(a, _mm_set_epi64x(4, 0));
3935 assert_eq_m128i(r, a);
3936 let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 64));
3937 assert_eq_m128i(r, _mm_set1_epi64x(0));
3938 let r = _mm_srl_epi64(a, _mm_set_epi64x(0, i64::MAX));
3939 assert_eq_m128i(r, _mm_set1_epi64x(0));
3940 }
3941
3942 #[simd_test(enable = "sse2")]
3943 const fn test_mm_and_si128() {
3944 let a = _mm_set1_epi8(5);
3945 let b = _mm_set1_epi8(3);
3946 let r = _mm_and_si128(a, b);
3947 assert_eq_m128i(r, _mm_set1_epi8(1));
3948 }
3949
3950 #[simd_test(enable = "sse2")]
3951 const fn test_mm_andnot_si128() {
3952 let a = _mm_set1_epi8(5);
3953 let b = _mm_set1_epi8(3);
3954 let r = _mm_andnot_si128(a, b);
3955 assert_eq_m128i(r, _mm_set1_epi8(2));
3956 }
3957
3958 #[simd_test(enable = "sse2")]
3959 const fn test_mm_or_si128() {
3960 let a = _mm_set1_epi8(5);
3961 let b = _mm_set1_epi8(3);
3962 let r = _mm_or_si128(a, b);
3963 assert_eq_m128i(r, _mm_set1_epi8(7));
3964 }
3965
3966 #[simd_test(enable = "sse2")]
3967 const fn test_mm_xor_si128() {
3968 let a = _mm_set1_epi8(5);
3969 let b = _mm_set1_epi8(3);
3970 let r = _mm_xor_si128(a, b);
3971 assert_eq_m128i(r, _mm_set1_epi8(6));
3972 }
3973
3974 #[simd_test(enable = "sse2")]
3975 const fn test_mm_cmpeq_epi8() {
3976 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3977 let b = _mm_setr_epi8(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
3978 let r = _mm_cmpeq_epi8(a, b);
3979 #[rustfmt::skip]
3980 assert_eq_m128i(
3981 r,
3982 _mm_setr_epi8(
3983 0, 0, 0xFFu8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
3984 )
3985 );
3986 }
3987
3988 #[simd_test(enable = "sse2")]
3989 const fn test_mm_cmpeq_epi16() {
3990 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3991 let b = _mm_setr_epi16(7, 6, 2, 4, 3, 2, 1, 0);
3992 let r = _mm_cmpeq_epi16(a, b);
3993 assert_eq_m128i(r, _mm_setr_epi16(0, 0, !0, 0, 0, 0, 0, 0));
3994 }
3995
3996 #[simd_test(enable = "sse2")]
3997 const fn test_mm_cmpeq_epi32() {
3998 let a = _mm_setr_epi32(0, 1, 2, 3);
3999 let b = _mm_setr_epi32(3, 2, 2, 0);
4000 let r = _mm_cmpeq_epi32(a, b);
4001 assert_eq_m128i(r, _mm_setr_epi32(0, 0, !0, 0));
4002 }
4003
4004 #[simd_test(enable = "sse2")]
4005 const fn test_mm_cmpgt_epi8() {
4006 let a = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
4007 let b = _mm_set1_epi8(0);
4008 let r = _mm_cmpgt_epi8(a, b);
4009 let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
4010 assert_eq_m128i(r, e);
4011 }
4012
4013 #[simd_test(enable = "sse2")]
4014 const fn test_mm_cmpgt_epi16() {
4015 let a = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
4016 let b = _mm_set1_epi16(0);
4017 let r = _mm_cmpgt_epi16(a, b);
4018 let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
4019 assert_eq_m128i(r, e);
4020 }
4021
4022 #[simd_test(enable = "sse2")]
4023 const fn test_mm_cmpgt_epi32() {
4024 let a = _mm_set_epi32(5, 0, 0, 0);
4025 let b = _mm_set1_epi32(0);
4026 let r = _mm_cmpgt_epi32(a, b);
4027 assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
4028 }
4029
4030 #[simd_test(enable = "sse2")]
4031 const fn test_mm_cmplt_epi8() {
4032 let a = _mm_set1_epi8(0);
4033 let b = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
4034 let r = _mm_cmplt_epi8(a, b);
4035 let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
4036 assert_eq_m128i(r, e);
4037 }
4038
4039 #[simd_test(enable = "sse2")]
4040 const fn test_mm_cmplt_epi16() {
4041 let a = _mm_set1_epi16(0);
4042 let b = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
4043 let r = _mm_cmplt_epi16(a, b);
4044 let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
4045 assert_eq_m128i(r, e);
4046 }
4047
4048 #[simd_test(enable = "sse2")]
4049 const fn test_mm_cmplt_epi32() {
4050 let a = _mm_set1_epi32(0);
4051 let b = _mm_set_epi32(5, 0, 0, 0);
4052 let r = _mm_cmplt_epi32(a, b);
4053 assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
4054 }
4055
4056 #[simd_test(enable = "sse2")]
4057 const fn test_mm_cvtepi32_pd() {
4058 let a = _mm_set_epi32(35, 25, 15, 5);
4059 let r = _mm_cvtepi32_pd(a);
4060 assert_eq_m128d(r, _mm_setr_pd(5.0, 15.0));
4061 }
4062
4063 #[simd_test(enable = "sse2")]
4064 const fn test_mm_cvtsi32_sd() {
4065 let a = _mm_set1_pd(3.5);
4066 let r = _mm_cvtsi32_sd(a, 5);
4067 assert_eq_m128d(r, _mm_setr_pd(5.0, 3.5));
4068 }
4069
4070 #[simd_test(enable = "sse2")]
4071 const fn test_mm_cvtepi32_ps() {
4072 let a = _mm_setr_epi32(1, 2, 3, 4);
4073 let r = _mm_cvtepi32_ps(a);
4074 assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0));
4075 }
4076
4077 #[simd_test(enable = "sse2")]
4078 unsafe fn test_mm_cvtps_epi32() {
4079 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
4080 let r = _mm_cvtps_epi32(a);
4081 assert_eq_m128i(r, _mm_setr_epi32(1, 2, 3, 4));
4082 }
4083
4084 #[simd_test(enable = "sse2")]
4085 const fn test_mm_cvtsi32_si128() {
4086 let r = _mm_cvtsi32_si128(5);
4087 assert_eq_m128i(r, _mm_setr_epi32(5, 0, 0, 0));
4088 }
4089
4090 #[simd_test(enable = "sse2")]
4091 const fn test_mm_cvtsi128_si32() {
4092 let r = _mm_cvtsi128_si32(_mm_setr_epi32(5, 0, 0, 0));
4093 assert_eq!(r, 5);
4094 }
4095
4096 #[simd_test(enable = "sse2")]
4097 const fn test_mm_set_epi64x() {
4098 let r = _mm_set_epi64x(0, 1);
4099 assert_eq_m128i(r, _mm_setr_epi64x(1, 0));
4100 }
4101
4102 #[simd_test(enable = "sse2")]
4103 const fn test_mm_set_epi32() {
4104 let r = _mm_set_epi32(0, 1, 2, 3);
4105 assert_eq_m128i(r, _mm_setr_epi32(3, 2, 1, 0));
4106 }
4107
4108 #[simd_test(enable = "sse2")]
4109 const fn test_mm_set_epi16() {
4110 let r = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4111 assert_eq_m128i(r, _mm_setr_epi16(7, 6, 5, 4, 3, 2, 1, 0));
4112 }
4113
4114 #[simd_test(enable = "sse2")]
4115 const fn test_mm_set_epi8() {
4116 #[rustfmt::skip]
4117 let r = _mm_set_epi8(
4118 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
4119 );
4120 #[rustfmt::skip]
4121 let e = _mm_setr_epi8(
4122 15, 14, 13, 12, 11, 10, 9, 8,
4123 7, 6, 5, 4, 3, 2, 1, 0,
4124 );
4125 assert_eq_m128i(r, e);
4126 }
4127
4128 #[simd_test(enable = "sse2")]
4129 const fn test_mm_set1_epi64x() {
4130 let r = _mm_set1_epi64x(1);
4131 assert_eq_m128i(r, _mm_set1_epi64x(1));
4132 }
4133
4134 #[simd_test(enable = "sse2")]
4135 const fn test_mm_set1_epi32() {
4136 let r = _mm_set1_epi32(1);
4137 assert_eq_m128i(r, _mm_set1_epi32(1));
4138 }
4139
4140 #[simd_test(enable = "sse2")]
4141 const fn test_mm_set1_epi16() {
4142 let r = _mm_set1_epi16(1);
4143 assert_eq_m128i(r, _mm_set1_epi16(1));
4144 }
4145
4146 #[simd_test(enable = "sse2")]
4147 const fn test_mm_set1_epi8() {
4148 let r = _mm_set1_epi8(1);
4149 assert_eq_m128i(r, _mm_set1_epi8(1));
4150 }
4151
4152 #[simd_test(enable = "sse2")]
4153 const fn test_mm_setr_epi32() {
4154 let r = _mm_setr_epi32(0, 1, 2, 3);
4155 assert_eq_m128i(r, _mm_setr_epi32(0, 1, 2, 3));
4156 }
4157
4158 #[simd_test(enable = "sse2")]
4159 const fn test_mm_setr_epi16() {
4160 let r = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4161 assert_eq_m128i(r, _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7));
4162 }
4163
4164 #[simd_test(enable = "sse2")]
4165 const fn test_mm_setr_epi8() {
4166 #[rustfmt::skip]
4167 let r = _mm_setr_epi8(
4168 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
4169 );
4170 #[rustfmt::skip]
4171 let e = _mm_setr_epi8(
4172 0, 1, 2, 3, 4, 5, 6, 7,
4173 8, 9, 10, 11, 12, 13, 14, 15,
4174 );
4175 assert_eq_m128i(r, e);
4176 }
4177
4178 #[simd_test(enable = "sse2")]
4179 const fn test_mm_setzero_si128() {
4180 let r = _mm_setzero_si128();
4181 assert_eq_m128i(r, _mm_set1_epi64x(0));
4182 }
4183
4184 #[simd_test(enable = "sse2")]
4185 const unsafe fn test_mm_loadl_epi64() {
4186 let a = _mm_setr_epi64x(6, 5);
4187 let r = _mm_loadl_epi64(ptr::addr_of!(a));
4188 assert_eq_m128i(r, _mm_setr_epi64x(6, 0));
4189 }
4190
4191 #[simd_test(enable = "sse2")]
4192 const unsafe fn test_mm_load_si128() {
4193 let a = _mm_set_epi64x(5, 6);
4194 let r = _mm_load_si128(ptr::addr_of!(a) as *const _);
4195 assert_eq_m128i(a, r);
4196 }
4197
4198 #[simd_test(enable = "sse2")]
4199 const unsafe fn test_mm_loadu_si128() {
4200 let a = _mm_set_epi64x(5, 6);
4201 let r = _mm_loadu_si128(ptr::addr_of!(a) as *const _);
4202 assert_eq_m128i(a, r);
4203 }
4204
4205 #[simd_test(enable = "sse2")]
4206 #[cfg_attr(miri, ignore)]
4209 unsafe fn test_mm_maskmoveu_si128() {
4210 let a = _mm_set1_epi8(9);
4211 #[rustfmt::skip]
4212 let mask = _mm_set_epi8(
4213 0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0,
4214 0, 0, 0, 0, 0, 0, 0, 0,
4215 );
4216 let mut r = _mm_set1_epi8(0);
4217 _mm_maskmoveu_si128(a, mask, ptr::addr_of_mut!(r) as *mut i8);
4218 _mm_sfence();
4219 let e = _mm_set_epi8(0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
4220 assert_eq_m128i(r, e);
4221 }
4222
4223 #[simd_test(enable = "sse2")]
4224 const unsafe fn test_mm_store_si128() {
4225 let a = _mm_set1_epi8(9);
4226 let mut r = _mm_set1_epi8(0);
4227 _mm_store_si128(&mut r, a);
4228 assert_eq_m128i(r, a);
4229 }
4230
4231 #[simd_test(enable = "sse2")]
4232 const unsafe fn test_mm_storeu_si128() {
4233 let a = _mm_set1_epi8(9);
4234 let mut r = _mm_set1_epi8(0);
4235 _mm_storeu_si128(&mut r, a);
4236 assert_eq_m128i(r, a);
4237 }
4238
4239 #[simd_test(enable = "sse2")]
4240 const unsafe fn test_mm_storel_epi64() {
4241 let a = _mm_setr_epi64x(2, 9);
4242 let mut r = _mm_set1_epi8(0);
4243 _mm_storel_epi64(&mut r, a);
4244 assert_eq_m128i(r, _mm_setr_epi64x(2, 0));
4245 }
4246
4247 #[simd_test(enable = "sse2")]
4248 #[cfg_attr(miri, ignore)]
4251 unsafe fn test_mm_stream_si128() {
4252 let a = _mm_setr_epi32(1, 2, 3, 4);
4253 let mut r = _mm_undefined_si128();
4254 _mm_stream_si128(ptr::addr_of_mut!(r), a);
4255 _mm_sfence();
4256 assert_eq_m128i(r, a);
4257 }
4258
4259 #[simd_test(enable = "sse2")]
4260 #[cfg_attr(miri, ignore)]
4263 unsafe fn test_mm_stream_si32() {
4264 let a: i32 = 7;
4265 let mut mem = boxed::Box::<i32>::new(-1);
4266 _mm_stream_si32(ptr::addr_of_mut!(*mem), a);
4267 _mm_sfence();
4268 assert_eq!(a, *mem);
4269 }
4270
4271 #[simd_test(enable = "sse2")]
4272 const fn test_mm_move_epi64() {
4273 let a = _mm_setr_epi64x(5, 6);
4274 let r = _mm_move_epi64(a);
4275 assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
4276 }
4277
4278 #[simd_test(enable = "sse2")]
4279 fn test_mm_packs_epi16() {
4280 let a = _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0);
4281 let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80);
4282 let r = _mm_packs_epi16(a, b);
4283 #[rustfmt::skip]
4284 assert_eq_m128i(
4285 r,
4286 _mm_setr_epi8(
4287 0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F
4288 )
4289 );
4290 }
4291
4292 #[simd_test(enable = "sse2")]
4293 fn test_mm_packs_epi32() {
4294 let a = _mm_setr_epi32(0x8000, -0x8001, 0, 0);
4295 let b = _mm_setr_epi32(0, 0, -0x8001, 0x8000);
4296 let r = _mm_packs_epi32(a, b);
4297 assert_eq_m128i(
4298 r,
4299 _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF),
4300 );
4301 }
4302
4303 #[simd_test(enable = "sse2")]
4304 fn test_mm_packus_epi16() {
4305 let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0);
4306 let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100);
4307 let r = _mm_packus_epi16(a, b);
4308 assert_eq_m128i(
4309 r,
4310 _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0),
4311 );
4312 }
4313
4314 #[simd_test(enable = "sse2")]
4315 const fn test_mm_extract_epi16() {
4316 let a = _mm_setr_epi16(-1, 1, 2, 3, 4, 5, 6, 7);
4317 let r1 = _mm_extract_epi16::<0>(a);
4318 let r2 = _mm_extract_epi16::<3>(a);
4319 assert_eq!(r1, 0xFFFF);
4320 assert_eq!(r2, 3);
4321 }
4322
4323 #[simd_test(enable = "sse2")]
4324 const fn test_mm_insert_epi16() {
4325 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4326 let r = _mm_insert_epi16::<0>(a, 9);
4327 let e = _mm_setr_epi16(9, 1, 2, 3, 4, 5, 6, 7);
4328 assert_eq_m128i(r, e);
4329 }
4330
4331 #[simd_test(enable = "sse2")]
4332 const fn test_mm_movemask_epi8() {
4333 #[rustfmt::skip]
4334 let a = _mm_setr_epi8(
4335 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01,
4336 0b0101, 0b1111_0000u8 as i8, 0, 0,
4337 0, 0b1011_0101u8 as i8, 0b1111_0000u8 as i8, 0b0101,
4338 0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8,
4339 );
4340 let r = _mm_movemask_epi8(a);
4341 assert_eq!(r, 0b10100110_00100101);
4342 }
4343
4344 #[simd_test(enable = "sse2")]
4345 const fn test_mm_shuffle_epi32() {
4346 let a = _mm_setr_epi32(5, 10, 15, 20);
4347 let r = _mm_shuffle_epi32::<0b00_01_01_11>(a);
4348 let e = _mm_setr_epi32(20, 10, 10, 5);
4349 assert_eq_m128i(r, e);
4350 }
4351
4352 #[simd_test(enable = "sse2")]
4353 const fn test_mm_shufflehi_epi16() {
4354 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 10, 15, 20);
4355 let r = _mm_shufflehi_epi16::<0b00_01_01_11>(a);
4356 let e = _mm_setr_epi16(1, 2, 3, 4, 20, 10, 10, 5);
4357 assert_eq_m128i(r, e);
4358 }
4359
4360 #[simd_test(enable = "sse2")]
4361 const fn test_mm_shufflelo_epi16() {
4362 let a = _mm_setr_epi16(5, 10, 15, 20, 1, 2, 3, 4);
4363 let r = _mm_shufflelo_epi16::<0b00_01_01_11>(a);
4364 let e = _mm_setr_epi16(20, 10, 10, 5, 1, 2, 3, 4);
4365 assert_eq_m128i(r, e);
4366 }
4367
4368 #[simd_test(enable = "sse2")]
4369 const fn test_mm_unpackhi_epi8() {
4370 #[rustfmt::skip]
4371 let a = _mm_setr_epi8(
4372 0, 1, 2, 3, 4, 5, 6, 7,
4373 8, 9, 10, 11, 12, 13, 14, 15,
4374 );
4375 #[rustfmt::skip]
4376 let b = _mm_setr_epi8(
4377 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
4378 );
4379 let r = _mm_unpackhi_epi8(a, b);
4380 #[rustfmt::skip]
4381 let e = _mm_setr_epi8(
4382 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31,
4383 );
4384 assert_eq_m128i(r, e);
4385 }
4386
4387 #[simd_test(enable = "sse2")]
4388 const fn test_mm_unpackhi_epi16() {
4389 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4390 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
4391 let r = _mm_unpackhi_epi16(a, b);
4392 let e = _mm_setr_epi16(4, 12, 5, 13, 6, 14, 7, 15);
4393 assert_eq_m128i(r, e);
4394 }
4395
4396 #[simd_test(enable = "sse2")]
4397 const fn test_mm_unpackhi_epi32() {
4398 let a = _mm_setr_epi32(0, 1, 2, 3);
4399 let b = _mm_setr_epi32(4, 5, 6, 7);
4400 let r = _mm_unpackhi_epi32(a, b);
4401 let e = _mm_setr_epi32(2, 6, 3, 7);
4402 assert_eq_m128i(r, e);
4403 }
4404
4405 #[simd_test(enable = "sse2")]
4406 const fn test_mm_unpackhi_epi64() {
4407 let a = _mm_setr_epi64x(0, 1);
4408 let b = _mm_setr_epi64x(2, 3);
4409 let r = _mm_unpackhi_epi64(a, b);
4410 let e = _mm_setr_epi64x(1, 3);
4411 assert_eq_m128i(r, e);
4412 }
4413
4414 #[simd_test(enable = "sse2")]
4415 const fn test_mm_unpacklo_epi8() {
4416 #[rustfmt::skip]
4417 let a = _mm_setr_epi8(
4418 0, 1, 2, 3, 4, 5, 6, 7,
4419 8, 9, 10, 11, 12, 13, 14, 15,
4420 );
4421 #[rustfmt::skip]
4422 let b = _mm_setr_epi8(
4423 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
4424 );
4425 let r = _mm_unpacklo_epi8(a, b);
4426 #[rustfmt::skip]
4427 let e = _mm_setr_epi8(
4428 0, 16, 1, 17, 2, 18, 3, 19,
4429 4, 20, 5, 21, 6, 22, 7, 23,
4430 );
4431 assert_eq_m128i(r, e);
4432 }
4433
4434 #[simd_test(enable = "sse2")]
4435 const fn test_mm_unpacklo_epi16() {
4436 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4437 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
4438 let r = _mm_unpacklo_epi16(a, b);
4439 let e = _mm_setr_epi16(0, 8, 1, 9, 2, 10, 3, 11);
4440 assert_eq_m128i(r, e);
4441 }
4442
4443 #[simd_test(enable = "sse2")]
4444 const fn test_mm_unpacklo_epi32() {
4445 let a = _mm_setr_epi32(0, 1, 2, 3);
4446 let b = _mm_setr_epi32(4, 5, 6, 7);
4447 let r = _mm_unpacklo_epi32(a, b);
4448 let e = _mm_setr_epi32(0, 4, 1, 5);
4449 assert_eq_m128i(r, e);
4450 }
4451
4452 #[simd_test(enable = "sse2")]
4453 const fn test_mm_unpacklo_epi64() {
4454 let a = _mm_setr_epi64x(0, 1);
4455 let b = _mm_setr_epi64x(2, 3);
4456 let r = _mm_unpacklo_epi64(a, b);
4457 let e = _mm_setr_epi64x(0, 2);
4458 assert_eq_m128i(r, e);
4459 }
4460
4461 #[simd_test(enable = "sse2")]
4462 const fn test_mm_add_sd() {
4463 let a = _mm_setr_pd(1.0, 2.0);
4464 let b = _mm_setr_pd(5.0, 10.0);
4465 let r = _mm_add_sd(a, b);
4466 assert_eq_m128d(r, _mm_setr_pd(6.0, 2.0));
4467 }
4468
4469 #[simd_test(enable = "sse2")]
4470 const fn test_mm_add_pd() {
4471 let a = _mm_setr_pd(1.0, 2.0);
4472 let b = _mm_setr_pd(5.0, 10.0);
4473 let r = _mm_add_pd(a, b);
4474 assert_eq_m128d(r, _mm_setr_pd(6.0, 12.0));
4475 }
4476
4477 #[simd_test(enable = "sse2")]
4478 const fn test_mm_div_sd() {
4479 let a = _mm_setr_pd(1.0, 2.0);
4480 let b = _mm_setr_pd(5.0, 10.0);
4481 let r = _mm_div_sd(a, b);
4482 assert_eq_m128d(r, _mm_setr_pd(0.2, 2.0));
4483 }
4484
4485 #[simd_test(enable = "sse2")]
4486 const fn test_mm_div_pd() {
4487 let a = _mm_setr_pd(1.0, 2.0);
4488 let b = _mm_setr_pd(5.0, 10.0);
4489 let r = _mm_div_pd(a, b);
4490 assert_eq_m128d(r, _mm_setr_pd(0.2, 0.2));
4491 }
4492
4493 #[simd_test(enable = "sse2")]
4494 fn test_mm_max_sd() {
4495 let a = _mm_setr_pd(1.0, 2.0);
4496 let b = _mm_setr_pd(5.0, 10.0);
4497 let r = _mm_max_sd(a, b);
4498 assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
4499 }
4500
4501 #[simd_test(enable = "sse2")]
4502 fn test_mm_max_pd() {
4503 let a = _mm_setr_pd(1.0, 2.0);
4504 let b = _mm_setr_pd(5.0, 10.0);
4505 let r = _mm_max_pd(a, b);
4506 assert_eq_m128d(r, _mm_setr_pd(5.0, 10.0));
4507
4508 let a = _mm_setr_pd(-0.0, 0.0);
4510 let b = _mm_setr_pd(0.0, 0.0);
4511 let r1 = _mm_castpd_si128(_mm_max_pd(a, b));
4513 let r2 = _mm_castpd_si128(_mm_max_pd(b, a));
4514 let a = _mm_castpd_si128(a);
4515 let b = _mm_castpd_si128(b);
4516 assert_eq_m128i(r1, b);
4517 assert_eq_m128i(r2, a);
4518 assert_ne!(a.as_u8x16(), b.as_u8x16()); }
4520
4521 #[simd_test(enable = "sse2")]
4522 fn test_mm_min_sd() {
4523 let a = _mm_setr_pd(1.0, 2.0);
4524 let b = _mm_setr_pd(5.0, 10.0);
4525 let r = _mm_min_sd(a, b);
4526 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4527 }
4528
4529 #[simd_test(enable = "sse2")]
4530 fn test_mm_min_pd() {
4531 let a = _mm_setr_pd(1.0, 2.0);
4532 let b = _mm_setr_pd(5.0, 10.0);
4533 let r = _mm_min_pd(a, b);
4534 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4535
4536 let a = _mm_setr_pd(-0.0, 0.0);
4538 let b = _mm_setr_pd(0.0, 0.0);
4539 let r1 = _mm_castpd_si128(_mm_min_pd(a, b));
4541 let r2 = _mm_castpd_si128(_mm_min_pd(b, a));
4542 let a = _mm_castpd_si128(a);
4543 let b = _mm_castpd_si128(b);
4544 assert_eq_m128i(r1, b);
4545 assert_eq_m128i(r2, a);
4546 assert_ne!(a.as_u8x16(), b.as_u8x16()); }
4548
4549 #[simd_test(enable = "sse2")]
4550 const fn test_mm_mul_sd() {
4551 let a = _mm_setr_pd(1.0, 2.0);
4552 let b = _mm_setr_pd(5.0, 10.0);
4553 let r = _mm_mul_sd(a, b);
4554 assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
4555 }
4556
4557 #[simd_test(enable = "sse2")]
4558 const fn test_mm_mul_pd() {
4559 let a = _mm_setr_pd(1.0, 2.0);
4560 let b = _mm_setr_pd(5.0, 10.0);
4561 let r = _mm_mul_pd(a, b);
4562 assert_eq_m128d(r, _mm_setr_pd(5.0, 20.0));
4563 }
4564
4565 #[simd_test(enable = "sse2")]
4566 fn test_mm_sqrt_sd() {
4567 let a = _mm_setr_pd(1.0, 2.0);
4568 let b = _mm_setr_pd(5.0, 10.0);
4569 let r = _mm_sqrt_sd(a, b);
4570 assert_eq_m128d(r, _mm_setr_pd(5.0f64.sqrt(), 2.0));
4571 }
4572
4573 #[simd_test(enable = "sse2")]
4574 fn test_mm_sqrt_pd() {
4575 let r = _mm_sqrt_pd(_mm_setr_pd(1.0, 2.0));
4576 assert_eq_m128d(r, _mm_setr_pd(1.0f64.sqrt(), 2.0f64.sqrt()));
4577 }
4578
4579 #[simd_test(enable = "sse2")]
4580 const fn test_mm_sub_sd() {
4581 let a = _mm_setr_pd(1.0, 2.0);
4582 let b = _mm_setr_pd(5.0, 10.0);
4583 let r = _mm_sub_sd(a, b);
4584 assert_eq_m128d(r, _mm_setr_pd(-4.0, 2.0));
4585 }
4586
4587 #[simd_test(enable = "sse2")]
4588 const fn test_mm_sub_pd() {
4589 let a = _mm_setr_pd(1.0, 2.0);
4590 let b = _mm_setr_pd(5.0, 10.0);
4591 let r = _mm_sub_pd(a, b);
4592 assert_eq_m128d(r, _mm_setr_pd(-4.0, -8.0));
4593 }
4594
4595 #[simd_test(enable = "sse2")]
4596 const unsafe fn test_mm_and_pd() {
4597 let a = transmute(u64x2::splat(5));
4598 let b = transmute(u64x2::splat(3));
4599 let r = _mm_and_pd(a, b);
4600 let e = transmute(u64x2::splat(1));
4601 assert_eq_m128d(r, e);
4602 }
4603
4604 #[simd_test(enable = "sse2")]
4605 const unsafe fn test_mm_andnot_pd() {
4606 let a = transmute(u64x2::splat(5));
4607 let b = transmute(u64x2::splat(3));
4608 let r = _mm_andnot_pd(a, b);
4609 let e = transmute(u64x2::splat(2));
4610 assert_eq_m128d(r, e);
4611 }
4612
4613 #[simd_test(enable = "sse2")]
4614 const unsafe fn test_mm_or_pd() {
4615 let a = transmute(u64x2::splat(5));
4616 let b = transmute(u64x2::splat(3));
4617 let r = _mm_or_pd(a, b);
4618 let e = transmute(u64x2::splat(7));
4619 assert_eq_m128d(r, e);
4620 }
4621
4622 #[simd_test(enable = "sse2")]
4623 const unsafe fn test_mm_xor_pd() {
4624 let a = transmute(u64x2::splat(5));
4625 let b = transmute(u64x2::splat(3));
4626 let r = _mm_xor_pd(a, b);
4627 let e = transmute(u64x2::splat(6));
4628 assert_eq_m128d(r, e);
4629 }
4630
4631 #[simd_test(enable = "sse2")]
4632 fn test_mm_cmpeq_sd() {
4633 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4634 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4635 let r = _mm_castpd_si128(_mm_cmpeq_sd(a, b));
4636 assert_eq_m128i(r, e);
4637 }
4638
4639 #[simd_test(enable = "sse2")]
4640 fn test_mm_cmplt_sd() {
4641 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4642 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4643 let r = _mm_castpd_si128(_mm_cmplt_sd(a, b));
4644 assert_eq_m128i(r, e);
4645 }
4646
4647 #[simd_test(enable = "sse2")]
4648 fn test_mm_cmple_sd() {
4649 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4650 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4651 let r = _mm_castpd_si128(_mm_cmple_sd(a, b));
4652 assert_eq_m128i(r, e);
4653 }
4654
4655 #[simd_test(enable = "sse2")]
4656 fn test_mm_cmpgt_sd() {
4657 let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4658 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4659 let r = _mm_castpd_si128(_mm_cmpgt_sd(a, b));
4660 assert_eq_m128i(r, e);
4661 }
4662
4663 #[simd_test(enable = "sse2")]
4664 fn test_mm_cmpge_sd() {
4665 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4666 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4667 let r = _mm_castpd_si128(_mm_cmpge_sd(a, b));
4668 assert_eq_m128i(r, e);
4669 }
4670
4671 #[simd_test(enable = "sse2")]
4672 fn test_mm_cmpord_sd() {
4673 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4674 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4675 let r = _mm_castpd_si128(_mm_cmpord_sd(a, b));
4676 assert_eq_m128i(r, e);
4677 }
4678
4679 #[simd_test(enable = "sse2")]
4680 fn test_mm_cmpunord_sd() {
4681 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4682 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4683 let r = _mm_castpd_si128(_mm_cmpunord_sd(a, b));
4684 assert_eq_m128i(r, e);
4685 }
4686
4687 #[simd_test(enable = "sse2")]
4688 fn test_mm_cmpneq_sd() {
4689 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4690 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4691 let r = _mm_castpd_si128(_mm_cmpneq_sd(a, b));
4692 assert_eq_m128i(r, e);
4693 }
4694
4695 #[simd_test(enable = "sse2")]
4696 fn test_mm_cmpnlt_sd() {
4697 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4698 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4699 let r = _mm_castpd_si128(_mm_cmpnlt_sd(a, b));
4700 assert_eq_m128i(r, e);
4701 }
4702
4703 #[simd_test(enable = "sse2")]
4704 fn test_mm_cmpnle_sd() {
4705 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4706 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4707 let r = _mm_castpd_si128(_mm_cmpnle_sd(a, b));
4708 assert_eq_m128i(r, e);
4709 }
4710
4711 #[simd_test(enable = "sse2")]
4712 fn test_mm_cmpngt_sd() {
4713 let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4714 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4715 let r = _mm_castpd_si128(_mm_cmpngt_sd(a, b));
4716 assert_eq_m128i(r, e);
4717 }
4718
4719 #[simd_test(enable = "sse2")]
4720 fn test_mm_cmpnge_sd() {
4721 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4722 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4723 let r = _mm_castpd_si128(_mm_cmpnge_sd(a, b));
4724 assert_eq_m128i(r, e);
4725 }
4726
4727 #[simd_test(enable = "sse2")]
4728 fn test_mm_cmpeq_pd() {
4729 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4730 let e = _mm_setr_epi64x(!0, 0);
4731 let r = _mm_castpd_si128(_mm_cmpeq_pd(a, b));
4732 assert_eq_m128i(r, e);
4733 }
4734
4735 #[simd_test(enable = "sse2")]
4736 fn test_mm_cmplt_pd() {
4737 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4738 let e = _mm_setr_epi64x(0, !0);
4739 let r = _mm_castpd_si128(_mm_cmplt_pd(a, b));
4740 assert_eq_m128i(r, e);
4741 }
4742
4743 #[simd_test(enable = "sse2")]
4744 fn test_mm_cmple_pd() {
4745 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4746 let e = _mm_setr_epi64x(!0, !0);
4747 let r = _mm_castpd_si128(_mm_cmple_pd(a, b));
4748 assert_eq_m128i(r, e);
4749 }
4750
4751 #[simd_test(enable = "sse2")]
4752 fn test_mm_cmpgt_pd() {
4753 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4754 let e = _mm_setr_epi64x(0, 0);
4755 let r = _mm_castpd_si128(_mm_cmpgt_pd(a, b));
4756 assert_eq_m128i(r, e);
4757 }
4758
4759 #[simd_test(enable = "sse2")]
4760 fn test_mm_cmpge_pd() {
4761 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4762 let e = _mm_setr_epi64x(!0, 0);
4763 let r = _mm_castpd_si128(_mm_cmpge_pd(a, b));
4764 assert_eq_m128i(r, e);
4765 }
4766
4767 #[simd_test(enable = "sse2")]
4768 fn test_mm_cmpord_pd() {
4769 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4770 let e = _mm_setr_epi64x(0, !0);
4771 let r = _mm_castpd_si128(_mm_cmpord_pd(a, b));
4772 assert_eq_m128i(r, e);
4773 }
4774
4775 #[simd_test(enable = "sse2")]
4776 fn test_mm_cmpunord_pd() {
4777 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4778 let e = _mm_setr_epi64x(!0, 0);
4779 let r = _mm_castpd_si128(_mm_cmpunord_pd(a, b));
4780 assert_eq_m128i(r, e);
4781 }
4782
4783 #[simd_test(enable = "sse2")]
4784 fn test_mm_cmpneq_pd() {
4785 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4786 let e = _mm_setr_epi64x(!0, !0);
4787 let r = _mm_castpd_si128(_mm_cmpneq_pd(a, b));
4788 assert_eq_m128i(r, e);
4789 }
4790
4791 #[simd_test(enable = "sse2")]
4792 fn test_mm_cmpnlt_pd() {
4793 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4794 let e = _mm_setr_epi64x(0, 0);
4795 let r = _mm_castpd_si128(_mm_cmpnlt_pd(a, b));
4796 assert_eq_m128i(r, e);
4797 }
4798
4799 #[simd_test(enable = "sse2")]
4800 fn test_mm_cmpnle_pd() {
4801 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4802 let e = _mm_setr_epi64x(0, 0);
4803 let r = _mm_castpd_si128(_mm_cmpnle_pd(a, b));
4804 assert_eq_m128i(r, e);
4805 }
4806
4807 #[simd_test(enable = "sse2")]
4808 fn test_mm_cmpngt_pd() {
4809 let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4810 let e = _mm_setr_epi64x(0, !0);
4811 let r = _mm_castpd_si128(_mm_cmpngt_pd(a, b));
4812 assert_eq_m128i(r, e);
4813 }
4814
4815 #[simd_test(enable = "sse2")]
4816 fn test_mm_cmpnge_pd() {
4817 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4818 let e = _mm_setr_epi64x(0, !0);
4819 let r = _mm_castpd_si128(_mm_cmpnge_pd(a, b));
4820 assert_eq_m128i(r, e);
4821 }
4822
4823 #[simd_test(enable = "sse2")]
4824 fn test_mm_comieq_sd() {
4825 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4826 assert!(_mm_comieq_sd(a, b) != 0);
4827
4828 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(1.0, 3.0));
4829 assert!(_mm_comieq_sd(a, b) == 0);
4830 }
4831
4832 #[simd_test(enable = "sse2")]
4833 fn test_mm_comilt_sd() {
4834 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4835 assert!(_mm_comilt_sd(a, b) == 0);
4836 }
4837
4838 #[simd_test(enable = "sse2")]
4839 fn test_mm_comile_sd() {
4840 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4841 assert!(_mm_comile_sd(a, b) != 0);
4842 }
4843
4844 #[simd_test(enable = "sse2")]
4845 fn test_mm_comigt_sd() {
4846 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4847 assert!(_mm_comigt_sd(a, b) == 0);
4848 }
4849
4850 #[simd_test(enable = "sse2")]
4851 fn test_mm_comige_sd() {
4852 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4853 assert!(_mm_comige_sd(a, b) != 0);
4854 }
4855
4856 #[simd_test(enable = "sse2")]
4857 fn test_mm_comineq_sd() {
4858 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4859 assert!(_mm_comineq_sd(a, b) == 0);
4860 }
4861
4862 #[simd_test(enable = "sse2")]
4863 fn test_mm_ucomieq_sd() {
4864 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4865 assert!(_mm_ucomieq_sd(a, b) != 0);
4866
4867 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(NAN, 3.0));
4868 assert!(_mm_ucomieq_sd(a, b) == 0);
4869 }
4870
4871 #[simd_test(enable = "sse2")]
4872 fn test_mm_ucomilt_sd() {
4873 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4874 assert!(_mm_ucomilt_sd(a, b) == 0);
4875 }
4876
4877 #[simd_test(enable = "sse2")]
4878 fn test_mm_ucomile_sd() {
4879 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4880 assert!(_mm_ucomile_sd(a, b) != 0);
4881 }
4882
4883 #[simd_test(enable = "sse2")]
4884 fn test_mm_ucomigt_sd() {
4885 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4886 assert!(_mm_ucomigt_sd(a, b) == 0);
4887 }
4888
4889 #[simd_test(enable = "sse2")]
4890 fn test_mm_ucomige_sd() {
4891 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4892 assert!(_mm_ucomige_sd(a, b) != 0);
4893 }
4894
4895 #[simd_test(enable = "sse2")]
4896 fn test_mm_ucomineq_sd() {
4897 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4898 assert!(_mm_ucomineq_sd(a, b) == 0);
4899 }
4900
4901 #[simd_test(enable = "sse2")]
4902 const fn test_mm_movemask_pd() {
4903 let r = _mm_movemask_pd(_mm_setr_pd(-1.0, 5.0));
4904 assert_eq!(r, 0b01);
4905
4906 let r = _mm_movemask_pd(_mm_setr_pd(-1.0, -5.0));
4907 assert_eq!(r, 0b11);
4908 }
4909
// Four `f64` values in a struct aligned to 16 bytes; used as backing storage
// by the load/store tests below.
#[repr(align(16))]
struct Memory {
    data: [f64; 4],
}
4914
4915 #[simd_test(enable = "sse2")]
4916 const unsafe fn test_mm_load_pd() {
4917 let mem = Memory {
4918 data: [1.0f64, 2.0, 3.0, 4.0],
4919 };
4920 let vals = &mem.data;
4921 let d = vals.as_ptr();
4922
4923 let r = _mm_load_pd(d);
4924 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4925 }
4926
4927 #[simd_test(enable = "sse2")]
4928 const unsafe fn test_mm_load_sd() {
4929 let a = 1.;
4930 let expected = _mm_setr_pd(a, 0.);
4931 let r = _mm_load_sd(&a);
4932 assert_eq_m128d(r, expected);
4933 }
4934
4935 #[simd_test(enable = "sse2")]
4936 const unsafe fn test_mm_loadh_pd() {
4937 let a = _mm_setr_pd(1., 2.);
4938 let b = 3.;
4939 let expected = _mm_setr_pd(_mm_cvtsd_f64(a), 3.);
4940 let r = _mm_loadh_pd(a, &b);
4941 assert_eq_m128d(r, expected);
4942 }
4943
4944 #[simd_test(enable = "sse2")]
4945 const unsafe fn test_mm_loadl_pd() {
4946 let a = _mm_setr_pd(1., 2.);
4947 let b = 3.;
4948 let expected = _mm_setr_pd(3., get_m128d(a, 1));
4949 let r = _mm_loadl_pd(a, &b);
4950 assert_eq_m128d(r, expected);
4951 }
4952
4953 #[simd_test(enable = "sse2")]
4954 #[cfg_attr(miri, ignore)]
4957 unsafe fn test_mm_stream_pd() {
4958 #[repr(align(128))]
4959 struct Memory {
4960 pub data: [f64; 2],
4961 }
4962 let a = _mm_set1_pd(7.0);
4963 let mut mem = Memory { data: [-1.0; 2] };
4964
4965 _mm_stream_pd(ptr::addr_of_mut!(mem.data[0]), a);
4966 _mm_sfence();
4967 for i in 0..2 {
4968 assert_eq!(mem.data[i], get_m128d(a, i));
4969 }
4970 }
4971
4972 #[simd_test(enable = "sse2")]
4973 const unsafe fn test_mm_store_sd() {
4974 let mut dest = 0.;
4975 let a = _mm_setr_pd(1., 2.);
4976 _mm_store_sd(&mut dest, a);
4977 assert_eq!(dest, _mm_cvtsd_f64(a));
4978 }
4979
4980 #[simd_test(enable = "sse2")]
4981 const unsafe fn test_mm_store_pd() {
4982 let mut mem = Memory { data: [0.0f64; 4] };
4983 let vals = &mut mem.data;
4984 let a = _mm_setr_pd(1.0, 2.0);
4985 let d = vals.as_mut_ptr();
4986
4987 _mm_store_pd(d, *black_box(&a));
4988 assert_eq!(vals[0], 1.0);
4989 assert_eq!(vals[1], 2.0);
4990 }
4991
4992 #[simd_test(enable = "sse2")]
4993 const unsafe fn test_mm_storeu_pd() {
4994 let mut mem = Memory { data: [0.0f64; 4] };
4996 let vals = &mut mem.data;
4997 let a = _mm_setr_pd(1.0, 2.0);
4998
4999 let p = vals.as_mut_ptr().offset(1);
5001 _mm_storeu_pd(p, *black_box(&a));
5002
5003 assert_eq!(*vals, [0.0, 1.0, 2.0, 0.0]);
5004 }
5005
5006 #[simd_test(enable = "sse2")]
5007 const unsafe fn test_mm_storeu_si16() {
5008 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
5009 let mut r = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
5010 _mm_storeu_si16(ptr::addr_of_mut!(r).cast(), a);
5011 let e = _mm_setr_epi16(1, 10, 11, 12, 13, 14, 15, 16);
5012 assert_eq_m128i(r, e);
5013 }
5014
5015 #[simd_test(enable = "sse2")]
5016 const unsafe fn test_mm_storeu_si32() {
5017 let a = _mm_setr_epi32(1, 2, 3, 4);
5018 let mut r = _mm_setr_epi32(5, 6, 7, 8);
5019 _mm_storeu_si32(ptr::addr_of_mut!(r).cast(), a);
5020 let e = _mm_setr_epi32(1, 6, 7, 8);
5021 assert_eq_m128i(r, e);
5022 }
5023
5024 #[simd_test(enable = "sse2")]
5025 const unsafe fn test_mm_storeu_si64() {
5026 let a = _mm_setr_epi64x(1, 2);
5027 let mut r = _mm_setr_epi64x(3, 4);
5028 _mm_storeu_si64(ptr::addr_of_mut!(r).cast(), a);
5029 let e = _mm_setr_epi64x(1, 4);
5030 assert_eq_m128i(r, e);
5031 }
5032
5033 #[simd_test(enable = "sse2")]
5034 const unsafe fn test_mm_store1_pd() {
5035 let mut mem = Memory { data: [0.0f64; 4] };
5036 let vals = &mut mem.data;
5037 let a = _mm_setr_pd(1.0, 2.0);
5038 let d = vals.as_mut_ptr();
5039
5040 _mm_store1_pd(d, *black_box(&a));
5041 assert_eq!(vals[0], 1.0);
5042 assert_eq!(vals[1], 1.0);
5043 }
5044
5045 #[simd_test(enable = "sse2")]
5046 const unsafe fn test_mm_store_pd1() {
5047 let mut mem = Memory { data: [0.0f64; 4] };
5048 let vals = &mut mem.data;
5049 let a = _mm_setr_pd(1.0, 2.0);
5050 let d = vals.as_mut_ptr();
5051
5052 _mm_store_pd1(d, *black_box(&a));
5053 assert_eq!(vals[0], 1.0);
5054 assert_eq!(vals[1], 1.0);
5055 }
5056
5057 #[simd_test(enable = "sse2")]
5058 const unsafe fn test_mm_storer_pd() {
5059 let mut mem = Memory { data: [0.0f64; 4] };
5060 let vals = &mut mem.data;
5061 let a = _mm_setr_pd(1.0, 2.0);
5062 let d = vals.as_mut_ptr();
5063
5064 _mm_storer_pd(d, *black_box(&a));
5065 assert_eq!(vals[0], 2.0);
5066 assert_eq!(vals[1], 1.0);
5067 }
5068
5069 #[simd_test(enable = "sse2")]
5070 const unsafe fn test_mm_storeh_pd() {
5071 let mut dest = 0.;
5072 let a = _mm_setr_pd(1., 2.);
5073 _mm_storeh_pd(&mut dest, a);
5074 assert_eq!(dest, get_m128d(a, 1));
5075 }
5076
5077 #[simd_test(enable = "sse2")]
5078 const unsafe fn test_mm_storel_pd() {
5079 let mut dest = 0.;
5080 let a = _mm_setr_pd(1., 2.);
5081 _mm_storel_pd(&mut dest, a);
5082 assert_eq!(dest, _mm_cvtsd_f64(a));
5083 }
5084
5085 #[simd_test(enable = "sse2")]
5086 const unsafe fn test_mm_loadr_pd() {
5087 let mut mem = Memory {
5088 data: [1.0f64, 2.0, 3.0, 4.0],
5089 };
5090 let vals = &mut mem.data;
5091 let d = vals.as_ptr();
5092
5093 let r = _mm_loadr_pd(d);
5094 assert_eq_m128d(r, _mm_setr_pd(2.0, 1.0));
5095 }
5096
5097 #[simd_test(enable = "sse2")]
5098 const unsafe fn test_mm_loadu_pd() {
5099 let mut mem = Memory {
5101 data: [1.0f64, 2.0, 3.0, 4.0],
5102 };
5103 let vals = &mut mem.data;
5104
5105 let d = vals.as_ptr().offset(1);
5107
5108 let r = _mm_loadu_pd(d);
5109 let e = _mm_setr_pd(2.0, 3.0);
5110 assert_eq_m128d(r, e);
5111 }
5112
5113 #[simd_test(enable = "sse2")]
5114 const unsafe fn test_mm_loadu_si16() {
5115 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
5116 let r = _mm_loadu_si16(ptr::addr_of!(a) as *const _);
5117 assert_eq_m128i(r, _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0));
5118 }
5119
5120 #[simd_test(enable = "sse2")]
5121 const unsafe fn test_mm_loadu_si32() {
5122 let a = _mm_setr_epi32(1, 2, 3, 4);
5123 let r = _mm_loadu_si32(ptr::addr_of!(a) as *const _);
5124 assert_eq_m128i(r, _mm_setr_epi32(1, 0, 0, 0));
5125 }
5126
5127 #[simd_test(enable = "sse2")]
5128 const unsafe fn test_mm_loadu_si64() {
5129 let a = _mm_setr_epi64x(5, 6);
5130 let r = _mm_loadu_si64(ptr::addr_of!(a) as *const _);
5131 assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
5132 }
5133
5134 #[simd_test(enable = "sse2")]
5135 const fn test_mm_cvtpd_ps() {
5136 let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, 5.0));
5137 assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, 0.0));
5138
5139 let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, -5.0));
5140 assert_eq_m128(r, _mm_setr_ps(-1.0, -5.0, 0.0, 0.0));
5141
5142 let r = _mm_cvtpd_ps(_mm_setr_pd(f64::MAX, f64::MIN));
5143 assert_eq_m128(r, _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0));
5144
5145 let r = _mm_cvtpd_ps(_mm_setr_pd(f32::MAX as f64, f32::MIN as f64));
5146 assert_eq_m128(r, _mm_setr_ps(f32::MAX, f32::MIN, 0.0, 0.0));
5147 }
5148
5149 #[simd_test(enable = "sse2")]
5150 const fn test_mm_cvtps_pd() {
5151 let r = _mm_cvtps_pd(_mm_setr_ps(-1.0, 2.0, -3.0, 5.0));
5152 assert_eq_m128d(r, _mm_setr_pd(-1.0, 2.0));
5153
5154 let r = _mm_cvtps_pd(_mm_setr_ps(
5155 f32::MAX,
5156 f32::INFINITY,
5157 f32::NEG_INFINITY,
5158 f32::MIN,
5159 ));
5160 assert_eq_m128d(r, _mm_setr_pd(f32::MAX as f64, f64::INFINITY));
5161 }
5162
5163 #[simd_test(enable = "sse2")]
5164 fn test_mm_cvtpd_epi32() {
5165 let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, 5.0));
5166 assert_eq_m128i(r, _mm_setr_epi32(-1, 5, 0, 0));
5167
5168 let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, -5.0));
5169 assert_eq_m128i(r, _mm_setr_epi32(-1, -5, 0, 0));
5170
5171 let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::MAX, f64::MIN));
5172 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5173
5174 let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::INFINITY, f64::NEG_INFINITY));
5175 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5176
5177 let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::NAN, f64::NAN));
5178 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5179 }
5180
5181 #[simd_test(enable = "sse2")]
5182 fn test_mm_cvtsd_si32() {
5183 let r = _mm_cvtsd_si32(_mm_setr_pd(-2.0, 5.0));
5184 assert_eq!(r, -2);
5185
5186 let r = _mm_cvtsd_si32(_mm_setr_pd(f64::MAX, f64::MIN));
5187 assert_eq!(r, i32::MIN);
5188
5189 let r = _mm_cvtsd_si32(_mm_setr_pd(f64::NAN, f64::NAN));
5190 assert_eq!(r, i32::MIN);
5191 }
5192
5193 #[simd_test(enable = "sse2")]
5194 fn test_mm_cvtsd_ss() {
5195 let a = _mm_setr_ps(-1.1, -2.2, 3.3, 4.4);
5196 let b = _mm_setr_pd(2.0, -5.0);
5197
5198 let r = _mm_cvtsd_ss(a, b);
5199
5200 assert_eq_m128(r, _mm_setr_ps(2.0, -2.2, 3.3, 4.4));
5201
5202 let a = _mm_setr_ps(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY);
5203 let b = _mm_setr_pd(f64::INFINITY, -5.0);
5204
5205 let r = _mm_cvtsd_ss(a, b);
5206
5207 assert_eq_m128(
5208 r,
5209 _mm_setr_ps(
5210 f32::INFINITY,
5211 f32::NEG_INFINITY,
5212 f32::MAX,
5213 f32::NEG_INFINITY,
5214 ),
5215 );
5216 }
5217
5218 #[simd_test(enable = "sse2")]
5219 const fn test_mm_cvtsd_f64() {
5220 let r = _mm_cvtsd_f64(_mm_setr_pd(-1.1, 2.2));
5221 assert_eq!(r, -1.1);
5222 }
5223
5224 #[simd_test(enable = "sse2")]
5225 const fn test_mm_cvtss_sd() {
5226 let a = _mm_setr_pd(-1.1, 2.2);
5227 let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
5228
5229 let r = _mm_cvtss_sd(a, b);
5230 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.2));
5231
5232 let a = _mm_setr_pd(-1.1, f64::INFINITY);
5233 let b = _mm_setr_ps(f32::NEG_INFINITY, 2.0, 3.0, 4.0);
5234
5235 let r = _mm_cvtss_sd(a, b);
5236 assert_eq_m128d(r, _mm_setr_pd(f64::NEG_INFINITY, f64::INFINITY));
5237 }
5238
5239 #[simd_test(enable = "sse2")]
5240 fn test_mm_cvttpd_epi32() {
5241 let a = _mm_setr_pd(-1.1, 2.2);
5242 let r = _mm_cvttpd_epi32(a);
5243 assert_eq_m128i(r, _mm_setr_epi32(-1, 2, 0, 0));
5244
5245 let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
5246 let r = _mm_cvttpd_epi32(a);
5247 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5248 }
5249
5250 #[simd_test(enable = "sse2")]
5251 fn test_mm_cvttsd_si32() {
5252 let a = _mm_setr_pd(-1.1, 2.2);
5253 let r = _mm_cvttsd_si32(a);
5254 assert_eq!(r, -1);
5255
5256 let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
5257 let r = _mm_cvttsd_si32(a);
5258 assert_eq!(r, i32::MIN);
5259 }
5260
5261 #[simd_test(enable = "sse2")]
5262 fn test_mm_cvttps_epi32() {
5263 let a = _mm_setr_ps(-1.1, 2.2, -3.3, 6.6);
5264 let r = _mm_cvttps_epi32(a);
5265 assert_eq_m128i(r, _mm_setr_epi32(-1, 2, -3, 6));
5266
5267 let a = _mm_setr_ps(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX);
5268 let r = _mm_cvttps_epi32(a);
5269 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, i32::MIN, i32::MIN));
5270 }
5271
5272 #[simd_test(enable = "sse2")]
5273 const fn test_mm_set_sd() {
5274 let r = _mm_set_sd(-1.0_f64);
5275 assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, 0_f64));
5276 }
5277
5278 #[simd_test(enable = "sse2")]
5279 const fn test_mm_set1_pd() {
5280 let r = _mm_set1_pd(-1.0_f64);
5281 assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, -1.0_f64));
5282 }
5283
5284 #[simd_test(enable = "sse2")]
5285 const fn test_mm_set_pd1() {
5286 let r = _mm_set_pd1(-2.0_f64);
5287 assert_eq_m128d(r, _mm_setr_pd(-2.0_f64, -2.0_f64));
5288 }
5289
5290 #[simd_test(enable = "sse2")]
5291 const fn test_mm_set_pd() {
5292 let r = _mm_set_pd(1.0_f64, 5.0_f64);
5293 assert_eq_m128d(r, _mm_setr_pd(5.0_f64, 1.0_f64));
5294 }
5295
5296 #[simd_test(enable = "sse2")]
5297 const fn test_mm_setr_pd() {
5298 let r = _mm_setr_pd(1.0_f64, -5.0_f64);
5299 assert_eq_m128d(r, _mm_setr_pd(1.0_f64, -5.0_f64));
5300 }
5301
5302 #[simd_test(enable = "sse2")]
5303 const fn test_mm_setzero_pd() {
5304 let r = _mm_setzero_pd();
5305 assert_eq_m128d(r, _mm_setr_pd(0_f64, 0_f64));
5306 }
5307
5308 #[simd_test(enable = "sse2")]
5309 const unsafe fn test_mm_load1_pd() {
5310 let d = -5.0;
5311 let r = _mm_load1_pd(&d);
5312 assert_eq_m128d(r, _mm_setr_pd(d, d));
5313 }
5314
5315 #[simd_test(enable = "sse2")]
5316 const unsafe fn test_mm_load_pd1() {
5317 let d = -5.0;
5318 let r = _mm_load_pd1(&d);
5319 assert_eq_m128d(r, _mm_setr_pd(d, d));
5320 }
5321
5322 #[simd_test(enable = "sse2")]
5323 const fn test_mm_unpackhi_pd() {
5324 let a = _mm_setr_pd(1.0, 2.0);
5325 let b = _mm_setr_pd(3.0, 4.0);
5326 let r = _mm_unpackhi_pd(a, b);
5327 assert_eq_m128d(r, _mm_setr_pd(2.0, 4.0));
5328 }
5329
5330 #[simd_test(enable = "sse2")]
5331 const fn test_mm_unpacklo_pd() {
5332 let a = _mm_setr_pd(1.0, 2.0);
5333 let b = _mm_setr_pd(3.0, 4.0);
5334 let r = _mm_unpacklo_pd(a, b);
5335 assert_eq_m128d(r, _mm_setr_pd(1.0, 3.0));
5336 }
5337
5338 #[simd_test(enable = "sse2")]
5339 const fn test_mm_shuffle_pd() {
5340 let a = _mm_setr_pd(1., 2.);
5341 let b = _mm_setr_pd(3., 4.);
5342 let expected = _mm_setr_pd(1., 3.);
5343 let r = _mm_shuffle_pd::<0b00_00_00_00>(a, b);
5344 assert_eq_m128d(r, expected);
5345 }
5346
5347 #[simd_test(enable = "sse2")]
5348 const fn test_mm_move_sd() {
5349 let a = _mm_setr_pd(1., 2.);
5350 let b = _mm_setr_pd(3., 4.);
5351 let expected = _mm_setr_pd(3., 2.);
5352 let r = _mm_move_sd(a, b);
5353 assert_eq_m128d(r, expected);
5354 }
5355
5356 #[simd_test(enable = "sse2")]
5357 const fn test_mm_castpd_ps() {
5358 let a = _mm_set1_pd(0.);
5359 let expected = _mm_set1_ps(0.);
5360 let r = _mm_castpd_ps(a);
5361 assert_eq_m128(r, expected);
5362 }
5363
5364 #[simd_test(enable = "sse2")]
5365 const fn test_mm_castpd_si128() {
5366 let a = _mm_set1_pd(0.);
5367 let expected = _mm_set1_epi64x(0);
5368 let r = _mm_castpd_si128(a);
5369 assert_eq_m128i(r, expected);
5370 }
5371
5372 #[simd_test(enable = "sse2")]
5373 const fn test_mm_castps_pd() {
5374 let a = _mm_set1_ps(0.);
5375 let expected = _mm_set1_pd(0.);
5376 let r = _mm_castps_pd(a);
5377 assert_eq_m128d(r, expected);
5378 }
5379
5380 #[simd_test(enable = "sse2")]
5381 const fn test_mm_castps_si128() {
5382 let a = _mm_set1_ps(0.);
5383 let expected = _mm_set1_epi32(0);
5384 let r = _mm_castps_si128(a);
5385 assert_eq_m128i(r, expected);
5386 }
5387
5388 #[simd_test(enable = "sse2")]
5389 const fn test_mm_castsi128_pd() {
5390 let a = _mm_set1_epi64x(0);
5391 let expected = _mm_set1_pd(0.);
5392 let r = _mm_castsi128_pd(a);
5393 assert_eq_m128d(r, expected);
5394 }
5395
5396 #[simd_test(enable = "sse2")]
5397 const fn test_mm_castsi128_ps() {
5398 let a = _mm_set1_epi32(0);
5399 let expected = _mm_set1_ps(0.);
5400 let r = _mm_castsi128_ps(a);
5401 assert_eq_m128(r, expected);
5402 }
5403}