std_detect/detect/os/x86.rs
//! x86 run-time feature detection is OS independent.

#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
use core::mem;

use crate::detect::{Feature, bit, cache};

/// Run-time feature detection on x86 works by using the CPUID instruction.
///
/// The [CPUID Wikipedia page][wiki_cpuid] contains
/// all the information about which flags to set to query which values, and in
/// which registers these are reported.
///
/// The definitive references are:
/// - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
///   Instruction Set Reference, A-Z][intel64_ref].
/// - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
///   System Instructions][amd64_ref].
///
/// [wiki_cpuid]: https://en.wikipedia.org/wiki/CPUID
/// [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
/// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
#[allow(clippy::similar_names)]
pub(crate) fn detect_features() -> cache::Initializer {
    let mut value = cache::Initializer::default();

    if cfg!(target_env = "sgx") {
        // SGX doesn't support `cpuid` because its results are untrusted data.
        return value;
    }

    // Calling `__cpuid`/`__cpuid_count` from here on is safe because the CPU
    // has `cpuid` support.

    // 0. EAX = 0: Basic Information:
    // - EAX returns the "Highest Function Parameter", that is, the maximum
    //   leaf value for subsequent calls of `cpuid` in range [0, 0x8000_0000].
    // - The vendor ID is stored in 12 u8 ASCII chars, returned in EBX, EDX,
    //   and ECX (in that order):
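    //   For example, "GenuineIntel" comes back as EBX = "Genu", EDX = "ineI",
    //   ECX = "ntel".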
    let (max_basic_leaf, vendor_id) = unsafe {
        let CpuidResult { eax: max_basic_leaf, ebx, ecx, edx } = __cpuid(0);
        let vendor_id: [[u8; 4]; 3] = [ebx.to_ne_bytes(), edx.to_ne_bytes(), ecx.to_ne_bytes()];
        let vendor_id: [u8; 12] = mem::transmute(vendor_id);
        (max_basic_leaf, vendor_id)
    };

    if max_basic_leaf < 1 {
        // Earlier Intel 486: CPUID leaf 1 is not implemented.
        return value;
    }

    // EAX = 1, ECX = 0: Queries "Processor Info and Feature Bits";
    // Contains information about most x86 features.
    let CpuidResult { ecx: proc_info_ecx, edx: proc_info_edx, .. } =
        unsafe { __cpuid(0x0000_0001_u32) };

    // EAX = 7: Queries "Extended Features";
    // Contains information about bmi, bmi2, and avx2 support.
    let (
        extended_features_ebx,
        extended_features_ecx,
        extended_features_edx,
        extended_features_eax_leaf_1,
        extended_features_edx_leaf_1,
    ) = if max_basic_leaf >= 7 {
        let CpuidResult { ebx, ecx, edx, .. } = unsafe { __cpuid(0x0000_0007_u32) };
        let CpuidResult { eax: eax_1, edx: edx_1, .. } =
            unsafe { __cpuid_count(0x0000_0007_u32, 0x0000_0001_u32) };
        (ebx, ecx, edx, eax_1, edx_1)
    } else {
        (0, 0, 0, 0, 0) // CPUID does not support "Extended Features"
    };

    // EAX = 0x8000_0000, ECX = 0: Get Highest Extended Function Supported
    // - EAX returns the max leaf value for extended information, that is,
    //   `cpuid` calls in range [0x8000_0000; u32::MAX]:
    let CpuidResult { eax: extended_max_basic_leaf, .. } = unsafe { __cpuid(0x8000_0000_u32) };

    // EAX = 0x8000_0001, ECX = 0: Queries "Extended Processor Info and Feature
    // Bits"
    let extended_proc_info_ecx = if extended_max_basic_leaf >= 1 {
        let CpuidResult { ecx, .. } = unsafe { __cpuid(0x8000_0001_u32) };
        ecx
    } else {
        0
    };

    {
        // borrows value till the end of this scope:
        let mut enable = |r, rb, f| {
            let present = bit::test(r as usize, rb);
            if present {
                value.set(f as u32);
            }
            present
        };

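        // Each `enable(reg, bit, feature)` call below tests a single CPUID bit
        // and, when it is set, records the corresponding `Feature` in `value`;
        // e.g. `enable(proc_info_ecx, 0, Feature::sse3)` checks CPUID.01H:ECX[0].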
        enable(proc_info_ecx, 0, Feature::sse3);
        enable(proc_info_ecx, 1, Feature::pclmulqdq);
        enable(proc_info_ecx, 9, Feature::ssse3);
        enable(proc_info_ecx, 13, Feature::cmpxchg16b);
        enable(proc_info_ecx, 19, Feature::sse4_1);
        enable(proc_info_ecx, 20, Feature::sse4_2);
        enable(proc_info_ecx, 22, Feature::movbe);
        enable(proc_info_ecx, 23, Feature::popcnt);
        enable(proc_info_ecx, 25, Feature::aes);
        let f16c = enable(proc_info_ecx, 29, Feature::f16c);
        enable(proc_info_ecx, 30, Feature::rdrand);
        enable(extended_features_ebx, 18, Feature::rdseed);
        enable(extended_features_ebx, 19, Feature::adx);
        enable(extended_features_ebx, 11, Feature::rtm);
        enable(proc_info_edx, 4, Feature::tsc);
        enable(proc_info_edx, 23, Feature::mmx);
        enable(proc_info_edx, 24, Feature::fxsr);
        enable(proc_info_edx, 25, Feature::sse);
        enable(proc_info_edx, 26, Feature::sse2);
        enable(extended_features_ebx, 29, Feature::sha);

        enable(extended_features_ecx, 8, Feature::gfni);
        enable(extended_features_ecx, 9, Feature::vaes);
        enable(extended_features_ecx, 10, Feature::vpclmulqdq);

        enable(extended_features_ebx, 3, Feature::bmi1);
        enable(extended_features_ebx, 8, Feature::bmi2);

        enable(extended_features_ebx, 9, Feature::ermsb);

        enable(extended_features_eax_leaf_1, 31, Feature::movrs);

        // Leaf 7 ECX[23] (KL) indicates that the Key Locker leaf (CPUID.19H) is available.
        if bit::test(extended_features_ecx as usize, 23) {
            let CpuidResult { ebx, .. } = unsafe { __cpuid(0x19) };
            enable(ebx, 0, Feature::kl);
            enable(ebx, 2, Feature::widekl);
        }

        // This detects ABM on AMD CPUs and LZCNT on Intel CPUs.
        // On Intel CPUs with popcnt, lzcnt implements the
        // "missing part" of ABM, so we map both to the same
        // internal feature.
        //
        // The `is_x86_feature_detected!("lzcnt")` macro then
        // internally maps to Feature::abm.
        enable(extended_proc_info_ecx, 5, Feature::lzcnt);

        // Hygon Dhyana originates from AMD technology and shares most of the
        // architecture with AMD's family 17h, but it has a different vendor ID
        // ("HygonGenuine") and family series number (family 18h).
        //
        // For CPUID feature bits, Hygon Dhyana (family 18h) shares the same
        // definitions as AMD family 17h.
        //
        // The related AMD CPUID specification is https://www.amd.com/system/files/TechDocs/25481.pdf.
        // The related Hygon kernel patch can be found at
        // http://lkml.kernel.org/r/5ce86123a7b9dad925ac583d88d2f921040e859b.1538583282.git.puwen@hygon.cn
        if vendor_id == *b"AuthenticAMD" || vendor_id == *b"HygonGenuine" {
            // These features are available on AMD arch CPUs:
            enable(extended_proc_info_ecx, 6, Feature::sse4a);
            enable(extended_proc_info_ecx, 21, Feature::tbm);
            enable(extended_proc_info_ecx, 11, Feature::xop);
        }

        // `XSAVE` and `AVX` support:
        let cpu_xsave = bit::test(proc_info_ecx as usize, 26);
        if cpu_xsave {
            // 0. Here the CPU supports `XSAVE`.

            // 1. Detect `OSXSAVE`, that is, whether the OS is AVX enabled and
            //    supports saving the state of the AVX/AVX2 vector registers on
            //    context-switches, see:
            //
            //    - [intel: is avx enabled?][is_avx_enabled],
            //    - [mozilla: sse.cpp][mozilla_sse_cpp].
            //
            // [is_avx_enabled]: https://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled
            // [mozilla_sse_cpp]: https://hg.mozilla.org/mozilla-central/file/64bab5cbb9b6/mozglue/build/SSE.cpp#l190
            let cpu_osxsave = bit::test(proc_info_ecx as usize, 27);

            if cpu_osxsave {
                // 2. The OS must have signaled the CPU that it supports saving and
                // restoring the:
                //
                // * SSE -> `XCR0.SSE[1]`
                // * AVX -> `XCR0.AVX[2]`
                // * AVX-512 -> `XCR0.AVX-512[7:5]`
                // * AMX -> `XCR0.AMX[18:17]`
                // * APX -> `XCR0.APX[19]`
                //
                // by setting the corresponding bits of `XCR0` to `1`.
                //
                // This is safe because the CPU supports `xsave`
                // and the OS has set `osxsave`.
                let xcr0 = unsafe { _xgetbv(0) };
                // Test `XCR0.SSE[1]` and `XCR0.AVX[2]` with the mask `0b110 == 6`:
                let os_avx_support = xcr0 & 6 == 6;
                // Test `XCR0.AVX-512[7:5]` with the mask `0b1110_0000 == 0xe0`:
                let os_avx512_support = xcr0 & 0xe0 == 0xe0;
                // Test `XCR0.AMX[18:17]` with the mask `0b110_0000_0000_0000_0000 == 0x60000`:
                let os_amx_support = xcr0 & 0x60000 == 0x60000;
                // Test `XCR0.APX[19]` with the mask `0b1000_0000_0000_0000_0000 == 0x80000`:
                let os_apx_support = xcr0 & 0x80000 == 0x80000;

                // Only if the OS and the CPU support saving/restoring the AVX
                // registers do we enable `xsave` support:
                if os_avx_support {
                    // See "13.3 ENABLING THE XSAVE FEATURE SET AND XSAVE-ENABLED
                    // FEATURES" in the "Intel® 64 and IA-32 Architectures Software
                    // Developer’s Manual, Volume 1: Basic Architecture":
                    //
                    // "Software enables the XSAVE feature set by setting
                    // CR4.OSXSAVE[bit 18] to 1 (e.g., with the MOV to CR4
                    // instruction). If this bit is 0, execution of any of XGETBV,
                    // XRSTOR, XRSTORS, XSAVE, XSAVEC, XSAVEOPT, XSAVES, and XSETBV
                    // causes an invalid-opcode exception (#UD)"
                    //
                    enable(proc_info_ecx, 26, Feature::xsave);

                    // For `xsaveopt`, `xsavec`, and `xsaves` we need to query the
                    // Processor Extended State Enumeration Sub-leaf (EAX = 0DH,
                    // ECX = 1):
                    if max_basic_leaf >= 0xd {
                        let CpuidResult { eax: proc_extended_state1_eax, .. } =
                            unsafe { __cpuid_count(0xd_u32, 1) };
                        enable(proc_extended_state1_eax, 0, Feature::xsaveopt);
                        enable(proc_extended_state1_eax, 1, Feature::xsavec);
                        enable(proc_extended_state1_eax, 3, Feature::xsaves);
                    }

                    // FMA (uses 256-bit wide registers):
                    let fma = enable(proc_info_ecx, 12, Feature::fma);

                    // And AVX/AVX2:
                    enable(proc_info_ecx, 28, Feature::avx);
                    enable(extended_features_ebx, 5, Feature::avx2);

                    // "Short" versions of AVX512 instructions
                    enable(extended_features_eax_leaf_1, 4, Feature::avxvnni);
                    enable(extended_features_eax_leaf_1, 23, Feature::avxifma);
                    enable(extended_features_edx_leaf_1, 4, Feature::avxvnniint8);
                    enable(extended_features_edx_leaf_1, 5, Feature::avxneconvert);
                    enable(extended_features_edx_leaf_1, 10, Feature::avxvnniint16);

                    enable(extended_features_eax_leaf_1, 0, Feature::sha512);
                    enable(extended_features_eax_leaf_1, 1, Feature::sm3);
                    enable(extended_features_eax_leaf_1, 2, Feature::sm4);

                    // For AVX-512, the OS also needs to support saving/restoring
                    // the extended state; only then do we enable AVX-512 support.
                    // Also, Rust makes `avx512f` imply `fma` and `f16c`, because
                    // otherwise the assembler is broken. But Intel doesn't guarantee
                    // that `fma` and `f16c` are available with `avx512f`, so we
                    // need to check for them separately.
                    if os_avx512_support && f16c && fma {
                        enable(extended_features_ebx, 16, Feature::avx512f);
                        enable(extended_features_ebx, 17, Feature::avx512dq);
                        enable(extended_features_ebx, 21, Feature::avx512ifma);
                        enable(extended_features_ebx, 26, Feature::avx512pf);
                        enable(extended_features_ebx, 27, Feature::avx512er);
                        enable(extended_features_ebx, 28, Feature::avx512cd);
                        enable(extended_features_ebx, 30, Feature::avx512bw);
                        enable(extended_features_ebx, 31, Feature::avx512vl);
                        enable(extended_features_ecx, 1, Feature::avx512vbmi);
                        enable(extended_features_ecx, 6, Feature::avx512vbmi2);
                        enable(extended_features_ecx, 11, Feature::avx512vnni);
                        enable(extended_features_ecx, 12, Feature::avx512bitalg);
                        enable(extended_features_ecx, 14, Feature::avx512vpopcntdq);
                        enable(extended_features_edx, 8, Feature::avx512vp2intersect);
                        enable(extended_features_edx, 23, Feature::avx512fp16);
                        enable(extended_features_eax_leaf_1, 5, Feature::avx512bf16);
                    }
                }

                if os_amx_support {
                    enable(extended_features_edx, 24, Feature::amx_tile);
                    enable(extended_features_edx, 25, Feature::amx_int8);
                    enable(extended_features_edx, 22, Feature::amx_bf16);
                    enable(extended_features_eax_leaf_1, 21, Feature::amx_fp16);
                    enable(extended_features_edx_leaf_1, 8, Feature::amx_complex);

                    if max_basic_leaf >= 0x1e {
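                        // Sub-leaf 1 of leaf 0x1E enumerates additional AMX
                        // feature flags in EAX (hence `amx_feature_flags_eax`).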
                        let CpuidResult { eax: amx_feature_flags_eax, .. } =
                            unsafe { __cpuid_count(0x1e_u32, 1) };

                        enable(amx_feature_flags_eax, 4, Feature::amx_fp8);
                        enable(amx_feature_flags_eax, 5, Feature::amx_transpose);
                        enable(amx_feature_flags_eax, 6, Feature::amx_tf32);
                        enable(amx_feature_flags_eax, 7, Feature::amx_avx512);
                        enable(amx_feature_flags_eax, 8, Feature::amx_movrs);
                    }
                }

                if os_apx_support {
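                    // Leaf 7 sub-leaf 1, EDX[21] advertises the APX foundation
                    // (APX_F) instructions.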
                    enable(extended_features_edx_leaf_1, 21, Feature::apxf);
                }

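                // Leaf 7 sub-leaf 1, EDX[19] advertises AVX10; the supported
                // converged-vector-ISA version is then reported in
                // CPUID.24H.0H:EBX[7:0] (per Intel's AVX10 specification).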
                let avx10_1 = enable(extended_features_edx_leaf_1, 19, Feature::avx10_1);
                if avx10_1 {
                    let CpuidResult { ebx, .. } = unsafe { __cpuid(0x24) };
                    let avx10_version = ebx & 0xff;
                    if avx10_version >= 2 {
                        value.set(Feature::avx10_2 as u32);
                    }
                }
            }
        }
    }

    // Unfortunately, some Skylake chips erroneously report support for BMI1 and
    // BMI2 without actual support. These chips don't support AVX, and it seems
    // that all Intel chips with non-erroneous BMI support also support AVX
    // (I didn't check other vendors), so we can disable these flags for chips
    // that don't also report support for AVX.
    //
    // It's possible this will pessimize future chips that do support BMI and
    // not AVX, but this seems minor compared to the hard crash you get when
    // executing an unsupported instruction (to put it another way, it's safe
    // for us to under-report CPU features, but not to over-report them). Still,
    // to limit any impact this may have in the future, we only do this for
    // Intel chips, as it's a bug only present in their chips.
    //
    // This bug is documented as `SKL052` in the errata section of this document:
    // http://www.intel.com/content/dam/www/public/us/en/documents/specification-updates/desktop-6th-gen-core-family-spec-update.pdf
    if vendor_id == *b"GenuineIntel" && !value.test(Feature::avx as u32) {
        value.unset(Feature::bmi1 as u32);
        value.unset(Feature::bmi2 as u32);
    }

    value
}