-
Notifications
You must be signed in to change notification settings - Fork 3.6k
Open
Labels
feature request (request for unsupported feature or enhancement), platform:windows (issues related to the Windows platform)
Description
Describe the feature request
Windows ARM SME/SME2 detection returns false (cpuinfo_has_arm_sme()/cpuinfo_has_arm_sme2())
onnxruntime/onnxruntime/core/common/cpuid_info.cc
void CPUIDInfo::ArmWindowsInit() {
// Read MIDR and ID_AA64ISAR1_EL1 register values from Windows registry
// There should be one per CPU
std::vector<uint64_t> midr_values{}, id_aa64isar1_el1_values{};
// TODO!! Don't support multiple processor group yet!!
constexpr int MAX_CORES = 64;
constexpr int MAX_VALUE_NAME = 4096;
CHAR processor_subkey[MAX_VALUE_NAME] = ""; // buffer for processor registry name
for (size_t i = 0; i < MAX_CORES - 1; i++) {
snprintf(processor_subkey, MAX_VALUE_NAME, "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\%d",
static_cast<int>(i));
uint64_t midr_value;
unsigned long data_size = sizeof(midr_value);
/*
* ARM lists for each coprocessor register 5 fields: op0/op1/CRn/CRm/op2.
* You need to put those numbers through the ARM64_SYSREG macro:
*
* #define ARM64_SYSREG(op0, op1, crn, crm, op2) \
* (((op0 & 1) << 14) | \
* ((op1 & 7) << 11) | \
* ((crn & 15) << 7) | \
* ((crm & 15) << 3) | \
* ((op2 & 7) << 0))
*
* For the CP value of MIDR, op0 = 3 and the others are all = 0, so we come up with 0x4000,
*/
if (::RegGetValueA(HKEY_LOCAL_MACHINE, processor_subkey, "CP 4000", RRF_RT_REG_QWORD,
nullptr, &midr_value, &data_size) != ERROR_SUCCESS) {
break;
}
uint64_t id_aa64isar1_el1_value;
data_size = sizeof(id_aa64isar1_el1_value);
// CP 4031 corresponds to ID_AA64ISAR1_EL1 register
if (::RegGetValueA(HKEY_LOCAL_MACHINE, processor_subkey, "CP 4031", RRF_RT_REG_QWORD,
nullptr, &id_aa64isar1_el1_value, &data_size) != ERROR_SUCCESS) {
break;
}
midr_values.push_back(midr_value);
id_aa64isar1_el1_values.push_back(id_aa64isar1_el1_value);
}
// process midr_values
{
uint32_t lastUarch = cpuinfo_uarch_unknown;
for (size_t i = 0; i < midr_values.size(); ++i) {
uint32_t uarch = cpuinfo_uarch_unknown;
decodeMIDR(static_cast<uint32_t>(midr_values[i]), &uarch);
core_uarchs_.push_back(uarch);
if (uarch == cpuinfo_uarch_cortex_a53 || uarch == cpuinfo_uarch_cortex_a55r0 ||
uarch == cpuinfo_uarch_cortex_a55) {
is_armv8_narrow_ld_.push_back(true);
} else {
is_armv8_narrow_ld_.push_back(false);
}
if (i == 0) {
lastUarch = uarch;
} else if (lastUarch != uarch) {
is_hybrid_ = true;
lastUarch = uarch;
}
}
}
has_arm_neon_i8mm_ = std::all_of(
id_aa64isar1_el1_values.begin(), id_aa64isar1_el1_values.end(),
[](uint64_t id_aa64isar1_el1_value) {
// I8MM, bits [55:52]
return ((id_aa64isar1_el1_value >> 52) & 0xF) != 0;
});
has_arm_neon_dot_ = (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE) != 0);
#if defined(CPUINFO_SUPPORTED)
if (pytorch_cpuinfo_init_) {
has_fp16_ = cpuinfo_has_arm_neon_fp16_arith();
// cpuinfo_has_arm_i8mm() doesn't work on Windows yet. See https://github.com/pytorch/cpuinfo/issues/279.
// has_arm_neon_i8mm_ = cpuinfo_has_arm_i8mm();
has_arm_sve_i8mm_ = cpuinfo_has_arm_sve() && has_arm_neon_i8mm_;
has_arm_neon_bf16_ = cpuinfo_has_arm_neon_bf16();
}
#endif // defined(CPUINFO_SUPPORTED)
}Describe scenario use case
MLAS SME/SME2 feature detection on Windows relies on cpuinfo_has_arm_sme() and cpuinfo_has_arm_sme2(),
but both currently return zero, so MLAS falls back to the NEON kernels.
Metadata
Metadata
Assignees
Labels
feature request (request for unsupported feature or enhancement), platform:windows (issues related to the Windows platform)