1111#include < unordered_set>
1212
1313#include " core/common/cpuid_info.h"
14+ #include " core/common/logging/logging.h"
1415#include " core/session/abi_devices.h"
1516
16- // // UsingSetupApi
17+ // // For SetupApi info
1718#include < Windows.h>
1819#include < SetupAPI.h>
1920#include < devguid.h>
2021#include < cfgmgr32.h>
2122#pragma comment(lib, "setupapi.lib")
2223
23- // // Using D3D12
24+ // // For D3D12 info
2425// #include <windows.h>
2526#include < d3d12.h>
2627#include < dxgi1_6.h>
2728#include < iostream>
29+ #include < wrl/client.h>
30+ using Microsoft::WRL::ComPtr;
2831
2932#pragma comment(lib, "d3d12.lib")
3033#pragma comment(lib, "dxgi.lib")
3134
32- // // Using DXCore. Requires newer Windows SDK than what we target by default.
33- // these values were added in 10.0.22621.0 as part of DirectXCore API
34- //
35- // In theory this #if should be fine, but the QNN ARM64 CI fails even with that applied.
36- // with the NTDII_VERSION value there...
37- //
38- // Defining a local GUID instead.
39- // #if NTDDI_VERSION < NTDDI_WIN10_RS5
40- // DEFINE_GUID(DXCORE_ADAPTER_ATTRIBUTE_D3D12_GENERIC_ML, 0xb71b0d41, 0x1088, 0x422f, 0xa2, 0x7c, 0x2, 0x50, 0xb7, 0xd3, 0xa9, 0x88);
41- // DEFINE_GUID(DXCORE_HARDWARE_TYPE_ATTRIBUTE_NPU, 0xd46140c4, 0xadd7, 0x451b, 0x9e, 0x56, 0x6, 0xfe, 0x8c, 0x3b, 0x58, 0xed);
42- // #endif
35+ // // For DXCore info.
4336#include < initguid.h>
4437#include < dxcore.h>
4538#include < dxcore_interface.h>
4639#include < wil/com.h>
4740
48- //
49- // In theory this #if should be fine, but the QNN ARM64 CI fails even with that applied. Not sure what is happening
50- // with the NTDII_VERSION value there...
51- //
52- // Defining a local GUID instead.
5341#include " core/common/cpuid_info.h"
5442#include " core/session/abi_devices.h"
5543
5644namespace onnxruntime {
57- #if !defined(ORT_MINIMAL_BUILD)
45+ // unsupported in minimal build. also needs xbox specific handling to be implemented.
46+ #if !defined(ORT_MINIMAL_BUILD) && !defined(_GAMING_XBOX)
5847namespace {
5948
6049// device info we accumulate from various sources
@@ -64,7 +53,6 @@ struct DeviceInfo {
6453 uint32_t device_id;
6554 std::wstring vendor;
6655 std::wstring description;
67- std::vector<DWORD> bus_ids; // assuming could have multiple GPUs that are the same model
6856 std::unordered_map<std::wstring, std::wstring> metadata;
6957};
7058
@@ -97,14 +85,13 @@ std::unordered_map<uint64_t, DeviceInfo> GetDeviceInfoSetupApi(const std::unorde
9785 for (auto guid : guids) {
9886 HDEVINFO devInfo = SetupDiGetClassDevs (&guid, nullptr , nullptr , DIGCF_PRESENT);
9987 if (devInfo == INVALID_HANDLE_VALUE) {
100- return device_info ;
88+ continue ;
10189 }
10290
10391 SP_DEVINFO_DATA devData = {};
10492 devData.cbSize = sizeof (SP_DEVINFO_DATA);
10593
106- std::wstring buffer;
107- buffer.resize (1024 );
94+ WCHAR buffer[1024 ];
10895
10996 for (DWORD i = 0 ; SetupDiEnumDeviceInfo (devInfo, i, &devData); ++i) {
11097 DWORD size = 0 ;
@@ -114,13 +101,8 @@ std::unordered_map<uint64_t, DeviceInfo> GetDeviceInfoSetupApi(const std::unorde
114101 DeviceInfo* entry = nullptr ;
115102
116103 // // Get hardware ID (contains VEN_xxxx&DEV_xxxx)
117- if (SetupDiGetDeviceRegistryPropertyW (devInfo,
118- &devData,
119- SPDRP_HARDWAREID,
120- ®DataType,
121- (PBYTE)buffer.data (),
122- (DWORD)buffer.size (),
123- &size)) {
104+ if (SetupDiGetDeviceRegistryPropertyW (devInfo, &devData, SPDRP_HARDWAREID, ®DataType,
105+ (PBYTE)buffer, sizeof (buffer), &size)) {
124106 // PCI\VEN_xxxx&DEV_yyyy&...
125107 // ACPI\VEN_xxxx&DEV_yyyy&... if we're lucky.
126108 // ACPI values seem to be very inconsistent, so we check fairly carefully and always require a device id.
@@ -148,23 +130,31 @@ std::unordered_map<uint64_t, DeviceInfo> GetDeviceInfoSetupApi(const std::unorde
148130 device_info[key] = {};
149131 } else {
150132 if (guid == GUID_DEVCLASS_PROCESSOR) {
151- // skip duplicate processor entries as we don't need to accumulate bus numbers for them
133+ // skip duplicate processor entries
152134 continue ;
153135 }
154136 }
155137
156138 entry = &device_info[key];
157139 entry->vendor_id = vendor_id;
158140 entry->device_id = device_id;
141+ // put the first hardware id string in the metadata. ignore the other lines.
142+ entry->metadata .emplace (L" SPDRP_HARDWAREID" , std::wstring (buffer, wcslen (buffer)));
159143 } else {
160144 // need valid ids
161145 continue ;
162146 }
163147
164- // Get device description.
148+ // Use the friendly name if available.
149+ if (SetupDiGetDeviceRegistryPropertyW (devInfo, &devData, SPDRP_FRIENDLYNAME, nullptr ,
150+ (PBYTE)buffer, sizeof (buffer), &size)) {
151+ entry->description = std::wstring{buffer};
152+ }
153+
154+ // Set type using the device description to try and infer an NPU.
165155 if (SetupDiGetDeviceRegistryPropertyW (devInfo, &devData, SPDRP_DEVICEDESC, nullptr ,
166- (PBYTE)buffer. data (), (DWORD) buffer. size ( ), &size)) {
167- entry-> description = buffer;
156+ (PBYTE)buffer, sizeof ( buffer), &size)) {
157+ std::wstring desc{ buffer} ;
168158
169159 // Should we require the NPU to be found by DXCore or do we want to allow this vague matching?
170160 // Probably depends on whether we always attempt to run DXCore or not.
@@ -175,9 +165,13 @@ std::unordered_map<uint64_t, DeviceInfo> GetDeviceInfoSetupApi(const std::unorde
175165 desc.find (L" VPU" ) != std::wstring::npos);
176166 };
177167
178- // not 100% accurate. is there a better way?
168+ // use description if no friendly name
169+ if (entry->description .empty ()) {
170+ entry->description = desc;
171+ }
172+
179173 uint64_t npu_key = GetDeviceKey (*entry);
180- bool is_npu = npus.count (npu_key) > 0 || possible_npu (entry-> description );
174+ bool is_npu = npus.count (npu_key) > 0 || possible_npu (desc );
181175
182176 if (guid == GUID_DEVCLASS_DISPLAY) {
183177 entry->type = OrtHardwareDeviceType_GPU;
@@ -201,18 +195,21 @@ std::unordered_map<uint64_t, DeviceInfo> GetDeviceInfoSetupApi(const std::unorde
201195 }
202196
203197 if (SetupDiGetDeviceRegistryPropertyW (devInfo, &devData, SPDRP_MFG, nullptr ,
204- (PBYTE)buffer. data (), (DWORD) buffer. size ( ), &size)) {
205- entry->vendor = buffer;
198+ (PBYTE)buffer, sizeof ( buffer), &size)) {
199+ entry->vendor = std::wstring ( buffer, wcslen (buffer)) ;
206200 }
207201
208- if (guid != GUID_DEVCLASS_PROCESSOR) {
209- DWORD busNumber = 0 ;
210- size = 0 ;
211- if (SetupDiGetDeviceRegistryPropertyW (devInfo, &devData, SPDRP_BUSNUMBER, nullptr ,
212- reinterpret_cast <PBYTE>(&busNumber), sizeof (busNumber), &size)) {
213- // push_back in case there are two identical devices. not sure how else to tell them apart
214- entry->bus_ids .push_back (busNumber);
202+ // Add the UI number if GPU. Helpful if user has integrated and discrete GPUs
203+ if (entry->type == OrtHardwareDeviceType_GPU) {
204+ DWORD ui_number = 0 ;
205+ if (SetupDiGetDeviceRegistryPropertyW (devInfo, &devData, SPDRP_UI_NUMBER, nullptr ,
206+ (PBYTE)&ui_number, sizeof (ui_number), &size)) {
207+ // use value read.
208+ } else {
209+ // infer it as 0 if not set.
215210 }
211+
212+ entry->metadata .emplace (L" SPDRP_UI_NUMBER" , std::to_wstring (ui_number));
216213 }
217214 }
218215
@@ -226,50 +223,58 @@ std::unordered_map<uint64_t, DeviceInfo> GetDeviceInfoSetupApi(const std::unorde
226223std::unordered_map<uint64_t , DeviceInfo> GetDeviceInfoD3D12 () {
227224 std::unordered_map<uint64_t , DeviceInfo> device_info;
228225
229- IDXGIFactory6* factory = nullptr ;
230- HRESULT hr = CreateDXGIFactory1 (IID_PPV_ARGS (&factory));
231- if (FAILED (hr)) {
226+ ComPtr<IDXGIFactory6> factory;
227+ if (FAILED (CreateDXGIFactory2 (0 , IID_PPV_ARGS (&factory)))) {
232228 std::cerr << " Failed to create DXGI factory.\n " ;
233229 return device_info;
234230 }
235231
236- IDXGIAdapter1* adapter = nullptr ;
237-
238- // iterate by high-performance GPU preference first
239- for (UINT i = 0 ; factory->EnumAdapterByGpuPreference (i, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE,
240- IID_PPV_ARGS (&adapter)) != DXGI_ERROR_NOT_FOUND;
241- ++i) {
232+ ComPtr<IDXGIAdapter1> adapter;
233+ for (UINT i = 0 ; factory->EnumAdapters1 (i, adapter.ReleaseAndGetAddressOf ()) != DXGI_ERROR_NOT_FOUND; ++i) {
242234 DXGI_ADAPTER_DESC1 desc;
243235 if (FAILED (adapter->GetDesc1 (&desc))) {
244236 continue ;
245237 }
246238
247- do {
248- if ((desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) != 0 ||
249- (desc.Flags & DXGI_ADAPTER_FLAG_REMOTE) != 0 ) {
250- // software or remote. skip
251- break ;
252- }
239+ if ((desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) != 0 ||
240+ (desc.Flags & DXGI_ADAPTER_FLAG_REMOTE) != 0 ) {
241+ // software or remote. skip
242+ continue ;
243+ }
253244
254- static_assert (sizeof (LUID) == sizeof (uint64_t ), " LUID and uint64_t are not the same size" );
255- uint64_t key = GetLuidKey (desc.AdapterLuid );
245+ static_assert (sizeof (LUID) == sizeof (uint64_t ), " LUID and uint64_t are not the same size" );
246+ uint64_t key = GetLuidKey (desc.AdapterLuid );
256247
257- DeviceInfo& info = device_info[key];
258- info.type = OrtHardwareDeviceType_GPU;
259- info.vendor_id = desc.VendorId ;
260- info.device_id = desc.DeviceId ;
261- info.description = std::wstring (desc.Description );
262-
263- info.metadata [L" VideoMemory" ] = std::to_wstring (desc.DedicatedVideoMemory / (1024 * 1024 )) + L" MB" ;
264- info.metadata [L" SystemMemory" ] = std::to_wstring (desc.DedicatedSystemMemory / (1024 * 1024 )) + L" MB" ;
265- info.metadata [L" SharedSystemMemory" ] = std::to_wstring (desc.DedicatedSystemMemory / (1024 * 1024 )) + L" MB" ;
266- info.metadata [L" HighPerformanceIndex" ] = std::to_wstring (i);
267- } while (false );
248+ DeviceInfo& info = device_info[key];
249+ info.type = OrtHardwareDeviceType_GPU;
250+ info.vendor_id = desc.VendorId ;
251+ info.device_id = desc.DeviceId ;
252+ info.description = std::wstring (desc.Description );
268253
269- adapter->Release ();
254+ info.metadata [L" DxgiAdapterNumber" ] = std::to_wstring (i);
255+ info.metadata [L" VideoMemory" ] = std::to_wstring (desc.DedicatedVideoMemory / (1024 * 1024 )) + L" MB" ;
256+ info.metadata [L" SystemMemory" ] = std::to_wstring (desc.DedicatedSystemMemory / (1024 * 1024 )) + L" MB" ;
257+ info.metadata [L" SharedSystemMemory" ] = std::to_wstring (desc.DedicatedSystemMemory / (1024 * 1024 )) + L" MB" ;
270258 }
271259
272- factory->Release ();
260+ // iterate by high-performance GPU preference to add that info
261+ for (UINT i = 0 ; factory->EnumAdapterByGpuPreference (
262+ i, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE,
263+ IID_PPV_ARGS (adapter.ReleaseAndGetAddressOf ())) != DXGI_ERROR_NOT_FOUND;
264+ ++i) {
265+ DXGI_ADAPTER_DESC1 desc;
266+ if (FAILED (adapter->GetDesc1 (&desc))) {
267+ continue ;
268+ }
269+
270+ uint64_t key = GetLuidKey (desc.AdapterLuid );
271+
272+ auto it = device_info.find (key);
273+ if (it != device_info.end ()) {
274+ DeviceInfo& info = it->second ;
275+ info.metadata [L" HighPerformanceIndex" ] = std::to_wstring (i);
276+ }
277+ }
273278
274279 return device_info;
275280}
@@ -284,7 +289,9 @@ std::unordered_map<uint64_t, DeviceInfo> GetDeviceInfoDxcore() {
284289 return device_info;
285290 }
286291
287- // manually define for older Windows versions. will be no matches but means this code works on machines with dxcore.
292+ // NOTE: These GUIDs require a newer Windows SDK than what we target by default.
293+ // They were added in 10.0.22621.0 as part of the DirectXCore API.
294+ // To work around this we define a local copy of the values. On an older Windows machine they won't match anything.
288295 static const GUID local_DXCORE_ADAPTER_ATTRIBUTE_D3D12_GENERIC_ML = {0xb71b0d41 , 0x1088 , 0x422f , 0xa2 , 0x7c , 0x2 , 0x50 , 0xb7 , 0xd3 , 0xa9 , 0x88 };
289296 static const GUID local_DXCORE_HARDWARE_TYPE_ATTRIBUTE_NPU = {0xd46140c4 , 0xadd7 , 0x451b , 0x9e , 0x56 , 0x6 , 0xfe , 0x8c , 0x3b , 0x58 , 0xed };
290297
@@ -353,27 +360,17 @@ std::unordered_map<uint64_t, DeviceInfo> GetDeviceInfoDxcore() {
353360 &is_integrated))) {
354361 info.metadata [L" Discrete" ] = is_integrated ? L" 0" : L" 1" ;
355362 }
356-
357- // this returns char_t on us-en Windows. assuming it returns wchar_t on other locales but not clear what it
358- // does when.
359- // The description from SetupApi is wchar_t so assuming we have that and don't need this one.
360- //
361- // hrId = HRESULT_FROM_WIN32(ERROR_NOT_FOUND);
362- // std::wstring driverDescription;
363- // driverDescription.resize(256);
364- // // this doesn't seem to return wchar_t
365- // if (adapter->IsPropertySupported(DXCoreAdapterProperty::DriverDescription)) {
366- // hrId = adapter->GetProperty(DXCoreAdapterProperty::DriverDescription, sizeof(driverDescription),
367- // &driverDescription);
368- // info.description = driverDescription;
369- // }
370363 }
371364 }
372365
373366 return device_info;
374367}
375368} // namespace
376369
370+ // Get devices from various sources and combine them into a single set of devices.
371+ // For CPU we use setupapi data.
372+ // For GPU we augment the d3d12 and dxcore data with the setupapi data.
373+ // For NPU we augment the dxcore data with the setupapi data.
377374std::unordered_set<OrtHardwareDevice> DeviceDiscovery::DiscoverDevicesForPlatform () {
378375 // dxcore info. key is luid
379376 std::unordered_map<uint64_t , DeviceInfo> luid_to_dxinfo = GetDeviceInfoDxcore ();
@@ -408,18 +405,12 @@ std::unordered_set<OrtHardwareDevice> DeviceDiscovery::DiscoverDevicesForPlatfor
408405 }
409406 }
410407
411- std::wstring_convert<std::codecvt_utf8<wchar_t >> converter; // wstring to string
408+ std::wstring_convert<std::codecvt_utf8<wchar_t > > converter; // wstring to string
412409 const auto device_to_ortdevice = [&converter](
413410 DeviceInfo& device,
414411 std::unordered_map<std::wstring, std::wstring>* extra_metadata = nullptr ) {
415412 OrtHardwareDevice ortdevice{device.type , device.vendor_id , device.device_id , converter.to_bytes (device.vendor )};
416413
417- if (device.bus_ids .size () > 0 ) {
418- // use the first bus number. not sure how to handle multiple
419- ortdevice.metadata .Add (" BusNumber" , std::to_string (device.bus_ids .back ()).c_str ());
420- device.bus_ids .pop_back ();
421- }
422-
423414 if (!device.description .empty ()) {
424415 ortdevice.metadata .Add (" Description" , converter.to_bytes (device.description ));
425416 }
@@ -437,6 +428,18 @@ std::unordered_set<OrtHardwareDevice> DeviceDiscovery::DiscoverDevicesForPlatfor
437428 }
438429 }
439430
431+ std::ostringstream oss;
432+ oss << " Adding OrtHardwareDevice {vendor_id:0x" << std::hex << ortdevice.vendor_id
433+ << " , device_id:0x" << ortdevice.device_id
434+ << " , type:" << std::dec << static_cast <int >(ortdevice.type )
435+ << " , metadata: [" ;
436+ for (auto & [key, value] : ortdevice.metadata .entries ) {
437+ oss << key << " =" << value << " , " ;
438+ }
439+
440+ oss << " ]}" << std::endl;
441+ LOGS_DEFAULT (INFO) << oss.str ();
442+
440443 return ortdevice;
441444 };
442445
@@ -459,14 +462,14 @@ std::unordered_set<OrtHardwareDevice> DeviceDiscovery::DiscoverDevicesForPlatfor
459462 // use SetupApi info. merge metadata.
460463 devices.emplace (device_to_ortdevice (it->second , &device.metadata ));
461464 } else {
462- // no matching entry in SetupApi. use the dxinfo. no vendor. no BusNumber.
465+ // no matching entry in SetupApi. use the dxinfo. will be missing vendor name and UI_NUMBER
463466 devices.emplace (device_to_ortdevice (device));
464467 }
465468 }
466469
467470 return devices;
468471}
469- #else // !defined(ORT_MINIMAL_BUILD)
472+ #else // !defined(ORT_MINIMAL_BUILD) && !defined(_GAMING_XBOX)
470473std::unordered_set<OrtHardwareDevice> DeviceDiscovery::DiscoverDevicesForPlatform () {
471474 return {};
472475}
0 commit comments