Use QueryAdapter APIs as per MS directives

Update ICD loader to use QueryAdapter APIs
as per MS directives for para-virtualization.

This includes changes to -
1. Order OpenCL platforms consistent with Windows adapters,
   to honor user-settings for multi-adapter machines.
2. Fix OS version detection.
3. Some minor fixes including
    1. Add trace prints at required places.
    2. Fix build failure in getting the address of D3DKMTQueryAdapterInfo.
    3. Fix the early return from platform enumeration when no entries are found for the old
       registry key under "HKLM\\SOFTWARE\\Khronos\\OpenCL\\Vendors", so that entries found
       through the DXGK interface and HKR entries can still be added.
4. Some other fixes including
    1. Update README.md with WDK dependency information.
    2. Move the AddAdapter* call back inside the for loop; it got misplaced while resolving conflicts.
    3. Free WinAdapter allocations and avoid overriding NULL platform.
5. Fixes based on review comments including -
    1. Each time the WinAdapter array is reallocated, the previous allocation is freed.
       However, the final allocation was never freed.
       It is now freed at the end of enumeration.
    2. Fix header file ordering by including windows headers
       inside necessary header files.
diff --git a/README.md b/README.md
index 27c1de7..d520ac0 100644
--- a/README.md
+++ b/README.md
@@ -31,6 +31,11 @@
 By default, the OpenCL ICD Loader will look for OpenCL Headers in the `inc` directory.
 
 By default, the OpenCL ICD Loader on Windows requires the Windows Driver Kit (WDK).
+To build the OpenCL ICD Loader with WDK support:
+* Install a recent Windows Driver Kit (WDK), currently available at https://docs.microsoft.com/en-us/windows-hardware/drivers/download-the-wdk
+
+* Set the environment variable `WDK` to the WDK include directory. Example: `set WDK=C:\Program Files (x86)\Windows Kits\10\include\10.0.17763.0`
+
 An OpenCL ICD Loader may be built without the Windows Driver Kit using the CMake variable `OPENCL_ICD_LOADER_REQUIRE_WDK`, however this option should be used with caution since it may prevent the OpenCL ICD Loader from enumerating some OpenCL implementations.
 This dependency may be removed in the future.
 
@@ -115,4 +120,4 @@
 ## Contributing
 
 Contributions to the OpenCL ICD Loader are welcomed and encouraged.
-You will be prompted with a one-time "click-through" CLA dialog as part of submitting your pull request or other contribution to GitHub.
\ No newline at end of file
+You will be prompted with a one-time "click-through" CLA dialog as part of submitting your pull request or other contribution to GitHub.
diff --git a/loader/icd_dispatch.c b/loader/icd_dispatch.c
index df967cb..0a20777 100644
--- a/loader/icd_dispatch.c
+++ b/loader/icd_dispatch.c
@@ -99,6 +99,12 @@
 {
     // initialize the platforms (in case they have not been already)
     khrIcdInitialize();
+
+    if (!platform && khrIcdVendors != NULL)
+    {
+        platform = khrIcdVendors[0].platform;
+    }
+
     KHR_ICD_VALIDATE_HANDLE_RETURN_ERROR(platform, CL_INVALID_PLATFORM);   
     return platform->dispatch->clGetDeviceIDs(
         platform,
@@ -196,8 +202,12 @@
     // initialize the platforms (in case they have not been already)
     khrIcdInitialize();
 
-    // determine the platform to use from the properties specified
+    // determine the platform to use from the properties and device_type specified
     khrIcdContextPropertiesGetPlatform(properties, &platform);
+    if (!platform && khrIcdVendors != NULL)
+    {
+        platform = khrIcdVendors[0].platform;
+    }
 
     // validate the platform handle and dispatch
     KHR_ICD_VALIDATE_HANDLE_RETURN_HANDLE(platform, CL_INVALID_PLATFORM);
@@ -1823,6 +1833,10 @@
 
     // determine the platform to use from the properties specified
     khrIcdContextPropertiesGetPlatform(properties, &platform);
+    if (!platform && khrIcdVendors != NULL)
+    {
+        platform = khrIcdVendors[0].platform;
+    }
 
     KHR_ICD_VALIDATE_HANDLE_RETURN_ERROR(platform, CL_INVALID_PLATFORM);    
     return platform->dispatch->clGetGLContextInfoKHR(
diff --git a/loader/linux/icd_linux.c b/loader/linux/icd_linux.c
index f6bb7b6..a36655e 100644
--- a/loader/linux/icd_linux.c
+++ b/loader/linux/icd_linux.c
@@ -166,4 +166,3 @@
 {
     dlclose(library);
 }
-
diff --git a/loader/windows/icd_windows.c b/loader/windows/icd_windows.c
index ec86184..6f7d880 100644
--- a/loader/windows/icd_windows.c
+++ b/loader/windows/icd_windows.c
@@ -23,8 +23,61 @@
 #include <windows.h>
 #include <winreg.h>
 
+#include <initguid.h>
+#include <dxgi.h>
+typedef HRESULT (WINAPI *PFN_CREATE_DXGI_FACTORY)(REFIID, void **);
+
 static INIT_ONCE initialized = INIT_ONCE_STATIC_INIT;
 
+typedef struct WinAdapter // one ICD library queued by AdapterAdd() until the final ordering pass
+{
+    char * szName; // heap copy of the library name, allocated in AdapterAdd()
+    LUID luid;     // LUID given to AdapterAdd(); registry and HKR enumeration pass ZeroLuid
+} WinAdapter;
+
+LUID ZeroLuid = { 0, 0 };
+
+static WinAdapter* pWinAdapterBegin = NULL;
+static WinAdapter* pWinAdapterEnd = NULL;
+static WinAdapter* pWinAdapterCapacity = NULL;
+
+// Queue an ICD library (copying szName) with its adapter LUID for later ordering.
+// Best effort: the entry is silently dropped if szName is NULL or memory runs out.
+void AdapterAdd(const char* szName, LUID luid)
+{
+    if (!szName)
+    {
+        return;
+    }
+
+    // Grow the array when it is full: capacity goes 0 -> 2, then doubles.
+    if (pWinAdapterEnd == pWinAdapterCapacity)
+    {
+        size_t OldCapacity = pWinAdapterCapacity - pWinAdapterBegin;
+        size_t NewCapacity = (0 == OldCapacity) ? 2 : OldCapacity * 2;
+        // realloc leaves the old array untouched on failure, so no entry is lost.
+        WinAdapter* pNewBegin = realloc(pWinAdapterBegin, NewCapacity * sizeof(*pNewBegin));
+        if (!pNewBegin)
+        {
+            return;
+        }
+        pWinAdapterCapacity = pNewBegin + NewCapacity;
+        pWinAdapterEnd = pNewBegin + OldCapacity;
+        pWinAdapterBegin = pNewBegin;
+    }
+
+    // Keep a private copy of the name; the registry caller passes a stack buffer.
+    size_t nameLen = strlen(szName) + 1;
+    char* szCopy = malloc(nameLen);
+    if (szCopy)
+    {
+        memcpy(szCopy, szName, nameLen);
+        pWinAdapterEnd->szName = szCopy;
+        pWinAdapterEnd->luid = luid;
+        ++pWinAdapterEnd;
+    }
+}
+
 /*
  * 
  * Vendor enumeration functions
@@ -59,59 +112,98 @@
     if (ERROR_SUCCESS != result)
     {
         KHR_ICD_TRACE("Failed to open platforms key %s, continuing\n", platformsName);
-        return TRUE;
     }
-
-    // for each value
-    for (dwIndex = 0;; ++dwIndex)
-    {
-        char cszLibraryName[1024] = {0};
-        DWORD dwLibraryNameSize = sizeof(cszLibraryName);
-        DWORD dwLibraryNameType = 0;     
-        DWORD dwValue = 0;
-        DWORD dwValueSize = sizeof(dwValue);
-
-        // read the value name
-        KHR_ICD_TRACE("Reading value %d...\n", dwIndex);
-        result = RegEnumValueA(
-              platformsKey,
-              dwIndex,
-              cszLibraryName,
-              &dwLibraryNameSize,
-              NULL,
-              &dwLibraryNameType,
-              (LPBYTE)&dwValue,
-              &dwValueSize);
-        // if RegEnumKeyEx fails, we are done with the enumeration
-        if (ERROR_SUCCESS != result) 
+    else {
+        // for each value
+        for (dwIndex = 0;; ++dwIndex)
         {
-            KHR_ICD_TRACE("Failed to read value %d, done reading key.\n", dwIndex);
-            break;
-        }
-        KHR_ICD_TRACE("Value %s found...\n", cszLibraryName);
+            char cszLibraryName[MAX_PATH] = {0};
+            DWORD dwLibraryNameSize = sizeof(cszLibraryName);
+            DWORD dwLibraryNameType = 0;     
+            DWORD dwValue = 0;
+            DWORD dwValueSize = sizeof(dwValue);
+
+            // read the value name
+            KHR_ICD_TRACE("Reading value %d...\n", dwIndex);
+            result = RegEnumValueA(
+                  platformsKey,
+                  dwIndex,
+                  cszLibraryName,
+                  &dwLibraryNameSize,
+                  NULL,
+                  &dwLibraryNameType,
+                  (LPBYTE)&dwValue,
+                  &dwValueSize);
+            // if RegEnumKeyEx fails, we are done with the enumeration
+            if (ERROR_SUCCESS != result) 
+            {
+                KHR_ICD_TRACE("Failed to read value %d, done reading key.\n", dwIndex);
+                break;
+            }
+            KHR_ICD_TRACE("Value %s found...\n", cszLibraryName);
         
-        // Require that the value be a DWORD and equal zero
-        if (REG_DWORD != dwLibraryNameType)  
-        {
-            KHR_ICD_TRACE("Value not a DWORD, skipping\n");
-            continue;
+            // Require that the value be a DWORD and equal zero
+            if (REG_DWORD != dwLibraryNameType)  
+            {
+                KHR_ICD_TRACE("Value not a DWORD, skipping\n");
+                continue;
+            }
+            if (dwValue)
+            {
+                KHR_ICD_TRACE("Value not zero, skipping\n");
+                continue;
+            }
+            // add the library
+            AdapterAdd(cszLibraryName, ZeroLuid);
         }
-        if (dwValue)
-        {
-            KHR_ICD_TRACE("Value not zero, skipping\n");
-            continue;
-        }
-
-        // add the library
-        khrIcdVendorAdd(cszLibraryName);
     }
 
+    // Add adapters according to DXGI's preference order
+    HMODULE hDXGI = LoadLibrary("dxgi.dll");
+    if (hDXGI)
+    {
+        IDXGIFactory* pFactory = NULL;
+        PFN_CREATE_DXGI_FACTORY pCreateDXGIFactory = (PFN_CREATE_DXGI_FACTORY)GetProcAddress(hDXGI, "CreateDXGIFactory");
+        HRESULT hr = pCreateDXGIFactory(&IID_IDXGIFactory, &pFactory);
+        if (SUCCEEDED(hr))
+        {
+            UINT i = 0;
+            IDXGIAdapter* pAdapter = NULL;
+            while (SUCCEEDED(pFactory->lpVtbl->EnumAdapters(pFactory, i++, &pAdapter)))
+            {
+                DXGI_ADAPTER_DESC AdapterDesc;
+                pAdapter->lpVtbl->GetDesc(pAdapter, &AdapterDesc);
+
+                for (WinAdapter* iterAdapter = pWinAdapterBegin; iterAdapter != pWinAdapterEnd; ++iterAdapter)
+                {
+                    if (iterAdapter->luid.LowPart == AdapterDesc.AdapterLuid.LowPart
+                        && iterAdapter->luid.HighPart == AdapterDesc.AdapterLuid.HighPart)
+                    {
+                        khrIcdVendorAdd(iterAdapter->szName);
+                        break;
+                    }
+                }
+
+                pAdapter->lpVtbl->Release(pAdapter);
+            }
+            pFactory->lpVtbl->Release(pFactory);
+        }
+        FreeLibrary(hDXGI);
+    }
+
+    // Go through the list again, putting any remaining adapters at the end of the list in an undefined order
+    for (WinAdapter* iterAdapter = pWinAdapterBegin; iterAdapter != pWinAdapterEnd; ++iterAdapter)
+    {
+        khrIcdVendorAdd(iterAdapter->szName);
+    }
+
+    free(pWinAdapterBegin);	
+
     result = RegCloseKey(platformsKey);
     if (ERROR_SUCCESS != result)
     {
         KHR_ICD_TRACE("Failed to close platforms key %s, ignoring\n", platformsName);
     }
-	
     return TRUE;
 }
 
@@ -148,4 +240,3 @@
 {
     FreeLibrary( (HMODULE)library);
 }
-
diff --git a/loader/windows/icd_windows_dxgk.c b/loader/windows/icd_windows_dxgk.c
index 8cc3fb9..8530c8e 100644
--- a/loader/windows/icd_windows_dxgk.c
+++ b/loader/windows/icd_windows_dxgk.c
@@ -37,16 +37,17 @@
     bool ret = false;
 #if defined(OPENCL_ICD_LOADER_REQUIRE_WDK)
 #if defined(DXGKDDI_INTERFACE_VERSION_WDDM2_4) && (DXGKDDI_INTERFACE_VERSION >= DXGKDDI_INTERFACE_VERSION_WDDM2_4)
+    // Get handle to GDI Runtime
+    HMODULE h = LoadLibrary("gdi32.dll");
+    if (h && GetProcAddress((HMODULE)h, "D3DKMTSubmitPresentBltToHwQueue")) // OS Version check
     {
         D3DKMT_ADAPTERINFO* pAdapterInfo = NULL;
         D3DKMT_ENUMADAPTERS2 EnumAdapters;
         NTSTATUS Status = STATUS_SUCCESS;
 
-        // Get handle to GDI Runtime
-        HMODULE h = LoadLibrary("gdi32.dll");
-        KHR_ICD_ASSERT(h != NULL);
-
         char cszLibraryName[MAX_PATH] = { 0 };
+        EnumAdapters.NumAdapters = 0;
+        EnumAdapters.pAdapters = NULL;
         PFND3DKMT_ENUMADAPTERS2 pEnumAdapters2 = (PFND3DKMT_ENUMADAPTERS2)GetProcAddress((HMODULE)h, "D3DKMTEnumAdapters2");
         if (!pEnumAdapters2)
         {
@@ -83,6 +84,8 @@
             KHR_ICD_TRACE("D3DKMT_ENUMADAPTERS2 status != SUCCESS\n");
             goto out;
         }
+        const char* cszOpenCLRegKeyName = GetOpenCLRegKeyName();
+        const int OpenCLRegKeyNameSize = (int)(strlen(cszOpenCLRegKeyName) + 1);
         for (UINT AdapterIndex = 0; AdapterIndex < EnumAdapters.NumAdapters; AdapterIndex++)
         {
             D3DDDI_QUERYREGISTRY_INFO QueryArgs = {0};
@@ -91,20 +94,13 @@
             QueryArgs.QueryType = D3DDDI_QUERYREGISTRY_ADAPTERKEY;
             QueryArgs.QueryFlags.TranslatePath = TRUE;
             QueryArgs.ValueType = REG_SZ;
-#ifdef _WIN64
-            wcscpy_s(QueryArgs.ValueName, ARRAYSIZE(L"OpenCLDriverName"), L"OpenCLDriverName");
-#else
-            // There is no WOW prefix for 32bit Windows hence make a specific check
-            BOOL is_wow64;
-            if (IsWow64Process(GetCurrentProcess(), &is_wow64) && is_wow64)
-            {
-                wcscpy_s(QueryArgs.ValueName, ARRAYSIZE(L"OpenCLDriverNameWow"), L"OpenCLDriverNameWow");
-            }
-            else
-            {
-                wcscpy_s(QueryArgs.ValueName, ARRAYSIZE(L"OpenCLDriverName"), L"OpenCLDriverName");
-            }
-#endif
+            MultiByteToWideChar(
+                CP_ACP,
+                0,
+                cszOpenCLRegKeyName,
+                OpenCLRegKeyNameSize,
+                QueryArgs.ValueName,
+                ARRAYSIZE(QueryArgs.ValueName));
             D3DKMT_QUERYADAPTERINFO QueryAdapterInfo = {0};
             QueryAdapterInfo.hAdapter = pAdapterInfo[AdapterIndex].hAdapter;
             QueryAdapterInfo.Type = KMTQAITYPE_QUERYREGISTRY;
@@ -113,8 +109,9 @@
             Status = D3DKMTQueryAdapterInfo(&QueryAdapterInfo);
             if (!NT_SUCCESS(Status))
             {
-                KHR_ICD_TRACE("D3DKMT_QUERYADAPTERINFO status != SUCCESS\n");
-                goto out;
+                // Continue trying to get as much info on each adapter as possible.
+                // It's too late to return FALSE and claim WDDM2_4 enumeration is not available here.
+                continue;
             }
             if (NT_SUCCESS(Status) && pQueryArgs->Status == D3DDDI_QUERYREGISTRY_STATUS_BUFFER_OVERFLOW)
             {
@@ -133,7 +130,7 @@
                 {
                     size_t len = wcstombs(cszLibraryName, pWchar, sizeof(cszLibraryName));
                     KHR_ICD_ASSERT(len == (sizeof(cszLibraryName) - 1));
-                    khrIcdVendorAdd(cszLibraryName);
+                    AdapterAdd(cszLibraryName, pAdapterInfo[AdapterIndex].AdapterLuid);
                 }
             }
             else if (Status == STATUS_INVALID_PARAMETER && pQueryArgs->Status == D3DDDI_QUERYREGISTRY_STATUS_FAIL)
diff --git a/loader/windows/icd_windows_hkr.c b/loader/windows/icd_windows_hkr.c
index 3387181..b006372 100644
--- a/loader/windows/icd_windows_hkr.c
+++ b/loader/windows/icd_windows_hkr.c
@@ -19,6 +19,7 @@
 #include "icd.h"
 #include "icd_windows_hkr.h"
 #include <windows.h>
+#include "icd_windows_dxgk.h"
 #include <cfgmgr32.h>
 #include <assert.h>
 #include <stdbool.h>
@@ -51,7 +52,7 @@
 #endif
 
 // Do not free the memory returned by this function.
-static const char* GetOpenCLRegKeyName(void)
+const char* GetOpenCLRegKeyName(void)
 {
 #ifdef _WIN64
     return OPENCL_REG_SUB_KEY;
@@ -136,7 +137,7 @@
 
         KHR_ICD_TRACE("    Path: %s\n", cszOclPath);
 
-        khrIcdVendorAdd(cszOclPath);
+        AdapterAdd(cszOclPath, ZeroLuid);
 
         bRet = true;
     }
diff --git a/loader/windows/icd_windows_hkr.h b/loader/windows/icd_windows_hkr.h
index 698fe5a..b2e16cb 100644
--- a/loader/windows/icd_windows_hkr.h
+++ b/loader/windows/icd_windows_hkr.h
@@ -17,5 +17,13 @@
  */
 
 #include <stdbool.h>
+#include <windows.h>
 
 bool khrIcdOsVendorsEnumerateHKR(void);
+
+extern LUID ZeroLuid; // declaration only; the object is defined once in icd_windows.c
+
+void AdapterAdd(const char* szName, LUID luid);
+
+// Do not free the memory returned by this function.
+const char* GetOpenCLRegKeyName(void);