diff --git a/patches/proton/80-nv_low_latency_dxvk.patch b/patches/proton/80-nv_low_latency_dxvk.patch new file mode 100644 index 0000000000..a266b0f96a --- /dev/null +++ b/patches/proton/80-nv_low_latency_dxvk.patch @@ -0,0 +1,1149 @@ +From e6e806dcce98b22f90a8859162cadd74669af4d6 Mon Sep 17 00:00:00 2001 +From: Eric Sullivan +Date: Sun, 8 Oct 2023 23:54:00 -0700 +Subject: [PATCH 1/2] Update Vulkan headers. + +--- + include/vulkan | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/vulkan b/include/vulkan +index 85c2334e92e..bbe0f575ebd 160000 +--- a/include/vulkan ++++ b/include/vulkan +@@ -1 +1 @@ +-Subproject commit 85c2334e92e215cce34e8e0ed8b2dce4700f4a50 ++Subproject commit bbe0f575ebd6098369f0ac6c6a43532732ed0ba6 + +From 2518e1c71e64679be3f38c622f8294e9354e44a0 Mon Sep 17 00:00:00 2001 +From: Eric Sullivan +Date: Mon, 9 Oct 2023 01:51:16 -0700 +Subject: [PATCH 2/2] Add VK_NV_low_latency2 support + +This commit adds support for the VK_NV_low_latency2 extension, and +implements the ID3DLowLatencyDevice interface.
+--- + src/d3d11/d3d11_device.cpp | 168 +++++++++++++++++++++++++++++++--- + src/d3d11/d3d11_device.h | 82 ++++++++++++++--- + src/d3d11/d3d11_interfaces.h | 58 +++++++++++- + src/d3d11/d3d11_swapchain.cpp | 36 +++++++- + src/d3d11/d3d11_swapchain.h | 18 +++- + src/dxvk/dxvk_adapter.cpp | 13 ++- + src/dxvk/dxvk_cmdlist.cpp | 17 +++- + src/dxvk/dxvk_cmdlist.h | 5 +- + src/dxvk/dxvk_device.cpp | 2 + + src/dxvk/dxvk_device.h | 51 ++++++++++- + src/dxvk/dxvk_device_info.h | 3 +- + src/dxvk/dxvk_extensions.h | 1 + + src/dxvk/dxvk_presenter.cpp | 99 +++++++++++++++++++- + src/dxvk/dxvk_presenter.h | 43 +++++++++ + src/dxvk/dxvk_queue.cpp | 4 +- + src/dxvk/dxvk_queue.h | 1 + + src/vulkan/vulkan_loader.h | 8 ++ + 17 files changed, 563 insertions(+), 46 deletions(-) + +diff --git a/src/d3d11/d3d11_device.cpp b/src/d3d11/d3d11_device.cpp +index 9398e484208..22760b48922 100644 +--- a/src/d3d11/d3d11_device.cpp ++++ b/src/d3d11/d3d11_device.cpp +@@ -15,6 +15,7 @@ + #include "d3d11_device.h" + #include "d3d11_fence.h" + #include "d3d11_input_layout.h" ++#include "d3d11_interfaces.h" + #include "d3d11_interop.h" + #include "d3d11_query.h" + #include "d3d11_resource.h" +@@ -2469,12 +2470,14 @@ namespace dxvk { + return deviceFeatures.nvxBinaryImport + && deviceFeatures.vk12.bufferDeviceAddress; + ++ case D3D11_VK_NV_LOW_LATENCY_2: ++ return deviceFeatures.nvLowLatency2; ++ + default: + return false; + } + } +- +- ++ + bool STDMETHODCALLTYPE D3D11DeviceExt::GetCudaTextureObjectNVX(uint32_t srvDriverHandle, uint32_t samplerDriverHandle, uint32_t* pCudaTextureHandle) { + ID3D11ShaderResourceView* srv = HandleToSrvNVX(srvDriverHandle); + +@@ -2783,8 +2786,133 @@ namespace dxvk { + + + ++ ++ D3D11LowLatencyDevice::D3D11LowLatencyDevice( ++ D3D11DXGIDevice* pContainer, ++ D3D11Device* pDevice) ++ : m_container(pContainer), m_device(pDevice) { ++ ++ } + + ++ ULONG STDMETHODCALLTYPE D3D11LowLatencyDevice::AddRef() { ++ return m_container->AddRef(); ++ } ++ ++ ++ ULONG STDMETHODCALLTYPE 
D3D11LowLatencyDevice::Release() { ++ return m_container->Release(); ++ } ++ ++ ++ HRESULT STDMETHODCALLTYPE D3D11LowLatencyDevice::QueryInterface( ++ REFIID riid, ++ void** ppvObject) { ++ return m_container->QueryInterface(riid, ppvObject); ++ } ++ ++ BOOL STDMETHODCALLTYPE D3D11LowLatencyDevice::SupportsLowLatency() { ++ return m_device->GetDXVKDevice()->features().nvLowLatency2; ++ } ++ ++ HRESULT STDMETHODCALLTYPE D3D11LowLatencyDevice::LatencySleep() { ++ if (!m_device->GetDXVKDevice()->features().nvLowLatency2) { ++ return E_NOINTERFACE; ++ } ++ ++ D3D11SwapChain* pSwapChain = m_device->GetLowLatencySwapChain(); ++ if (pSwapChain && pSwapChain->LowLatencyEnabled()) { ++ VkResult res = pSwapChain->LatencySleep(); ++ if (res != VK_SUCCESS) { ++ return S_FALSE; ++ } ++ } ++ ++ return S_OK; ++ } ++ ++ HRESULT STDMETHODCALLTYPE D3D11LowLatencyDevice::SetLatencySleepMode(BOOL lowLatencyMode, BOOL lowLatencyBoost, uint32_t minimumIntervalUs) { ++ if (!m_device->GetDXVKDevice()->features().nvLowLatency2) { ++ return E_NOINTERFACE; ++ } ++ ++ D3D11SwapChain* pSwapChain = m_device->GetLowLatencySwapChain(); ++ if (pSwapChain) { ++ VkResult res = pSwapChain->SetLatencySleepMode(lowLatencyMode, lowLatencyBoost, minimumIntervalUs); ++ if (res != VK_SUCCESS) { ++ return S_FALSE; ++ } ++ } ++ ++ return S_OK; ++ } ++ ++ HRESULT STDMETHODCALLTYPE D3D11LowLatencyDevice::SetLatencyMarker(uint64_t frameID, uint32_t markerType) { ++ if (!m_device->GetDXVKDevice()->features().nvLowLatency2) { ++ return E_NOINTERFACE; ++ } ++ ++ D3D11SwapChain* pSwapChain = m_device->GetLowLatencySwapChain(); ++ VkLatencyMarkerNV marker = static_cast(markerType); ++ uint64_t internalFrameId = frameID + DXGI_MAX_SWAP_CHAIN_BUFFERS; ++ ++ m_device->GetDXVKDevice()->setLatencyMarker(marker, internalFrameId); ++ ++ if (pSwapChain && pSwapChain->LowLatencyEnabled()) { ++ pSwapChain->SetLatencyMarker(marker, internalFrameId); ++ } ++ ++ return S_OK; ++ } ++ ++ HRESULT STDMETHODCALLTYPE 
D3D11LowLatencyDevice::GetLatencyInfo(D3D11_LATENCY_RESULTS* latencyResults) ++ { ++ if (!m_device->GetDXVKDevice()->features().nvLowLatency2) { ++ return E_NOINTERFACE; ++ } ++ ++ constexpr uint32_t frameReportSize = 64; ++ D3D11SwapChain* pSwapChain = m_device->GetLowLatencySwapChain(); ++ ++ if (pSwapChain && pSwapChain->LowLatencyEnabled()) { ++ std::vector frameReports; ++ pSwapChain->GetLatencyTimings(frameReports); ++ ++ if (frameReports.size() >= frameReportSize) { ++ for (uint32_t i = 0; i < frameReportSize; i++) { ++ VkLatencyTimingsFrameReportNV& frameReport = frameReports[i]; ++ latencyResults->frame_reports[i].frameID = frameReport.presentID - DXGI_MAX_SWAP_CHAIN_BUFFERS; ++ latencyResults->frame_reports[i].inputSampleTime = frameReport.inputSampleTimeUs; ++ latencyResults->frame_reports[i].simStartTime = frameReport.simStartTimeUs; ++ latencyResults->frame_reports[i].simEndTime = frameReport.simEndTimeUs; ++ latencyResults->frame_reports[i].renderSubmitStartTime = frameReport.renderSubmitStartTimeUs; ++ latencyResults->frame_reports[i].renderSubmitEndTime = frameReport.renderSubmitEndTimeUs; ++ latencyResults->frame_reports[i].presentStartTime = frameReport.presentStartTimeUs; ++ latencyResults->frame_reports[i].presentEndTime = frameReport.presentEndTimeUs; ++ latencyResults->frame_reports[i].driverStartTime = frameReport.driverStartTimeUs; ++ latencyResults->frame_reports[i].driverEndTime = frameReport.driverEndTimeUs; ++ latencyResults->frame_reports[i].osRenderQueueStartTime = frameReport.osRenderQueueStartTimeUs; ++ latencyResults->frame_reports[i].osRenderQueueEndTime = frameReport.osRenderQueueEndTimeUs; ++ latencyResults->frame_reports[i].gpuRenderStartTime = frameReport.gpuRenderStartTimeUs; ++ latencyResults->frame_reports[i].gpuRenderEndTime = frameReport.gpuRenderEndTimeUs; ++ latencyResults->frame_reports[i].gpuActiveRenderTimeUs = ++ frameReport.gpuRenderEndTimeUs - frameReport.gpuRenderStartTimeUs; ++ 
latencyResults->frame_reports[i].gpuFrameTimeUs = 0; ++ ++ if (i) { ++ latencyResults->frame_reports[i].gpuFrameTimeUs = ++ frameReports[i].gpuRenderEndTimeUs - frameReports[i - 1].gpuRenderEndTimeUs; ++ } ++ } ++ } ++ } ++ ++ return S_OK; ++ } ++ ++ ++ ++ + D3D11VideoDevice::D3D11VideoDevice( + D3D11DXGIDevice* pContainer, + D3D11Device* pDevice) +@@ -3021,7 +3149,11 @@ namespace dxvk { + + Com presenter = new D3D11SwapChain( + m_container, m_device, pSurfaceFactory, pDesc); +- ++ ++ if (m_device->GetDXVKDevice()->features().nvLowLatency2) { ++ m_device->AddSwapchain(presenter.ref()); ++ } ++ + *ppSwapChain = presenter.ref(); + return S_OK; + } catch (const DxvkError& e) { +@@ -3078,17 +3210,18 @@ namespace dxvk { + Rc pDxvkDevice, + D3D_FEATURE_LEVEL FeatureLevel, + UINT FeatureFlags) +- : m_dxgiAdapter (pAdapter), +- m_dxvkInstance (pDxvkInstance), +- m_dxvkAdapter (pDxvkAdapter), +- m_dxvkDevice (pDxvkDevice), +- m_d3d11Device (this, FeatureLevel, FeatureFlags), +- m_d3d11DeviceExt(this, &m_d3d11Device), +- m_d3d11Interop (this, &m_d3d11Device), +- m_d3d11Video (this, &m_d3d11Device), +- m_d3d11on12 (this, &m_d3d11Device, pD3D12Device, pD3D12Queue), +- m_metaDevice (this), +- m_dxvkFactory (this, &m_d3d11Device) { ++ : m_dxgiAdapter (pAdapter), ++ m_dxvkInstance (pDxvkInstance), ++ m_dxvkAdapter (pDxvkAdapter), ++ m_dxvkDevice (pDxvkDevice), ++ m_d3d11Device (this, FeatureLevel, FeatureFlags), ++ m_d3d11DeviceExt (this, &m_d3d11Device), ++ m_d3d11Interop (this, &m_d3d11Device), ++ m_d3dLowLatencyDevice (this, &m_d3d11Device), ++ m_d3d11Video (this, &m_d3d11Device), ++ m_d3d11on12 (this, &m_d3d11Device, pD3D12Device, pD3D12Queue), ++ m_metaDevice (this), ++ m_dxvkFactory (this, &m_d3d11Device) { + + } + +@@ -3142,7 +3275,12 @@ namespace dxvk { + *ppvObject = ref(&m_d3d11DeviceExt); + return S_OK; + } +- ++ ++ if (riid == __uuidof(ID3DLowLatencyDevice)) { ++ *ppvObject = ref(&m_d3dLowLatencyDevice); ++ return S_OK; ++ } ++ + if (riid == __uuidof(IDXGIDXVKDevice)) 
{ + *ppvObject = ref(&m_metaDevice); + return S_OK; +diff --git a/src/d3d11/d3d11_device.h b/src/d3d11/d3d11_device.h +index 7a44b5ad99c..7372bbec168 100644 +--- a/src/d3d11/d3d11_device.h ++++ b/src/d3d11/d3d11_device.h +@@ -24,6 +24,7 @@ + #include "d3d11_options.h" + #include "d3d11_shader.h" + #include "d3d11_state.h" ++#include "d3d11_swapchain.h" + #include "d3d11_util.h" + + namespace dxvk { +@@ -428,6 +429,22 @@ namespace dxvk { + + bool Is11on12Device() const; + ++ void AddSwapchain(D3D11SwapChain* swapchain) { ++ m_swapchains.push_back(swapchain); ++ } ++ ++ void RemoveSwapchain(D3D11SwapChain* swapchain) { ++ m_swapchains.erase(std::remove(m_swapchains.begin(), m_swapchains.end(), swapchain), m_swapchains.end()); ++ } ++ ++ UINT GetSwapchainCount() { ++ return m_swapchains.size(); ++ } ++ ++ D3D11SwapChain* GetLowLatencySwapChain() { ++ return (m_swapchains.size()) == 1 ? m_swapchains[0] : nullptr; ++ } ++ + static D3D_FEATURE_LEVEL GetMaxFeatureLevel( + const Rc& Instance, + const Rc& Adapter); +@@ -464,6 +481,8 @@ namespace dxvk { + D3D_FEATURE_LEVEL m_maxFeatureLevel; + D3D11DeviceFeatures m_deviceFeatures; + ++ std::vector m_swapchains; ++ + HRESULT CreateShaderModule( + D3D11CommonShader* pShaderModule, + DxvkShaderKey ShaderKey, +@@ -545,28 +564,28 @@ namespace dxvk { + uint64_t* gpuVAStart, + uint64_t* gpuVASize); + +- bool STDMETHODCALLTYPE CreateUnorderedAccessViewAndGetDriverHandleNVX( ++ bool STDMETHODCALLTYPE CreateUnorderedAccessViewAndGetDriverHandleNVX( + ID3D11Resource* pResource, + const D3D11_UNORDERED_ACCESS_VIEW_DESC* pDesc, + ID3D11UnorderedAccessView** ppUAV, + uint32_t* pDriverHandle); + +- bool STDMETHODCALLTYPE CreateShaderResourceViewAndGetDriverHandleNVX( ++ bool STDMETHODCALLTYPE CreateShaderResourceViewAndGetDriverHandleNVX( + ID3D11Resource* pResource, + const D3D11_SHADER_RESOURCE_VIEW_DESC* pDesc, + ID3D11ShaderResourceView** ppSRV, + uint32_t* pDriverHandle); + +- bool STDMETHODCALLTYPE
CreateSamplerStateAndGetDriverHandleNVX( + const D3D11_SAMPLER_DESC* pSamplerDesc, + ID3D11SamplerState** ppSamplerState, + uint32_t* pDriverHandle); +- ++ + private: + + D3D11DXGIDevice* m_container; + D3D11Device* m_device; +- ++ + void AddSamplerAndHandleNVX( + ID3D11SamplerState* pSampler, + uint32_t Handle); +@@ -586,6 +605,46 @@ namespace dxvk { + std::unordered_map m_srvHandleToPtr; + }; + ++ /** ++ * \brief D3D11 low latency device ++ */ ++ class D3D11LowLatencyDevice : public ID3DLowLatencyDevice { ++ ++ public: ++ ++ D3D11LowLatencyDevice( ++ D3D11DXGIDevice* pContainer, ++ D3D11Device* pDevice); ++ ++ ULONG STDMETHODCALLTYPE AddRef(); ++ ++ ULONG STDMETHODCALLTYPE Release(); ++ ++ HRESULT STDMETHODCALLTYPE QueryInterface( ++ REFIID riid, ++ void** ppvObject); ++ ++ BOOL STDMETHODCALLTYPE SupportsLowLatency(); ++ ++ HRESULT STDMETHODCALLTYPE LatencySleep(); ++ ++ HRESULT STDMETHODCALLTYPE SetLatencySleepMode( ++ BOOL lowLatencyMode, ++ BOOL lowLatencyBoost, ++ uint32_t minimumIntervalUs); ++ ++ HRESULT STDMETHODCALLTYPE SetLatencyMarker( ++ uint64_t frameID, ++ uint32_t markerType); ++ ++ HRESULT STDMETHODCALLTYPE GetLatencyInfo( ++ D3D11_LATENCY_RESULTS* latencyResults); ++ ++ private: ++ ++ D3D11DXGIDevice* m_container; ++ D3D11Device* m_device; ++ }; + + /** + * \brief D3D11 video device +@@ -856,12 +915,13 @@ namespace dxvk { + Rc m_dxvkAdapter; + Rc m_dxvkDevice; + +- D3D11Device m_d3d11Device; +- D3D11DeviceExt m_d3d11DeviceExt; +- D3D11VkInterop m_d3d11Interop; +- D3D11VideoDevice m_d3d11Video; +- D3D11on12Device m_d3d11on12; +- DXGIDXVKDevice m_metaDevice; ++ D3D11Device m_d3d11Device; ++ D3D11DeviceExt m_d3d11DeviceExt; ++ D3D11VkInterop m_d3d11Interop; ++ D3D11LowLatencyDevice m_d3dLowLatencyDevice; ++ D3D11VideoDevice m_d3d11Video; ++ D3D11on12Device m_d3d11on12; ++ DXGIDXVKDevice m_metaDevice; + + DXGIVkSwapChainFactory m_dxvkFactory; + +diff --git a/src/d3d11/d3d11_interfaces.h b/src/d3d11/d3d11_interfaces.h +index 587cde1394e..49b301b0fdb
100644 +--- a/src/d3d11/d3d11_interfaces.h ++++ b/src/d3d11/d3d11_interfaces.h +@@ -16,6 +16,7 @@ enum D3D11_VK_EXTENSION : uint32_t { + D3D11_VK_EXT_BARRIER_CONTROL = 3, + D3D11_VK_NVX_BINARY_IMPORT = 4, + D3D11_VK_NVX_IMAGE_VIEW_HANDLE = 5, ++ D3D11_VK_NV_LOW_LATENCY_2 = 6 + }; + + +@@ -27,6 +28,33 @@ enum D3D11_VK_BARRIER_CONTROL : uint32_t { + D3D11_VK_BARRIER_CONTROL_IGNORE_GRAPHICS_UAV = 1 << 1, + }; + ++/** ++ * \brief Frame Report Info ++ */ ++typedef struct D3D11_LATENCY_RESULTS ++{ ++ UINT32 version; ++ struct D3D11_FRAME_REPORT { ++ UINT64 frameID; ++ UINT64 inputSampleTime; ++ UINT64 simStartTime; ++ UINT64 simEndTime; ++ UINT64 renderSubmitStartTime; ++ UINT64 renderSubmitEndTime; ++ UINT64 presentStartTime; ++ UINT64 presentEndTime; ++ UINT64 driverStartTime; ++ UINT64 driverEndTime; ++ UINT64 osRenderQueueStartTime; ++ UINT64 osRenderQueueEndTime; ++ UINT64 gpuRenderStartTime; ++ UINT64 gpuRenderEndTime; ++ UINT32 gpuActiveRenderTimeUs; ++ UINT32 gpuFrameTimeUs; ++ UINT8 rsvd[120]; ++ } frame_reports[64]; ++ UINT8 rsvd[32]; ++} D3D11_LATENCY_RESULTS; + + /** + * \brief Extended shader interface +@@ -114,6 +142,33 @@ ID3D11VkExtDevice1 : public ID3D11VkExtDevice { + uint32_t* pCudaTextureHandle) = 0; + }; + ++/** ++ * \brief Low latency device interface ++ * ++ * Introduces methods to query low latency support, ++ * sleep, set the latency sleep mode and latency ++ * markers, and retrieve latency timing reports.
++ */ ++MIDL_INTERFACE("f3112584-41f9-348d-a59b-00b7e1d285d6") ++ID3DLowLatencyDevice : public IUnknown { ++ static const GUID guid; ++ ++ virtual BOOL STDMETHODCALLTYPE SupportsLowLatency() = 0; ++ ++ virtual HRESULT STDMETHODCALLTYPE LatencySleep() = 0; ++ ++ virtual HRESULT STDMETHODCALLTYPE SetLatencySleepMode( ++ BOOL lowLatencyMode, ++ BOOL lowLatencyBoost, ++ uint32_t minimumIntervalUs) = 0; ++ ++ virtual HRESULT STDMETHODCALLTYPE SetLatencyMarker( ++ uint64_t frameID, ++ uint32_t markerType) = 0; ++ ++ virtual HRESULT STDMETHODCALLTYPE GetLatencyInfo( ++ D3D11_LATENCY_RESULTS* latencyResults) = 0; ++}; + + /** + * \brief Extended D3D11 context +@@ -182,17 +237,18 @@ ID3D11VkExtContext1 : public ID3D11VkExtContext { + uint32_t numWriteResources) = 0; + }; + +- + #ifdef _MSC_VER + struct __declspec(uuid("bb8a4fb9-3935-4762-b44b-35189a26414a")) ID3D11VkExtShader; + struct __declspec(uuid("8a6e3c42-f74c-45b7-8265-a231b677ca17")) ID3D11VkExtDevice; + struct __declspec(uuid("cfcf64ef-9586-46d0-bca4-97cf2ca61b06")) ID3D11VkExtDevice1; + struct __declspec(uuid("fd0bca13-5cb6-4c3a-987e-4750de2ca791")) ID3D11VkExtContext; + struct __declspec(uuid("874b09b2-ae0b-41d8-8476-5f3b7a0e879d")) ID3D11VkExtContext1; ++struct __declspec(uuid("f3112584-41f9-348d-a59b-00b7e1d285d6")) ID3DLowLatencyDevice; + #else + __CRT_UUID_DECL(ID3D11VkExtShader, 0xbb8a4fb9,0x3935,0x4762,0xb4,0x4b,0x35,0x18,0x9a,0x26,0x41,0x4a); + __CRT_UUID_DECL(ID3D11VkExtDevice, 0x8a6e3c42,0xf74c,0x45b7,0x82,0x65,0xa2,0x31,0xb6,0x77,0xca,0x17); + __CRT_UUID_DECL(ID3D11VkExtDevice1, 0xcfcf64ef,0x9586,0x46d0,0xbc,0xa4,0x97,0xcf,0x2c,0xa6,0x1b,0x06); + __CRT_UUID_DECL(ID3D11VkExtContext, 0xfd0bca13,0x5cb6,0x4c3a,0x98,0x7e,0x47,0x50,0xde,0x2c,0xa7,0x91); + __CRT_UUID_DECL(ID3D11VkExtContext1, 0x874b09b2,0xae0b,0x41d8,0x84,0x76,0x5f,0x3b,0x7a,0x0e,0x87,0x9d); ++__CRT_UUID_DECL(ID3DLowLatencyDevice, 0xf3112584,0x41f9,0x348d,0xa5,0x9b,0x00,0xb7,0xe1,0xd2,0x85,0xd6); + #endif +diff --git 
a/src/d3d11/d3d11_swapchain.cpp b/src/d3d11/d3d11_swapchain.cpp +index 0e823f410ef..4faffa00e48 100644 +--- a/src/d3d11/d3d11_swapchain.cpp ++++ b/src/d3d11/d3d11_swapchain.cpp +@@ -351,6 +351,34 @@ namespace dxvk { + *pFrameStatistics = m_frameStatistics; + } + ++ VkResult D3D11SwapChain::SetLatencySleepMode( ++ bool lowLatencyMode, ++ bool lowLatencyBoost, ++ uint32_t minimumIntervalUs) { ++ if (lowLatencyMode && !LowLatencyEnabled()) { ++ RecreateSwapChain(); ++ } ++ return m_presenter->setLatencySleepMode(lowLatencyMode, lowLatencyBoost, minimumIntervalUs); ++ } ++ ++ VkResult D3D11SwapChain::LatencySleep() { ++ return m_presenter->latencySleep(); ++ } ++ ++ void D3D11SwapChain::SetLatencyMarker( ++ VkLatencyMarkerNV marker, ++ uint64_t presentId) { ++ m_presenter->setLatencyMarker(marker, presentId); ++ } ++ ++ VkResult D3D11SwapChain::GetLatencyTimings( ++ std::vector& frameReports) { ++ return m_presenter->getLatencyTimings(frameReports); ++ } ++ ++ bool D3D11SwapChain::LowLatencyEnabled() { ++ return m_presenter->lowLatencyEnabled(); ++ } + + HRESULT D3D11SwapChain::PresentImage(UINT SyncInterval) { + // Flush pending rendering commands before +@@ -410,9 +438,11 @@ namespace dxvk { + uint32_t Repeat) { + auto lock = pContext->LockContext(); + +- // Bump frame ID as necessary +- if (!Repeat) +- m_frameId += 1; ++ if (!Repeat) { ++ m_frameId = (m_presenter->lowLatencyEnabled() && m_device->getLatencyMarkers().present) ? ++ m_device->getLatencyMarkers().present : ++ m_frameId + 1; ++ } + + // Present from CS thread so that we don't + // have to synchronize with it first. 
+diff --git a/src/d3d11/d3d11_swapchain.h b/src/d3d11/d3d11_swapchain.h +index 00073d7690e..a3ecf634381 100644 +--- a/src/d3d11/d3d11_swapchain.h ++++ b/src/d3d11/d3d11_swapchain.h +@@ -86,6 +86,22 @@ namespace dxvk { + void STDMETHODCALLTYPE GetFrameStatistics( + DXGI_VK_FRAME_STATISTICS* pFrameStatistics); + ++ VkResult SetLatencySleepMode( ++ bool lowLatencyMode, ++ bool lowLatencyBoost, ++ uint32_t minimumIntervalUs); ++ ++ VkResult LatencySleep(); ++ ++ void SetLatencyMarker( ++ VkLatencyMarkerNV marker, ++ uint64_t presentId); ++ ++ VkResult GetLatencyTimings( ++ std::vector& frameReports); ++ ++ bool LowLatencyEnabled(); ++ + private: + + enum BindingIds : uint32_t { +@@ -176,4 +192,4 @@ namespace dxvk { + + }; + +-} +\ No newline at end of file ++} +diff --git a/src/dxvk/dxvk_adapter.cpp b/src/dxvk/dxvk_adapter.cpp +index cf4c3cce68f..6a3b4f08180 100644 +--- a/src/dxvk/dxvk_adapter.cpp ++++ b/src/dxvk/dxvk_adapter.cpp +@@ -927,6 +927,9 @@ namespace dxvk { + m_deviceFeatures.khrPresentWait.pNext = std::exchange(m_deviceFeatures.core.pNext, &m_deviceFeatures.khrPresentWait); + } + ++ if (m_deviceExtensions.supports(VK_NV_LOW_LATENCY_2_EXTENSION_NAME)) ++ m_deviceFeatures.nvLowLatency2 = VK_TRUE; ++ + if (m_deviceExtensions.supports(VK_NVX_BINARY_IMPORT_EXTENSION_NAME)) + m_deviceFeatures.nvxBinaryImport = VK_TRUE; + +@@ -994,6 +997,7 @@ namespace dxvk { + &devExtensions.khrPresentWait, + &devExtensions.khrSwapchain, + &devExtensions.khrWin32KeyedMutex, ++ &devExtensions.nvLowLatency2, + &devExtensions.nvxBinaryImport, + &devExtensions.nvxImageViewHandle, + }}; +@@ -1133,8 +1137,13 @@ namespace dxvk { + enabledFeatures.khrPresentWait.pNext = std::exchange(enabledFeatures.core.pNext, &enabledFeatures.khrPresentWait); + } + +- if (devExtensions.nvxBinaryImport) ++ if (devExtensions.nvxBinaryImport) { + enabledFeatures.nvxBinaryImport = VK_TRUE; ++ } ++ ++ if (devExtensions.nvLowLatency2) { ++ enabledFeatures.nvLowLatency2 = VK_TRUE; ++ } + + if 
(devExtensions.nvxImageViewHandle) + enabledFeatures.nvxImageViewHandle = VK_TRUE; +@@ -1279,6 +1288,8 @@ namespace dxvk { + "\n presentId : ", features.khrPresentId.presentId ? "1" : "0", + "\n", VK_KHR_PRESENT_WAIT_EXTENSION_NAME, + "\n presentWait : ", features.khrPresentWait.presentWait ? "1" : "0", ++ "\n", VK_NV_LOW_LATENCY_2_EXTENSION_NAME, ++ "\n extension supported : ", features.nvLowLatency2 ? "1" : "0", + "\n", VK_NVX_BINARY_IMPORT_EXTENSION_NAME, + "\n extension supported : ", features.nvxBinaryImport ? "1" : "0", + "\n", VK_NVX_IMAGE_VIEW_HANDLE_EXTENSION_NAME, +diff --git a/src/dxvk/dxvk_cmdlist.cpp b/src/dxvk/dxvk_cmdlist.cpp +index 3bd3aa953d4..54b50ea533c 100644 +--- a/src/dxvk/dxvk_cmdlist.cpp ++++ b/src/dxvk/dxvk_cmdlist.cpp +@@ -56,10 +56,12 @@ namespace dxvk { + + VkResult DxvkCommandSubmission::submit( + DxvkDevice* device, +- VkQueue queue) { ++ VkQueue queue, ++ uint64_t frameId) { + auto vk = device->vkd(); + + VkSubmitInfo2 submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO_2 }; ++ VkLatencySubmissionPresentIdNV latencySubmitInfo = { VK_STRUCTURE_TYPE_LATENCY_SUBMISSION_PRESENT_ID_NV }; + + if (!m_semaphoreWaits.empty()) { + submitInfo.waitSemaphoreInfoCount = m_semaphoreWaits.size(); +@@ -76,6 +78,11 @@ namespace dxvk { + submitInfo.pSignalSemaphoreInfos = m_semaphoreSignals.data(); + } + ++ if (device->features().nvLowLatency2 && frameId && !m_commandBuffers.empty()) { ++ latencySubmitInfo.presentID = frameId; ++ latencySubmitInfo.pNext = std::exchange(submitInfo.pNext, &latencySubmitInfo); ++ } ++ + VkResult vr = VK_SUCCESS; + + if (!this->isEmpty()) +@@ -206,7 +213,7 @@ namespace dxvk { + } + + +- VkResult DxvkCommandList::submit() { ++ VkResult DxvkCommandList::submit(uint64_t frameId) { + VkResult status = VK_SUCCESS; + + const auto& graphics = m_device->queues().graphics; +@@ -238,7 +245,7 @@ namespace dxvk { + // for any prior submissions, then block any subsequent ones + m_commandSubmission.signalSemaphore(m_bindSemaphore, 0, 
VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT); + +- if ((status = m_commandSubmission.submit(m_device, graphics.queueHandle))) ++ if ((status = m_commandSubmission.submit(m_device, graphics.queueHandle, frameId))) + return status; + + sparseBind->waitSemaphore(m_bindSemaphore, 0); +@@ -259,7 +266,7 @@ namespace dxvk { + if (m_device->hasDedicatedTransferQueue() && !m_commandSubmission.isEmpty()) { + m_commandSubmission.signalSemaphore(m_sdmaSemaphore, 0, VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT); + +- if ((status = m_commandSubmission.submit(m_device, transfer.queueHandle))) ++ if ((status = m_commandSubmission.submit(m_device, transfer.queueHandle, frameId))) + return status; + + m_commandSubmission.waitSemaphore(m_sdmaSemaphore, 0, VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT); +@@ -297,7 +304,7 @@ namespace dxvk { + } + + // Finally, submit all graphics commands of the current submission +- if ((status = m_commandSubmission.submit(m_device, graphics.queueHandle))) ++ if ((status = m_commandSubmission.submit(m_device, graphics.queueHandle, frameId))) + return status; + } + +diff --git a/src/dxvk/dxvk_cmdlist.h b/src/dxvk/dxvk_cmdlist.h +index b9b9a165dd3..f9527516e17 100644 +--- a/src/dxvk/dxvk_cmdlist.h ++++ b/src/dxvk/dxvk_cmdlist.h +@@ -94,7 +94,8 @@ namespace dxvk { + */ + VkResult submit( + DxvkDevice* device, +- VkQueue queue); ++ VkQueue queue, ++ uint64_t frameId); + + /** + * \brief Resets object +@@ -199,7 +200,7 @@ namespace dxvk { + * \brief Submits command list + * \returns Submission status + */ +- VkResult submit(); ++ VkResult submit(uint64_t frameId); + + /** + * \brief Stat counters +diff --git a/src/dxvk/dxvk_device.cpp b/src/dxvk/dxvk_device.cpp +index 9a053791a7b..44d208c41aa 100644 +--- a/src/dxvk/dxvk_device.cpp ++++ b/src/dxvk/dxvk_device.cpp +@@ -18,6 +18,7 @@ namespace dxvk { + m_properties (adapter->devicePropertiesExt()), + m_perfHints (getPerfHints()), + m_objects (this), ++ m_latencyMarkers ({}), + m_queues (queues), + m_submissionQueue (this, 
queueCallback) { + +@@ -274,6 +275,7 @@ namespace dxvk { + DxvkSubmitStatus* status) { + DxvkSubmitInfo submitInfo = { }; + submitInfo.cmdList = commandList; ++ submitInfo.frameId = m_latencyMarkers.render; + m_submissionQueue.submit(submitInfo, status); + + std::lock_guard statLock(m_statLock); +diff --git a/src/dxvk/dxvk_device.h b/src/dxvk/dxvk_device.h +index a24ee311bf5..cfef76a0874 100644 +--- a/src/dxvk/dxvk_device.h ++++ b/src/dxvk/dxvk_device.h +@@ -66,7 +66,16 @@ namespace dxvk { + DxvkDeviceQueue transfer; + DxvkDeviceQueue sparse; + }; +- ++ ++ /** ++ * \brief Latency marker frame ids ++ */ ++ struct DxvkDeviceLowLatencyMarkers { ++ uint64_t simulation; ++ uint64_t render; ++ uint64_t present; ++ }; ++ + /** + * \brief DXVK device + * +@@ -534,6 +543,44 @@ namespace dxvk { + * used by the GPU can be safely destroyed. + */ + void waitForIdle(); ++ ++ /** ++ * \brief Updates the frame id for the given frame marker ++ * ++ * \param [in] marker The marker to set the frame ID for ++ * \param [in] id The frame ID to set ++ */ ++ void setLatencyMarker(VkLatencyMarkerNV marker, uint64_t id) { ++ switch (marker) { ++ case VK_LATENCY_MARKER_SIMULATION_START_NV: ++ m_latencyMarkers.simulation = id; ++ break; ++ case VK_LATENCY_MARKER_RENDERSUBMIT_START_NV: ++ m_latencyMarkers.render = id; ++ break; ++ case VK_LATENCY_MARKER_PRESENT_START_NV: ++ m_latencyMarkers.present = id; ++ break; ++ default: ++ break; ++ } ++ } ++ ++ /** ++ * \brief Resets the latency markers back to zero ++ */ ++ void resetLatencyMarkers() { ++ m_latencyMarkers = {}; ++ } ++ ++ /** ++ * \brief Returns the current set of latency marker frame IDs ++ * ++ * \returns The current set of frame marker IDs ++ */ ++ DxvkDeviceLowLatencyMarkers getLatencyMarkers() { ++ return m_latencyMarkers; ++ } + + private: + +@@ -549,6 +596,8 @@ namespace dxvk { + DxvkDevicePerfHints m_perfHints; + DxvkObjects m_objects; + ++ DxvkDeviceLowLatencyMarkers m_latencyMarkers; ++ + sync::Spinlock m_statLock; + 
DxvkStatCounters m_statCounters; + +diff --git a/src/dxvk/dxvk_device_info.h b/src/dxvk/dxvk_device_info.h +index e23a0e1812e..ec0bc5a645e 100644 +--- a/src/dxvk/dxvk_device_info.h ++++ b/src/dxvk/dxvk_device_info.h +@@ -68,9 +68,10 @@ namespace dxvk { + VkPhysicalDeviceMaintenance5FeaturesKHR khrMaintenance5; + VkPhysicalDevicePresentIdFeaturesKHR khrPresentId; + VkPhysicalDevicePresentWaitFeaturesKHR khrPresentWait; ++ VkBool32 nvLowLatency2; + VkBool32 nvxBinaryImport; + VkBool32 nvxImageViewHandle; + VkBool32 khrWin32KeyedMutex; + }; + +-} +\ No newline at end of file ++} +diff --git a/src/dxvk/dxvk_extensions.h b/src/dxvk/dxvk_extensions.h +index 8164ccf6ad6..041d00c3cee 100644 +--- a/src/dxvk/dxvk_extensions.h ++++ b/src/dxvk/dxvk_extensions.h +@@ -325,6 +325,7 @@ namespace dxvk { + DxvkExt khrPresentWait = { VK_KHR_PRESENT_WAIT_EXTENSION_NAME, DxvkExtMode::Optional }; + DxvkExt khrSwapchain = { VK_KHR_SWAPCHAIN_EXTENSION_NAME, DxvkExtMode::Required }; + DxvkExt khrWin32KeyedMutex = { VK_KHR_WIN32_KEYED_MUTEX_EXTENSION_NAME, DxvkExtMode::Optional }; ++ DxvkExt nvLowLatency2 = { VK_NV_LOW_LATENCY_2_EXTENSION_NAME, DxvkExtMode::Optional }; + DxvkExt nvxBinaryImport = { VK_NVX_BINARY_IMPORT_EXTENSION_NAME, DxvkExtMode::Disabled }; + DxvkExt nvxImageViewHandle = { VK_NVX_IMAGE_VIEW_HANDLE_EXTENSION_NAME, DxvkExtMode::Disabled }; + }; +diff --git a/src/dxvk/dxvk_presenter.cpp b/src/dxvk/dxvk_presenter.cpp +index 10f13da2783..9f7c6a0def0 100644 +--- a/src/dxvk/dxvk_presenter.cpp ++++ b/src/dxvk/dxvk_presenter.cpp +@@ -18,6 +18,15 @@ namespace dxvk { + // with present operations and periodically signals the event + if (m_device->features().khrPresentWait.presentWait && m_signal != nullptr) + m_frameThread = dxvk::thread([this] { runFrameThread(); }); ++ ++ // If nvLowLatency2 is supported create the fence ++ if (m_device->features().nvLowLatency2) { ++ DxvkFenceCreateInfo info = {}; ++ info.initialValue = 0; ++ info.sharedType = 
VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_FLAG_BITS_MAX_ENUM; ++ ++ m_lowLatencyFence = DxvkFenceValuePair(m_device->createFence(info), 0u); ++ } + } + + +@@ -48,6 +57,7 @@ namespace dxvk { + + + VkResult Presenter::acquireNextImage(PresenterSync& sync, uint32_t& index) { ++ std::lock_guard lock(m_lowLatencyMutex); + sync = m_semaphores.at(m_frameIndex); + + // Don't acquire more than one image at a time +@@ -68,11 +78,13 @@ namespace dxvk { + VkResult Presenter::presentImage( + VkPresentModeKHR mode, + uint64_t frameId) { ++ std::lock_guard lock(m_lowLatencyMutex); ++ + PresenterSync sync = m_semaphores.at(m_frameIndex); + + VkPresentIdKHR presentId = { VK_STRUCTURE_TYPE_PRESENT_ID_KHR }; + presentId.swapchainCount = 1; +- presentId.pPresentIds = &frameId; ++ presentId.pPresentIds = &frameId; + + VkSwapchainPresentModeInfoEXT modeInfo = { VK_STRUCTURE_TYPE_SWAPCHAIN_PRESENT_MODE_INFO_EXT }; + modeInfo.swapchainCount = 1; +@@ -151,6 +163,8 @@ namespace dxvk { + + + VkResult Presenter::recreateSwapChain(const PresenterDesc& desc) { ++ std::lock_guard lock(m_lowLatencyMutex); ++ + if (m_swapchain) + destroySwapchain(); + +@@ -293,6 +307,9 @@ namespace dxvk { + modeInfo.presentModeCount = compatibleModes.size(); + modeInfo.pPresentModes = compatibleModes.data(); + ++ VkSwapchainLatencyCreateInfoNV lowLatencyInfo = { VK_STRUCTURE_TYPE_SWAPCHAIN_LATENCY_CREATE_INFO_NV }; ++ lowLatencyInfo.latencyModeEnable = VK_TRUE; ++ + VkSwapchainCreateInfoKHR swapInfo = { VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR }; + swapInfo.surface = m_surface; + swapInfo.minImageCount = m_info.imageCount; +@@ -314,6 +331,9 @@ namespace dxvk { + if (m_device->features().extSwapchainMaintenance1.swapchainMaintenance1) + modeInfo.pNext = std::exchange(swapInfo.pNext, &modeInfo); + ++ if (m_device->features().nvLowLatency2) ++ lowLatencyInfo.pNext = std::exchange(swapInfo.pNext, &lowLatencyInfo); ++ + Logger::info(str::format( + "Presenter: Actual swap chain properties:" + "\n Format: ", 
m_info.format.format, +@@ -322,11 +342,21 @@ namespace dxvk { + "\n Buffer size: ", m_info.imageExtent.width, "x", m_info.imageExtent.height, + "\n Image count: ", m_info.imageCount, + "\n Exclusive FS: ", desc.fullScreenExclusive)); +- ++ + if ((status = m_vkd->vkCreateSwapchainKHR(m_vkd->device(), + &swapInfo, nullptr, &m_swapchain))) + return status; +- ++ ++ if (m_device->features().nvLowLatency2) { ++ VkLatencySleepModeInfoNV sleepModeInfo = { VK_STRUCTURE_TYPE_LATENCY_SLEEP_MODE_INFO_NV }; ++ sleepModeInfo.lowLatencyMode = m_lowLatencyEnabled; ++ sleepModeInfo.lowLatencyBoost = m_lowLatencyBoost; ++ sleepModeInfo.minimumIntervalUs = m_minimumIntervalUs; ++ ++ if ((status = m_vkd->vkSetLatencySleepModeNV(m_vkd->device(), m_swapchain, &sleepModeInfo))) ++ return status; ++ } ++ + // Acquire images and create views + std::vector images; + +@@ -422,6 +452,69 @@ namespace dxvk { + m_vkd->vkSetHdrMetadataEXT(m_vkd->device(), 1, &m_swapchain, &hdrMetadata); + } + ++ VkResult Presenter::setLatencySleepMode(bool lowLatencyMode, bool lowLatencyBoost, uint32_t minimumIntervalUs) { ++ VkLatencySleepModeInfoNV sleepModeInfo = { VK_STRUCTURE_TYPE_LATENCY_SLEEP_MODE_INFO_NV }; ++ sleepModeInfo.lowLatencyMode = lowLatencyMode; ++ sleepModeInfo.lowLatencyBoost = lowLatencyBoost; ++ sleepModeInfo.minimumIntervalUs = minimumIntervalUs; ++ ++ std::lock_guard lock(m_lowLatencyMutex); ++ VkResult status = m_vkd->vkSetLatencySleepModeNV(m_vkd->device(), m_swapchain, &sleepModeInfo); ++ ++ m_lowLatencyEnabled = lowLatencyMode; ++ m_lowLatencyBoost = lowLatencyBoost; ++ m_minimumIntervalUs = minimumIntervalUs; ++ ++ if (!lowLatencyMode) ++ m_device->resetLatencyMarkers(); ++ ++ return status; ++ } ++ ++ VkResult Presenter::latencySleep() { ++ VkSemaphore sem = m_lowLatencyFence.fence->handle(); ++ uint64_t waitValue = m_lowLatencyFence.value + 1; ++ m_lowLatencyFence.value++; ++ ++ VkLatencySleepInfoNV sleepInfo = { VK_STRUCTURE_TYPE_LATENCY_SLEEP_INFO_NV }; ++ 
sleepInfo.signalSemaphore = sem; ++ sleepInfo.value = waitValue; ++ ++ { ++ std::lock_guard lock(m_lowLatencyMutex); ++ m_vkd->vkLatencySleepNV(m_vkd->device(), m_swapchain, &sleepInfo); ++ } ++ ++ m_lowLatencyFence.fence->wait(waitValue); ++ ++ return VK_SUCCESS; ++ } ++ ++ void Presenter::setLatencyMarker(VkLatencyMarkerNV marker, uint64_t presentId) { ++ VkSetLatencyMarkerInfoNV markerInfo = { VK_STRUCTURE_TYPE_SET_LATENCY_MARKER_INFO_NV }; ++ markerInfo.presentID = presentId; ++ markerInfo.marker = marker; ++ ++ std::lock_guard lock(m_lowLatencyMutex); ++ m_vkd->vkSetLatencyMarkerNV(m_vkd->device(), m_swapchain, &markerInfo); ++ } ++ ++ VkResult Presenter::getLatencyTimings(std::vector& frameReports) { ++ VkGetLatencyMarkerInfoNV markerInfo = { VK_STRUCTURE_TYPE_GET_LATENCY_MARKER_INFO_NV }; ++ uint32_t timingCount = 0; ++ ++ std::lock_guard lock(m_lowLatencyMutex); ++ m_vkd->vkGetLatencyTimingsNV(m_vkd->device(), m_swapchain, &timingCount, &markerInfo); ++ ++ if (timingCount != 0) { ++ frameReports.resize(timingCount, { VK_STRUCTURE_TYPE_LATENCY_TIMINGS_FRAME_REPORT_NV }); ++ markerInfo.pTimings = frameReports.data(); ++ ++ m_vkd->vkGetLatencyTimingsNV(m_vkd->device(), m_swapchain, &timingCount, &markerInfo); ++ } ++ ++ return VK_SUCCESS; ++ } + + VkResult Presenter::getSupportedFormats(std::vector& formats, VkFullScreenExclusiveEXT fullScreenExclusive) const { + uint32_t numFormats = 0; +diff --git a/src/dxvk/dxvk_presenter.h b/src/dxvk/dxvk_presenter.h +index c5ba1273364..aa52b97b4ce 100644 +--- a/src/dxvk/dxvk_presenter.h ++++ b/src/dxvk/dxvk_presenter.h +@@ -15,6 +15,7 @@ + #include "../vulkan/vulkan_loader.h" + + #include "dxvk_format.h" ++#include "dxvk_fence.h" + + namespace dxvk { + +@@ -224,6 +225,42 @@ namespace dxvk { + */ + void setHdrMetadata(const VkHdrMetadataEXT& hdrMetadata); + ++ /** ++ * \brief Set the latency mode of the swapchain ++ * ++ * \param [in] lowLatencyMode Determines if the low latency ++ * mode should be enabled or disabled ++ */
++ VkResult setLatencySleepMode(bool lowLatencyMode, bool lowLatencyBoost, uint32_t minimumIntervalUs); ++ ++ /** ++ * \brief Delay rendering work for lower latency ++ */ ++ VkResult latencySleep(); ++ ++ /** ++ * \brief Set a latency marker for the given stage ++ * ++ * \param [in] marker The stage this marker is for ++ * \param [in] presentId The presentId this marker is for ++ */ ++ void setLatencyMarker(VkLatencyMarkerNV marker, uint64_t presentId); ++ ++ /** ++ * \brief Get the low latency timing info ++ * ++ * \param [out] latencyInfo The structure to place ++ * the latency timings into ++ */ ++ VkResult getLatencyTimings(std::vector& frameReports); ++ ++ /** ++ * \brief Returns the low latency enabled state ++ */ ++ bool lowLatencyEnabled() { ++ return m_lowLatencyEnabled; ++ } ++ + private: + + Rc m_device; +@@ -237,6 +274,11 @@ namespace dxvk { + VkSurfaceKHR m_surface = VK_NULL_HANDLE; + VkSwapchainKHR m_swapchain = VK_NULL_HANDLE; + ++ DxvkFenceValuePair m_lowLatencyFence = {}; ++ bool m_lowLatencyEnabled = false; ++ bool m_lowLatencyBoost = false; ++ uint32_t m_minimumIntervalUs = 0; ++ + std::vector m_images; + std::vector m_semaphores; + +@@ -250,6 +292,7 @@ namespace dxvk { + FpsLimiter m_fpsLimiter; + + dxvk::mutex m_frameMutex; ++ dxvk::mutex m_lowLatencyMutex; + dxvk::condition_variable m_frameCond; + dxvk::thread m_frameThread; + std::queue m_frameQueue; +diff --git a/src/dxvk/dxvk_queue.cpp b/src/dxvk/dxvk_queue.cpp +index 7273a37d608..546a1f838b8 100644 +--- a/src/dxvk/dxvk_queue.cpp ++++ b/src/dxvk/dxvk_queue.cpp +@@ -126,7 +126,7 @@ namespace dxvk { + m_callback(true); + + if (entry.submit.cmdList != nullptr) +- entry.result = entry.submit.cmdList->submit(); ++ entry.result = entry.submit.cmdList->submit(entry.submit.frameId); + else if (entry.present.presenter != nullptr) + entry.result = entry.present.presenter->presentImage(entry.present.presentMode, entry.present.frameId); + +@@ -226,4 +226,4 @@ namespace dxvk { + } + } + +-} +\ No 
newline at end of file ++} +diff --git a/src/dxvk/dxvk_queue.h b/src/dxvk/dxvk_queue.h +index 38d91f5dd09..a3c6e581b31 100644 +--- a/src/dxvk/dxvk_queue.h ++++ b/src/dxvk/dxvk_queue.h +@@ -32,6 +32,7 @@ namespace dxvk { + */ + struct DxvkSubmitInfo { + Rc cmdList; ++ uint64_t frameId; + }; + + +diff --git a/src/vulkan/vulkan_loader.h b/src/vulkan/vulkan_loader.h +index 1741ccb8722..6b0f80ea248 100644 +--- a/src/vulkan/vulkan_loader.h ++++ b/src/vulkan/vulkan_loader.h +@@ -452,6 +452,14 @@ namespace dxvk::vk { + VULKAN_FN(wine_vkAcquireKeyedMutex); + VULKAN_FN(wine_vkReleaseKeyedMutex); + #endif ++ ++ #ifdef VK_NV_LOW_LATENCY_2_EXTENSION_NAME ++ VULKAN_FN(vkSetLatencySleepModeNV); ++ VULKAN_FN(vkLatencySleepNV); ++ VULKAN_FN(vkSetLatencyMarkerNV); ++ VULKAN_FN(vkGetLatencyTimingsNV); ++ VULKAN_FN(vkQueueNotifyOutOfBandNV); ++ #endif + }; + + } diff --git a/patches/proton/81-nv_low_latency_vkd3d_proton.patch b/patches/proton/81-nv_low_latency_vkd3d_proton.patch new file mode 100644 index 0000000000..e24e709d7a --- /dev/null +++ b/patches/proton/81-nv_low_latency_vkd3d_proton.patch @@ -0,0 +1,1299 @@ +From 56565ab8587c5785e63bd4cf4eb9e905d4affcf4 Mon Sep 17 00:00:00 2001 +From: Eric Sullivan +Date: Sun, 8 Oct 2023 22:46:03 -0700 +Subject: [PATCH 1/2] khronos: Update Vulkan headers + +--- + subprojects/Vulkan-Headers | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/subprojects/Vulkan-Headers b/subprojects/Vulkan-Headers +index a0c76b4ef7..bbe0f575eb 160000 +--- a/subprojects/Vulkan-Headers ++++ b/subprojects/Vulkan-Headers +@@ -1 +1 @@ +-Subproject commit a0c76b4ef76e219483755ff61dce6b67ff79f24b ++Subproject commit bbe0f575ebd6098369f0ac6c6a43532732ed0ba6 + +From dd574726b5c94824c7f0ee847aaf677f04892503 Mon Sep 17 00:00:00 2001 +From: Eric Sullivan +Date: Thu, 7 Sep 2023 09:27:14 -0700 +Subject: [PATCH 2/2] vkd3d: Add support for VK_NV_low_latency2 + +This commit add support for the VK_NV_low_latency2 extension, and +implements the 
ID3DLowLatencyDevice, and ID3D12CommandQueueExt +interfaces. +--- + include/meson.build | 1 + + include/vkd3d_command_queue_vkd3d_ext.idl | 30 +++ + include/vkd3d_device_vkd3d_ext.idl | 15 ++ + include/vkd3d_vk_includes.h | 34 ++- + libs/vkd3d/command.c | 41 ++- + libs/vkd3d/command_queue_vkd3d_ext.c | 100 +++++++ + libs/vkd3d/device.c | 14 +- + libs/vkd3d/device_vkd3d_ext.c | 158 ++++++++++- + libs/vkd3d/meson.build | 1 + + libs/vkd3d/swapchain.c | 315 +++++++++++++++++++++- + libs/vkd3d/vkd3d_private.h | 68 ++++- + libs/vkd3d/vulkan_procs.h | 7 + + 12 files changed, 763 insertions(+), 21 deletions(-) + create mode 100644 include/vkd3d_command_queue_vkd3d_ext.idl + create mode 100644 libs/vkd3d/command_queue_vkd3d_ext.c + +diff --git a/include/meson.build b/include/meson.build +index c58579e9d2..e6ef1767df 100644 +--- a/include/meson.build ++++ b/include/meson.build +@@ -12,6 +12,7 @@ vkd3d_idl = [ + 'vkd3d_dxgitype.idl', + 'vkd3d_swapchain_factory.idl', + 'vkd3d_command_list_vkd3d_ext.idl', ++ 'vkd3d_command_queue_vkd3d_ext.idl', + 'vkd3d_device_vkd3d_ext.idl', + 'vkd3d_core_interface.idl', + ] +diff --git a/include/vkd3d_command_queue_vkd3d_ext.idl b/include/vkd3d_command_queue_vkd3d_ext.idl +new file mode 100644 +index 0000000000..3c69f00a64 +--- /dev/null ++++ b/include/vkd3d_command_queue_vkd3d_ext.idl +@@ -0,0 +1,30 @@ ++/* ++ * * Copyright 2023 NVIDIA Corporation ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA ++ */ ++import "vkd3d_d3d12.idl"; ++import "vkd3d_vk_includes.h"; ++ ++[ ++ uuid(40ed3f96-e773-e9bc-fc0c-e95560c99ad6), ++ object, ++ local, ++ pointer_default(unique) ++] ++interface ID3D12CommandQueueExt : IUnknown ++{ ++ HRESULT NotifyOutOfBandCommandQueue(D3D12_OUT_OF_BAND_CQ_TYPE type); ++} +diff --git a/include/vkd3d_device_vkd3d_ext.idl b/include/vkd3d_device_vkd3d_ext.idl +index 3e615d76a1..4a21ba763e 100644 +--- a/include/vkd3d_device_vkd3d_ext.idl ++++ b/include/vkd3d_device_vkd3d_ext.idl +@@ -54,3 +54,18 @@ interface ID3D12DXVKInteropDevice : IUnknown + HRESULT LockCommandQueue(ID3D12CommandQueue *queue); + HRESULT UnlockCommandQueue(ID3D12CommandQueue *queue); + } ++ ++[ ++ uuid(f3112584-41f9-348d-a59b-00b7e1d285d6), ++ object, ++ local, ++ pointer_default(unique) ++] ++interface ID3DLowLatencyDevice : IUnknown ++{ ++ BOOL SupportsLowLatency(); ++ HRESULT LatencySleep(); ++ HRESULT SetLatencySleepMode(BOOL low_latency_mode, BOOL low_latency_boost, UINT32 minimum_interval_us); ++ HRESULT SetLatencyMarker(UINT64 frameID, UINT32 markerType); ++ HRESULT GetLatencyInfo(D3D12_LATENCY_RESULTS *latency_results); ++} +diff --git a/include/vkd3d_vk_includes.h b/include/vkd3d_vk_includes.h +index c43e018935..020596130a 100644 +--- a/include/vkd3d_vk_includes.h ++++ b/include/vkd3d_vk_includes.h +@@ -41,9 +41,16 @@ typedef enum VkImageLayout VkImageLayout; + typedef enum D3D12_VK_EXTENSION + { + D3D12_VK_NVX_BINARY_IMPORT = 0x1, +- D3D12_VK_NVX_IMAGE_VIEW_HANDLE = 0x2 ++ D3D12_VK_NVX_IMAGE_VIEW_HANDLE = 0x2, ++ D3D12_VK_NV_LOW_LATENCY_2 = 0x3 + } D3D12_VK_EXTENSION; + ++typedef enum D3D12_OUT_OF_BAND_CQ_TYPE ++{ ++ OUT_OF_BAND_RENDER = 0, ++ OUT_OF_BAND_PRESENT = 1 ++} D3D12_OUT_OF_BAND_CQ_TYPE; ++ + typedef struct D3D12_CUBIN_DATA_HANDLE + { 
+ VkCuFunctionNVX vkCuFunction; +@@ -61,5 +68,30 @@ typedef struct D3D12_UAV_INFO + UINT64 gpuVASize; + } D3D12_UAV_INFO; + ++typedef struct D3D12_LATENCY_RESULTS ++{ ++ UINT32 version; ++ struct D3D12_FRAME_REPORT { ++ UINT64 frameID; ++ UINT64 inputSampleTime; ++ UINT64 simStartTime; ++ UINT64 simEndTime; ++ UINT64 renderSubmitStartTime; ++ UINT64 renderSubmitEndTime; ++ UINT64 presentStartTime; ++ UINT64 presentEndTime; ++ UINT64 driverStartTime; ++ UINT64 driverEndTime; ++ UINT64 osRenderQueueStartTime; ++ UINT64 osRenderQueueEndTime; ++ UINT64 gpuRenderStartTime; ++ UINT64 gpuRenderEndTime; ++ UINT32 gpuActiveRenderTimeUs; ++ UINT32 gpuFrameTimeUs; ++ UINT8 rsvd[120]; ++ } frame_reports[64]; ++ UINT8 rsvd[32]; ++} D3D12_LATENCY_RESULTS; ++ + #endif // __VKD3D_VK_INCLUDES_H + +diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c +index 3d0ebaa6ff..171920ac2c 100644 +--- a/libs/vkd3d/command.c ++++ b/libs/vkd3d/command.c +@@ -14301,12 +14301,14 @@ static struct d3d12_command_list *d3d12_command_list_from_iface(ID3D12CommandLis + } + + /* ID3D12CommandQueue */ ++extern ULONG STDMETHODCALLTYPE d3d12_command_queue_vkd3d_ext_AddRef(d3d12_command_queue_vkd3d_ext_iface *iface); ++ + static inline struct d3d12_command_queue *impl_from_ID3D12CommandQueue(ID3D12CommandQueue *iface) + { + return CONTAINING_RECORD(iface, struct d3d12_command_queue, ID3D12CommandQueue_iface); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_command_queue_QueryInterface(ID3D12CommandQueue *iface, ++HRESULT STDMETHODCALLTYPE d3d12_command_queue_QueryInterface(ID3D12CommandQueue *iface, + REFIID riid, void **object) + { + TRACE("iface %p, riid %s, object %p.\n", iface, debugstr_guid(riid), object); +@@ -14325,6 +14327,14 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_QueryInterface(ID3D12Comman + return S_OK; + } + ++ if (IsEqualGUID(riid, &IID_ID3D12CommandQueueExt)) ++ { ++ struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); ++ 
d3d12_command_queue_vkd3d_ext_AddRef(&command_queue->ID3D12CommandQueueExt_iface); ++ *object = &command_queue->ID3D12CommandQueueExt_iface; ++ return S_OK; ++ } ++ + if (IsEqualGUID(riid, &IID_IDXGIVkSwapChainFactory)) + { + struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); +@@ -14339,7 +14349,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_QueryInterface(ID3D12Comman + return E_NOINTERFACE; + } + +-static ULONG STDMETHODCALLTYPE d3d12_command_queue_AddRef(ID3D12CommandQueue *iface) ++ULONG STDMETHODCALLTYPE d3d12_command_queue_AddRef(ID3D12CommandQueue *iface) + { + struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); + ULONG refcount = InterlockedIncrement(&command_queue->refcount); +@@ -14349,7 +14359,7 @@ static ULONG STDMETHODCALLTYPE d3d12_command_queue_AddRef(ID3D12CommandQueue *if + return refcount; + } + +-static ULONG STDMETHODCALLTYPE d3d12_command_queue_Release(ID3D12CommandQueue *iface) ++ULONG STDMETHODCALLTYPE d3d12_command_queue_Release(ID3D12CommandQueue *iface) + { + struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); + ULONG refcount = InterlockedDecrement(&command_queue->refcount); +@@ -14823,6 +14833,7 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm + sub.execute.cmd_count = num_command_buffers; + sub.execute.outstanding_submissions_counters = outstanding; + sub.execute.outstanding_submissions_counter_count = command_list_count; ++ sub.execute.frame_id = command_queue->device->frame_markers.render; + #ifdef VKD3D_ENABLE_BREADCRUMBS + sub.execute.breadcrumb_indices = breadcrumb_indices; + sub.execute.breadcrumb_indices_count = breadcrumb_indices ? 
command_list_count : 0; +@@ -14985,6 +14996,8 @@ static D3D12_COMMAND_QUEUE_DESC * STDMETHODCALLTYPE d3d12_command_queue_GetDesc( + return desc; + } + ++extern CONST_VTBL struct ID3D12CommandQueueExtVtbl d3d12_command_queue_vkd3d_ext_vtbl; ++ + static CONST_VTBL struct ID3D12CommandQueueVtbl d3d12_command_queue_vtbl = + { + /* IUnknown methods */ +@@ -15492,13 +15505,15 @@ static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queu + const VkCommandBufferSubmitInfo *transition_cmd, + const VkSemaphoreSubmitInfo *transition_semaphore, + LONG **submission_counters, size_t num_submission_counters, +- bool debug_capture, bool split_submissions) ++ uint64_t frame_id, bool debug_capture, ++ bool split_submissions) + { + const struct vkd3d_vk_device_procs *vk_procs = &command_queue->device->vk_procs; + struct vkd3d_queue *vkd3d_queue = command_queue->vkd3d_queue; + VkSemaphoreSubmitInfo signal_semaphore_info; + VkSemaphoreSubmitInfo binary_semaphore_info; + VkSubmitInfo2 submit_desc[4]; ++ VkLatencySubmissionPresentIdNV latency_submit_present_info; + uint32_t num_submits; + VkQueue vk_queue; + unsigned int i; +@@ -15578,6 +15593,18 @@ static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queu + num_submits += 2; + } + ++ if (command_queue->device->vk_info.NV_low_latency2 && ++ command_queue->device->swapchain_info.low_latency_swapchain && ++ dxgi_vk_swap_chain_low_latency_enabled(command_queue->device->swapchain_info.low_latency_swapchain)) ++ { ++ latency_submit_present_info.sType = VK_STRUCTURE_TYPE_LATENCY_SUBMISSION_PRESENT_ID_NV; ++ latency_submit_present_info.pNext = NULL; ++ latency_submit_present_info.presentID = frame_id; ++ ++ for (i = 0; i < num_submits; i++) ++ submit_desc[i].pNext = &latency_submit_present_info; ++ } ++ + #ifdef VKD3D_ENABLE_RENDERDOC + /* For each submission we have marked to be captured, we will first need to filter it + * based on VKD3D_AUTO_CAPTURE_COUNTS. 
+@@ -16078,7 +16105,9 @@ static void *d3d12_command_queue_submission_worker_main(void *userdata) + &transition_cmd, &transition_semaphore, + submission.execute.outstanding_submissions_counters, + submission.execute.outstanding_submissions_counter_count, +- submission.execute.debug_capture, submission.execute.split_submission); ++ submission.execute.frame_id, ++ submission.execute.debug_capture, ++ submission.execute.split_submission); + + /* command_queue_execute takes ownership of the outstanding_submission_counters allocation. + * The atomic counters are decremented when the submission is observed to be freed. +@@ -16140,6 +16169,7 @@ static HRESULT d3d12_command_queue_init(struct d3d12_command_queue *queue, + int rc; + + queue->ID3D12CommandQueue_iface.lpVtbl = &d3d12_command_queue_vtbl; ++ queue->ID3D12CommandQueueExt_iface.lpVtbl = &d3d12_command_queue_vkd3d_ext_vtbl; + queue->refcount = 1; + + queue->desc = *desc; +@@ -16268,6 +16298,7 @@ void vkd3d_enqueue_initial_transition(ID3D12CommandQueue *queue, ID3D12Resource + + memset(&sub, 0, sizeof(sub)); + sub.type = VKD3D_SUBMISSION_EXECUTE; ++ sub.execute.frame_id = d3d12_queue->device->frame_markers.render; + sub.execute.transition_count = 1; + sub.execute.transitions = vkd3d_malloc(sizeof(*sub.execute.transitions)); + sub.execute.transitions[0].type = VKD3D_INITIAL_TRANSITION_TYPE_RESOURCE; +diff --git a/libs/vkd3d/command_queue_vkd3d_ext.c b/libs/vkd3d/command_queue_vkd3d_ext.c +new file mode 100644 +index 0000000000..0fba03b058 +--- /dev/null ++++ b/libs/vkd3d/command_queue_vkd3d_ext.c +@@ -0,0 +1,100 @@ ++/* ++ * * Copyright 2023 NVIDIA Corporation ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. 
++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA ++ */ ++ ++#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API ++ ++#include "vkd3d_private.h" ++ ++static inline struct d3d12_command_queue *d3d12_command_queue_from_ID3D12CommandQueueExt(d3d12_command_queue_vkd3d_ext_iface *iface) ++{ ++ return CONTAINING_RECORD(iface, struct d3d12_command_queue, ID3D12CommandQueueExt_iface); ++} ++ ++extern ULONG STDMETHODCALLTYPE d3d12_command_queue_AddRef(d3d12_command_queue_iface *iface); ++ ++ULONG STDMETHODCALLTYPE d3d12_command_queue_vkd3d_ext_AddRef(d3d12_command_queue_vkd3d_ext_iface *iface) ++{ ++ struct d3d12_command_queue *command_queue = d3d12_command_queue_from_ID3D12CommandQueueExt(iface); ++ return d3d12_command_queue_AddRef(&command_queue->ID3D12CommandQueue_iface); ++} ++ ++extern ULONG STDMETHODCALLTYPE d3d12_command_queue_Release(d3d12_command_queue_iface *iface); ++ ++static ULONG STDMETHODCALLTYPE d3d12_command_queue_vkd3d_ext_Release(d3d12_command_queue_vkd3d_ext_iface *iface) ++{ ++ struct d3d12_command_queue *command_queue = d3d12_command_queue_from_ID3D12CommandQueueExt(iface); ++ return d3d12_command_queue_Release(&command_queue->ID3D12CommandQueue_iface); ++} ++ ++extern HRESULT STDMETHODCALLTYPE d3d12_command_queue_QueryInterface(d3d12_command_queue_iface *iface, ++ REFIID iid, void **object); ++ ++static HRESULT STDMETHODCALLTYPE d3d12_command_queue_vkd3d_ext_QueryInterface(d3d12_command_queue_vkd3d_ext_iface *iface, ++ REFIID iid, void **out) ++{ ++ struct d3d12_command_queue *command_queue = 
d3d12_command_queue_from_ID3D12CommandQueueExt(iface); ++ TRACE("iface %p, iid %s, out %p.\n", iface, debugstr_guid(iid), out); ++ return d3d12_command_queue_QueryInterface(&command_queue->ID3D12CommandQueue_iface, iid, out); ++} ++ ++static HRESULT STDMETHODCALLTYPE d3d12_command_queue_vkd3d_ext_NotifyOutOfBandCommandQueue(d3d12_command_queue_vkd3d_ext_iface *iface, D3D12_OUT_OF_BAND_CQ_TYPE type) ++{ ++ const struct vkd3d_vk_device_procs *vk_procs; ++ struct d3d12_command_queue* command_queue; ++ VkOutOfBandQueueTypeInfoNV queue_info; ++ VkOutOfBandQueueTypeNV queue_type; ++ ++ command_queue = d3d12_command_queue_from_ID3D12CommandQueueExt(iface); ++ ++ if (!command_queue->device->vk_info.NV_low_latency2) ++ return E_NOTIMPL; ++ ++ vk_procs = &command_queue->device->vk_procs; ++ ++ switch (type) ++ { ++ case OUT_OF_BAND_RENDER: ++ queue_type = VK_OUT_OF_BAND_QUEUE_TYPE_RENDER_NV; ++ break; ++ case OUT_OF_BAND_PRESENT: ++ queue_type = VK_OUT_OF_BAND_QUEUE_TYPE_PRESENT_NV; ++ break; ++ default: ++ WARN("Invalid queue type %x\n", type); return E_INVALIDARG; /* queue_type would be left uninitialized */ ++ } ++ ++ queue_info.sType = VK_STRUCTURE_TYPE_OUT_OF_BAND_QUEUE_TYPE_INFO_NV; ++ queue_info.pNext = NULL; ++ queue_info.queueType = queue_type; ++ ++ VK_CALL(vkQueueNotifyOutOfBandNV(command_queue->vkd3d_queue->vk_queue, &queue_info)); ++ ++ return S_OK; ++} ++ ++CONST_VTBL struct ID3D12CommandQueueExtVtbl d3d12_command_queue_vkd3d_ext_vtbl = ++{ ++ /* IUnknown methods */ ++ d3d12_command_queue_vkd3d_ext_QueryInterface, ++ d3d12_command_queue_vkd3d_ext_AddRef, ++ d3d12_command_queue_vkd3d_ext_Release, ++ ++ /* ID3D12CommandQueueExt methods */ ++ d3d12_command_queue_vkd3d_ext_NotifyOutOfBandCommandQueue ++}; ++ +diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c +index fcf408aa2b..5d27bbaa18 100644 +--- a/libs/vkd3d/device.c ++++ b/libs/vkd3d/device.c +@@ -116,6 +116,7 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] = + VK_EXTENSION(NV_SHADER_SUBGROUP_PARTITIONED,
NV_shader_subgroup_partitioned), + VK_EXTENSION(NV_MEMORY_DECOMPRESSION, NV_memory_decompression), + VK_EXTENSION(NV_DEVICE_GENERATED_COMMANDS_COMPUTE, NV_device_generated_commands_compute), ++ VK_EXTENSION(NV_LOW_LATENCY_2, NV_low_latency2), + /* VALVE extensions */ + VK_EXTENSION(VALVE_MUTABLE_DESCRIPTOR_TYPE, VALVE_mutable_descriptor_type), + VK_EXTENSION(VALVE_DESCRIPTOR_SET_HOST_MAPPING, VALVE_descriptor_set_host_mapping), +@@ -3096,8 +3097,9 @@ void d3d12_device_return_query_pool(struct d3d12_device *device, const struct vk + } + + /* ID3D12Device */ +-extern ULONG STDMETHODCALLTYPE d3d12_device_vkd3d_ext_AddRef(ID3D12DeviceExt *iface); ++extern ULONG STDMETHODCALLTYPE d3d12_device_vkd3d_ext_AddRef(d3d12_device_vkd3d_ext_iface *iface); + extern ULONG STDMETHODCALLTYPE d3d12_dxvk_interop_device_AddRef(ID3D12DXVKInteropDevice *iface); ++extern ULONG STDMETHODCALLTYPE d3d12_low_latency_device_AddRef(ID3DLowLatencyDevice *iface); + + HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(d3d12_device_iface *iface, + REFIID riid, void **object) +@@ -3144,6 +3146,14 @@ HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(d3d12_device_iface *iface, + return S_OK; + } + ++ if (IsEqualGUID(riid, &IID_ID3DLowLatencyDevice)) ++ { ++ struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ d3d12_low_latency_device_AddRef(&device->ID3DLowLatencyDevice_iface); ++ *object = &device->ID3DLowLatencyDevice_iface; ++ return S_OK; ++ } ++ + WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(riid)); + + *object = NULL; +@@ -8005,6 +8015,7 @@ static void d3d12_device_replace_vtable(struct d3d12_device *device) + + extern CONST_VTBL struct ID3D12DeviceExtVtbl d3d12_device_vkd3d_ext_vtbl; + extern CONST_VTBL struct ID3D12DXVKInteropDeviceVtbl d3d12_dxvk_interop_device_vtbl; ++extern CONST_VTBL struct ID3DLowLatencyDeviceVtbl d3d_low_latency_device_vtbl; + + static void vkd3d_scratch_pool_init(struct d3d12_device *device) + { +@@ -8075,6 +8086,7 @@ static 
HRESULT d3d12_device_init(struct d3d12_device *device, + + device->ID3D12DeviceExt_iface.lpVtbl = &d3d12_device_vkd3d_ext_vtbl; + device->ID3D12DXVKInteropDevice_iface.lpVtbl = &d3d12_dxvk_interop_device_vtbl; ++ device->ID3DLowLatencyDevice_iface.lpVtbl = &d3d_low_latency_device_vtbl; + + if ((rc = rwlock_init(&device->vertex_input_lock))) + { +diff --git a/libs/vkd3d/device_vkd3d_ext.c b/libs/vkd3d/device_vkd3d_ext.c +index 5bb7eca840..cf10247488 100644 +--- a/libs/vkd3d/device_vkd3d_ext.c ++++ b/libs/vkd3d/device_vkd3d_ext.c +@@ -20,18 +20,18 @@ + + #include "vkd3d_private.h" + +-static inline struct d3d12_device *d3d12_device_from_ID3D12DeviceExt(ID3D12DeviceExt *iface) ++static inline struct d3d12_device *d3d12_device_from_ID3D12DeviceExt(d3d12_device_vkd3d_ext_iface *iface) + { + return CONTAINING_RECORD(iface, struct d3d12_device, ID3D12DeviceExt_iface); + } + +-ULONG STDMETHODCALLTYPE d3d12_device_vkd3d_ext_AddRef(ID3D12DeviceExt *iface) ++ULONG STDMETHODCALLTYPE d3d12_device_vkd3d_ext_AddRef(d3d12_device_vkd3d_ext_iface *iface) + { + struct d3d12_device *device = d3d12_device_from_ID3D12DeviceExt(iface); + return d3d12_device_add_ref(device); + } + +-static ULONG STDMETHODCALLTYPE d3d12_device_vkd3d_ext_Release(ID3D12DeviceExt *iface) ++static ULONG STDMETHODCALLTYPE d3d12_device_vkd3d_ext_Release(d3d12_device_vkd3d_ext_iface *iface) + { + struct d3d12_device *device = d3d12_device_from_ID3D12DeviceExt(iface); + return d3d12_device_release(device); +@@ -40,7 +40,7 @@ static ULONG STDMETHODCALLTYPE d3d12_device_vkd3d_ext_Release(ID3D12DeviceExt *i + extern HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(d3d12_device_iface *iface, + REFIID riid, void **object); + +-static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_QueryInterface(ID3D12DeviceExt *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_QueryInterface(d3d12_device_vkd3d_ext_iface *iface, + REFIID iid, void **out) + { + struct d3d12_device *device = 
d3d12_device_from_ID3D12DeviceExt(iface); +@@ -48,7 +48,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_QueryInterface(ID3D12Dev + return d3d12_device_QueryInterface(&device->ID3D12Device_iface, iid, out); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetVulkanHandles(ID3D12DeviceExt *iface, VkInstance *vk_instance, VkPhysicalDevice *vk_physical_device, VkDevice *vk_device) ++static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetVulkanHandles(d3d12_device_vkd3d_ext_iface *iface, VkInstance *vk_instance, VkPhysicalDevice *vk_physical_device, VkDevice *vk_device) + { + struct d3d12_device *device = d3d12_device_from_ID3D12DeviceExt(iface); + TRACE("iface %p, vk_instance %p, vk_physical_device %p, vk_device %p \n", iface, vk_instance, vk_physical_device, vk_device); +@@ -61,7 +61,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetVulkanHandles(ID3D12D + return S_OK; + } + +-static BOOL STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetExtensionSupport(ID3D12DeviceExt *iface, D3D12_VK_EXTENSION extension) ++static BOOL STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetExtensionSupport(d3d12_device_vkd3d_ext_iface *iface, D3D12_VK_EXTENSION extension) + { + const struct d3d12_device *device = d3d12_device_from_ID3D12DeviceExt(iface); + bool ret_val = false; +@@ -75,6 +75,9 @@ static BOOL STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetExtensionSupport(ID3D12D + case D3D12_VK_NVX_IMAGE_VIEW_HANDLE: + ret_val = device->vk_info.NVX_image_view_handle; + break; ++ case D3D12_VK_NV_LOW_LATENCY_2: ++ ret_val = device->vk_info.NV_low_latency2; ++ break; + default: + WARN("Invalid extension %x\n", extension); + } +@@ -82,7 +85,7 @@ static BOOL STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetExtensionSupport(ID3D12D + return ret_val; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_CreateCubinComputeShaderWithName(ID3D12DeviceExt *iface, const void *cubin_data, ++static HRESULT STDMETHODCALLTYPE 
d3d12_device_vkd3d_ext_CreateCubinComputeShaderWithName(d3d12_device_vkd3d_ext_iface *iface, const void *cubin_data, + UINT32 cubin_size, UINT32 block_x, UINT32 block_y, UINT32 block_z, const char *shader_name, D3D12_CUBIN_DATA_HANDLE **out_handle) + { + VkCuFunctionCreateInfoNVX functionCreateInfo = { VK_STRUCTURE_TYPE_CU_FUNCTION_CREATE_INFO_NVX }; +@@ -129,7 +132,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_CreateCubinComputeShader + return S_OK; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_DestroyCubinComputeShader(ID3D12DeviceExt *iface, D3D12_CUBIN_DATA_HANDLE *handle) ++static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_DestroyCubinComputeShader(d3d12_device_vkd3d_ext_iface *iface, D3D12_CUBIN_DATA_HANDLE *handle) + { + const struct vkd3d_vk_device_procs *vk_procs; + struct d3d12_device *device; +@@ -149,7 +152,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_DestroyCubinComputeShade + return S_OK; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetCudaTextureObject(ID3D12DeviceExt *iface, D3D12_CPU_DESCRIPTOR_HANDLE srv_handle, ++static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetCudaTextureObject(d3d12_device_vkd3d_ext_iface *iface, D3D12_CPU_DESCRIPTOR_HANDLE srv_handle, + D3D12_CPU_DESCRIPTOR_HANDLE sampler_handle, UINT32 *cuda_texture_handle) + { + VkImageViewHandleInfoNVX imageViewHandleInfo = { VK_STRUCTURE_TYPE_IMAGE_VIEW_HANDLE_INFO_NVX }; +@@ -177,7 +180,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetCudaTextureObject(ID3 + return S_OK; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetCudaSurfaceObject(ID3D12DeviceExt *iface, D3D12_CPU_DESCRIPTOR_HANDLE uav_handle, ++static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetCudaSurfaceObject(d3d12_device_vkd3d_ext_iface *iface, D3D12_CPU_DESCRIPTOR_HANDLE uav_handle, + UINT32 *cuda_surface_handle) + { + VkImageViewHandleInfoNVX imageViewHandleInfo = { 
VK_STRUCTURE_TYPE_IMAGE_VIEW_HANDLE_INFO_NVX }; +@@ -202,7 +205,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetCudaSurfaceObject(ID3 + + extern VKD3D_THREAD_LOCAL struct D3D12_UAV_INFO *d3d12_uav_info; + +-static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_CaptureUAVInfo(ID3D12DeviceExt *iface, D3D12_UAV_INFO *uav_info) ++static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_CaptureUAVInfo(d3d12_device_vkd3d_ext_iface *iface, D3D12_UAV_INFO *uav_info) + { + if (!uav_info) + return E_INVALIDARG; +@@ -417,3 +420,136 @@ CONST_VTBL struct ID3D12DXVKInteropDeviceVtbl d3d12_dxvk_interop_device_vtbl = + d3d12_dxvk_interop_device_LockCommandQueue, + d3d12_dxvk_interop_device_UnlockCommandQueue, + }; ++ ++static inline struct d3d12_device *d3d12_device_from_ID3DLowLatencyDevice(d3d_low_latency_device_iface *iface) ++{ ++ return CONTAINING_RECORD(iface, struct d3d12_device, ID3DLowLatencyDevice_iface); ++} ++ ++ULONG STDMETHODCALLTYPE d3d12_low_latency_device_AddRef(d3d_low_latency_device_iface *iface) ++{ ++ struct d3d12_device *device = d3d12_device_from_ID3DLowLatencyDevice(iface); ++ return d3d12_device_add_ref(device); ++} ++ ++static ULONG STDMETHODCALLTYPE d3d12_low_latency_device_Release(d3d_low_latency_device_iface *iface) ++{ ++ struct d3d12_device *device = d3d12_device_from_ID3DLowLatencyDevice(iface); ++ return d3d12_device_release(device); ++} ++ ++extern HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(d3d12_device_iface *iface, ++ REFIID riid, void **object); ++ ++static HRESULT STDMETHODCALLTYPE d3d12_low_latency_device_QueryInterface(d3d_low_latency_device_iface *iface, ++ REFIID iid, void **out) ++{ ++ struct d3d12_device *device = d3d12_device_from_ID3DLowLatencyDevice(iface); ++ TRACE("iface %p, iid %s, out %p.\n", iface, debugstr_guid(iid), out); ++ return d3d12_device_QueryInterface(&device->ID3D12Device_iface, iid, out); ++} ++ ++static BOOL STDMETHODCALLTYPE 
d3d12_low_latency_device_SupportsLowLatency(d3d_low_latency_device_iface *iface) ++{ ++ struct d3d12_device *device; ++ ++ device = d3d12_device_from_ID3DLowLatencyDevice(iface); ++ ++ return device->vk_info.NV_low_latency2; ++} ++ ++static HRESULT STDMETHODCALLTYPE d3d12_low_latency_device_LatencySleep(d3d_low_latency_device_iface *iface) ++{ ++ struct d3d12_device *device; ++ ++ device = d3d12_device_from_ID3DLowLatencyDevice(iface); ++ ++ if (!device->vk_info.NV_low_latency2) ++ return E_NOTIMPL; ++ ++ if (device->swapchain_info.low_latency_swapchain) ++ return dxgi_vk_swap_chain_latency_sleep(device->swapchain_info.low_latency_swapchain); ++ ++ return S_OK; ++} ++ ++static HRESULT STDMETHODCALLTYPE d3d12_low_latency_device_SetLatencySleepMode(d3d_low_latency_device_iface *iface, BOOL low_latency_mode, BOOL low_latency_boost, ++ UINT32 minimum_interval_us) ++{ ++ struct d3d12_device *device; ++ ++ device = d3d12_device_from_ID3DLowLatencyDevice(iface); ++ ++ if (!device->vk_info.NV_low_latency2) ++ return E_NOTIMPL; ++ ++ if (device->swapchain_info.low_latency_swapchain) ++ return dxgi_vk_swap_chain_set_latency_sleep_mode(device->swapchain_info.low_latency_swapchain, low_latency_mode, low_latency_boost, minimum_interval_us); ++ ++ return S_OK; ++} ++ ++static HRESULT STDMETHODCALLTYPE d3d12_low_latency_device_SetLatencyMarker(d3d_low_latency_device_iface *iface, UINT64 frameID, UINT32 markerType) ++{ ++ struct d3d12_device *device; ++ VkLatencyMarkerNV vk_marker; ++ uint64_t internal_frame_id; ++ ++ device = d3d12_device_from_ID3DLowLatencyDevice(iface); ++ vk_marker = (VkLatencyMarkerNV)markerType; ++ ++ if (!device->vk_info.NV_low_latency2) ++ return E_NOTIMPL; ++ ++ // Offset the frameID by one to ensure it will always ++ // be a valid presentID ++ internal_frame_id = frameID + 1; ++ ++ switch (vk_marker) ++ { ++ case VK_LATENCY_MARKER_SIMULATION_START_NV: ++ device->frame_markers.simulation = internal_frame_id; ++ break; ++ case 
VK_LATENCY_MARKER_RENDERSUBMIT_START_NV: ++ device->frame_markers.render = internal_frame_id; ++ break; ++ case VK_LATENCY_MARKER_PRESENT_START_NV: ++ device->frame_markers.present = internal_frame_id; ++ break; ++ } ++ ++ if (device->swapchain_info.low_latency_swapchain) ++ return dxgi_vk_swap_chain_set_latency_marker(device->swapchain_info.low_latency_swapchain, internal_frame_id, vk_marker); ++ ++ return S_OK; ++} ++ ++static HRESULT STDMETHODCALLTYPE d3d12_low_latency_device_GetLatencyInfo(d3d_low_latency_device_iface *iface, D3D12_LATENCY_RESULTS *latency_results) ++{ ++ struct d3d12_device *device; ++ ++ device = d3d12_device_from_ID3DLowLatencyDevice(iface); ++ ++ if (!device->vk_info.NV_low_latency2) ++ return E_NOTIMPL; ++ ++ if (device->swapchain_info.low_latency_swapchain) ++ return dxgi_vk_swap_chain_get_latency_info(device->swapchain_info.low_latency_swapchain, latency_results); ++ ++ return S_OK; ++} ++ ++CONST_VTBL struct ID3DLowLatencyDeviceVtbl d3d_low_latency_device_vtbl = ++{ ++ /* IUnknown methods */ ++ d3d12_low_latency_device_QueryInterface, ++ d3d12_low_latency_device_AddRef, ++ d3d12_low_latency_device_Release, ++ ++ /* ID3DLowLatencyDevice methods */ ++ d3d12_low_latency_device_SupportsLowLatency, ++ d3d12_low_latency_device_LatencySleep, ++ d3d12_low_latency_device_SetLatencySleepMode, ++ d3d12_low_latency_device_SetLatencyMarker, ++ d3d12_low_latency_device_GetLatencyInfo ++}; +diff --git a/libs/vkd3d/meson.build b/libs/vkd3d/meson.build +index 3692ceba9c..2120e7d52a 100644 +--- a/libs/vkd3d/meson.build ++++ b/libs/vkd3d/meson.build +@@ -41,6 +41,7 @@ vkd3d_src = [ + 'cache.c', + 'command.c', + 'command_list_vkd3d_ext.c', ++ 'command_queue_vkd3d_ext.c', + 'device.c', + 'device_vkd3d_ext.c', + 'heap.c', +diff --git a/libs/vkd3d/swapchain.c b/libs/vkd3d/swapchain.c +index 27a55c8b5b..80eb558d08 100644 +--- a/libs/vkd3d/swapchain.c ++++ b/libs/vkd3d/swapchain.c +@@ -58,6 +58,7 @@ struct dxgi_vk_swap_chain_present_request + 
DXGI_COLOR_SPACE_TYPE dxgi_color_space_type; + DXGI_VK_HDR_METADATA dxgi_hdr_metadata; + uint32_t swap_interval; ++ uint64_t frame_id; + bool modifies_hdr_metadata; + }; + +@@ -67,6 +68,13 @@ struct present_wait_entry + uint64_t begin_frame_time_ns; + }; + ++struct low_latency_state ++{ ++ bool mode; ++ bool boost; ++ uint32_t minimum_interval_us; ++}; ++ + struct dxgi_vk_swap_chain + { + IDXGIVkSwapChain IDXGIVkSwapChain_iface; +@@ -133,6 +141,16 @@ struct dxgi_vk_swap_chain + /* State tracking in present tasks on how to deal with swapchain recreation. */ + bool force_swapchain_recreation; + bool is_surface_lost; ++ ++ /* Info about the current low latency state of the swapchain */ ++ pthread_mutex_t low_latency_lock; ++ ++ VkSemaphore low_latency_sem; ++ uint64_t low_latency_sem_value; ++ ++ bool low_latency_update_requested; ++ struct low_latency_state requested_low_latency_state; ++ struct low_latency_state low_latency_state; + } present; + + struct dxgi_vk_swap_chain_present_request request, request_ring[DXGI_MAX_SWAP_CHAIN_BUFFERS]; +@@ -317,6 +335,13 @@ static ULONG STDMETHODCALLTYPE dxgi_vk_swap_chain_Release(IDXGIVkSwapChain *ifac + + if (!refcount) + { ++ if (chain->queue->device->vk_info.NV_low_latency2) ++ { ++ pthread_mutex_lock(&chain->present.low_latency_lock); ++ d3d12_device_remove_swapchain(chain->queue->device, chain); ++ pthread_mutex_unlock(&chain->present.low_latency_lock); ++ } ++ + dxgi_vk_swap_chain_drain_queue(chain); + dxgi_vk_swap_chain_cleanup(chain); + vkd3d_free(chain); +@@ -760,6 +785,7 @@ static HRESULT STDMETHODCALLTYPE dxgi_vk_swap_chain_Present(IDXGIVkSwapChain *if + request->dxgi_hdr_metadata = chain->user.dxgi_hdr_metadata; + request->modifies_hdr_metadata = chain->user.modifies_hdr_metadata; + request->begin_frame_time_ns = chain->user.begin_frame_time_ns; ++ request->frame_id = chain->queue->device->frame_markers.present; + chain->user.modifies_hdr_metadata = false; + + /* Need to process this task in queue thread to deal 
with wait-before-signal. +@@ -1283,6 +1309,8 @@ static void dxgi_vk_swap_chain_recreate_swapchain_in_present_task(struct dxgi_vk + VkDevice vk_device = chain->queue->device->vk_device; + VkCommandPoolCreateInfo command_pool_create_info; + VkSwapchainCreateInfoKHR swapchain_create_info; ++ VkSwapchainLatencyCreateInfoNV swapchain_latency_create_info; ++ VkLatencySleepModeInfoNV swapchain_latency_sleep_mode_info; + VkSurfaceCapabilitiesKHR surface_caps; + VkSurfaceFormatKHR surface_format; + VkImageViewCreateInfo view_info; +@@ -1374,6 +1402,15 @@ static void dxgi_vk_swap_chain_recreate_swapchain_in_present_task(struct dxgi_vk + swapchain_create_info.imageExtent.height = max(swapchain_create_info.imageExtent.height, surface_caps.minImageExtent.height); + swapchain_create_info.imageExtent.height = min(swapchain_create_info.imageExtent.height, surface_caps.maxImageExtent.height); + ++ if (chain->queue->device->vk_info.NV_low_latency2) ++ { ++ memset(&swapchain_latency_create_info, 0, sizeof(swapchain_latency_create_info)); ++ swapchain_latency_create_info.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_LATENCY_CREATE_INFO_NV; ++ swapchain_latency_create_info.pNext = NULL; ++ swapchain_latency_create_info.latencyModeEnable = true; ++ swapchain_create_info.pNext = &swapchain_latency_create_info; ++ } ++ + vr = VK_CALL(vkCreateSwapchainKHR(vk_device, &swapchain_create_info, NULL, &chain->present.vk_swapchain)); + if (vr < 0) + { +@@ -1387,6 +1424,29 @@ static void dxgi_vk_swap_chain_recreate_swapchain_in_present_task(struct dxgi_vk + + INFO("Got %u swapchain images.\n", chain->present.backbuffer_count); + ++ /* If low latency is supported restore the current low latency state now */ ++ if (chain->queue->device->vk_info.NV_low_latency2) ++ { ++ struct low_latency_state* low_latency_state = chain->present.low_latency_update_requested ? 
++ &chain->present.requested_low_latency_state : &chain->present.low_latency_state; ++ ++ memset(&swapchain_latency_sleep_mode_info, 0, sizeof(swapchain_latency_sleep_mode_info)); ++ swapchain_latency_sleep_mode_info.sType = VK_STRUCTURE_TYPE_LATENCY_SLEEP_MODE_INFO_NV; ++ swapchain_latency_sleep_mode_info.pNext = NULL; ++ ++ swapchain_latency_sleep_mode_info.lowLatencyMode = low_latency_state->mode; ++ swapchain_latency_sleep_mode_info.lowLatencyBoost = low_latency_state->boost; ++ swapchain_latency_sleep_mode_info.minimumIntervalUs = low_latency_state->minimum_interval_us; ++ ++ VK_CALL(vkSetLatencySleepModeNV(chain->queue->device->vk_device, chain->present.vk_swapchain, &swapchain_latency_sleep_mode_info)); ++ ++ if (chain->present.low_latency_update_requested) ++ { ++ memcpy(&chain->present.low_latency_state, &chain->present.requested_low_latency_state, sizeof(struct low_latency_state)); ++ chain->present.low_latency_update_requested = false; ++ } ++ } ++ + memset(&view_info, 0, sizeof(view_info)); + view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + view_info.format = swapchain_create_info.imageFormat; +@@ -1795,9 +1855,12 @@ static void dxgi_vk_swap_chain_present_iteration(struct dxgi_vk_swap_chain *chai + * Non-FIFO swapchains will pump their frame latency handles through the fallback path of blit command being done. + * Especially on Xwayland, the present ID is updated when images actually hit on-screen due to MAILBOX behavior. + * This would unnecessarily stall our progress. */ +- if (chain->wait_thread.active && !chain->present.present_id_valid && chain->request.swap_interval > 0) ++ if (chain->wait_thread.active && !chain->present.present_id_valid && ++ (chain->request.swap_interval > 0 || chain->present.low_latency_state.mode)) + { +- chain->present.present_id += 1; ++ chain->present.present_id = (chain->present.low_latency_state.mode) ? 
++ chain->request.frame_id : chain->present.present_id + 1; ++ + present_id.sType = VK_STRUCTURE_TYPE_PRESENT_ID_KHR; + present_id.pNext = NULL; + present_id.swapchainCount = 1; +@@ -1905,6 +1968,9 @@ static void dxgi_vk_swap_chain_present_callback(void *chain_) + if (!chain->wait_thread.active) + present_count = 1; + ++ if (chain->queue->device->vk_info.NV_low_latency2) ++ pthread_mutex_lock(&chain->present.low_latency_lock); ++ + for (i = 0; i < present_count; i++) + { + /* A present iteration may or may not render to backbuffer. We'll apply best effort here. +@@ -1912,6 +1978,9 @@ static void dxgi_vk_swap_chain_present_callback(void *chain_) + dxgi_vk_swap_chain_present_iteration(chain, 0); + } + ++ if (chain->queue->device->vk_info.NV_low_latency2) ++ pthread_mutex_unlock(&chain->present.low_latency_lock); ++ + /* When this is signalled, lets main thread know that it's safe to free user buffers. + * Signal this just once on the outside since we might have retries, swap_interval > 1, etc, which complicates command buffer recycling. 
*/ + dxgi_vk_swap_chain_present_signal_blit_semaphore(chain); +@@ -2041,6 +2110,52 @@ static HRESULT dxgi_vk_swap_chain_init_waiter_thread(struct dxgi_vk_swap_chain * + return S_OK; + } + ++static HRESULT dxgi_vk_swap_chain_init_low_latency(struct dxgi_vk_swap_chain* chain) ++{ ++ const struct vkd3d_vk_device_procs *vk_procs = &chain->queue->device->vk_procs; ++ ++ VkSemaphoreTypeCreateInfoKHR semaphore_type_info; ++ VkSemaphoreCreateInfo semaphore_info; ++ VkResult vr; ++ ++ chain->present.low_latency_sem = VK_NULL_HANDLE; ++ chain->present.low_latency_sem_value = 0; ++ ++ chain->present.low_latency_update_requested = false; ++ chain->present.requested_low_latency_state.mode = false; ++ chain->present.requested_low_latency_state.boost = false; ++ chain->present.requested_low_latency_state.minimum_interval_us = 0; ++ ++ chain->present.low_latency_state.mode = false; ++ chain->present.low_latency_state.boost = false; ++ chain->present.low_latency_state.minimum_interval_us = 0; ++ ++ if (chain->queue->device->vk_info.NV_low_latency2) ++ { ++ memset(&semaphore_type_info, 0, sizeof(semaphore_type_info)); ++ semaphore_type_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR; ++ semaphore_type_info.pNext = NULL; ++ semaphore_type_info.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE_KHR; ++ semaphore_type_info.initialValue = 0; ++ ++ memset(&semaphore_info, 0, sizeof(semaphore_info)); ++ semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; ++ semaphore_info.pNext = &semaphore_type_info; ++ semaphore_info.flags = 0; ++ ++ if ((vr = VK_CALL(vkCreateSemaphore(chain->queue->device->vk_device, ++ &semaphore_info, NULL, &chain->present.low_latency_sem))) < 0) ++ { ++ ERR("Failed to create semaphore, vr %d.\n", vr); ++ return hresult_from_vk_result(vr); ++ } ++ ++ pthread_mutex_init(&chain->present.low_latency_lock, NULL); ++ } ++ ++ return S_OK; ++} ++ + static HRESULT dxgi_vk_swap_chain_init(struct dxgi_vk_swap_chain *chain, IDXGIVkSurfaceFactory *pFactory, + 
const DXGI_SWAP_CHAIN_DESC1 *pDesc, struct d3d12_command_queue *queue) + { +@@ -2066,6 +2181,9 @@ static HRESULT dxgi_vk_swap_chain_init(struct dxgi_vk_swap_chain *chain, IDXGIVk + if (FAILED(hr = dxgi_vk_swap_chain_init_waiter_thread(chain))) + goto err; + ++ if (FAILED(hr = dxgi_vk_swap_chain_init_low_latency(chain))) ++ goto err; ++ + ID3D12CommandQueue_AddRef(&queue->ID3D12CommandQueue_iface); + return S_OK; + +@@ -2093,6 +2211,13 @@ static HRESULT STDMETHODCALLTYPE dxgi_vk_swap_chain_factory_CreateSwapChain(IDXG + return hr; + } + ++ if (chain->queue->device->vk_info.NV_low_latency2) ++ { ++ pthread_mutex_lock(&chain->present.low_latency_lock); ++ d3d12_device_register_swapchain(chain->queue->device, chain); ++ pthread_mutex_unlock(&chain->present.low_latency_lock); ++ } ++ + *ppSwapchain = &chain->IDXGIVkSwapChain_iface; + return S_OK; + } +@@ -2108,6 +2233,192 @@ static CONST_VTBL struct IDXGIVkSwapChainFactoryVtbl dxgi_vk_swap_chain_factory_ + dxgi_vk_swap_chain_factory_CreateSwapChain, + }; + ++bool dxgi_vk_swap_chain_low_latency_enabled(struct dxgi_vk_swap_chain* chain) ++{ ++ return chain->present.low_latency_state.mode; ++} ++ ++HRESULT dxgi_vk_swap_chain_latency_sleep(struct dxgi_vk_swap_chain* chain) ++{ ++ const struct vkd3d_vk_device_procs *vk_procs = &chain->queue->device->vk_procs; ++ ++ VkLatencySleepInfoNV latency_sleep_info; ++ VkSemaphoreWaitInfo sem_wait_info; ++ ++ if (chain->present.low_latency_state.mode) ++ { ++ // Increment the low latency sem value before the wait ++ chain->present.low_latency_sem_value++; ++ ++ memset(&latency_sleep_info, 0, sizeof(latency_sleep_info)); ++ latency_sleep_info.sType = VK_STRUCTURE_TYPE_LATENCY_SLEEP_INFO_NV; ++ latency_sleep_info.pNext = NULL; ++ latency_sleep_info.signalSemaphore = chain->present.low_latency_sem; ++ latency_sleep_info.value = chain->present.low_latency_sem_value; ++ ++ pthread_mutex_lock(&chain->present.low_latency_lock); ++ VK_CALL(vkLatencySleepNV(chain->queue->device->vk_device, 
chain->present.vk_swapchain, &latency_sleep_info)); ++ pthread_mutex_unlock(&chain->present.low_latency_lock); ++ ++ memset(&sem_wait_info, 0, sizeof(sem_wait_info)); ++ sem_wait_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO; ++ sem_wait_info.pNext = NULL; ++ sem_wait_info.flags = 0; ++ sem_wait_info.semaphoreCount = 1; ++ sem_wait_info.pSemaphores = &chain->present.low_latency_sem; ++ sem_wait_info.pValues = &chain->present.low_latency_sem_value; ++ ++ VK_CALL(vkWaitSemaphores(chain->queue->device->vk_device, &sem_wait_info, UINT64_MAX)); ++ } ++ ++ return S_OK; ++} ++ ++HRESULT dxgi_vk_swap_chain_set_latency_sleep_mode(struct dxgi_vk_swap_chain* chain, bool low_latency_mode, ++ bool low_latency_boost, uint32_t minimum_interval_us) ++{ ++ const struct vkd3d_vk_device_procs *vk_procs = &chain->queue->device->vk_procs; ++ ++ VkLatencySleepModeInfoNV latency_sleep_mode_info; ++ ++ if (low_latency_mode == chain->present.low_latency_state.mode && ++ low_latency_boost == chain->present.low_latency_state.boost && ++ minimum_interval_us == chain->present.low_latency_state.minimum_interval_us) ++ { ++ return S_OK; ++ } ++ ++ // If we are enabling low latency mode, recreate the swapchain ++ // to sync the frameIds provided by nvapi to the presentID ++ // used a present time ++ if (low_latency_mode && !chain->present.low_latency_state.mode) ++ { ++ chain->present.requested_low_latency_state.mode = low_latency_mode; ++ chain->present.requested_low_latency_state.boost = low_latency_boost; ++ chain->present.requested_low_latency_state.minimum_interval_us = minimum_interval_us; ++ ++ // In order to use the frameId provided by the application ++ // the swapchain will have to be recreated to reset the ++ // present ID ++ chain->present.low_latency_update_requested = true; ++ chain->present.force_swapchain_recreation = true; ++ } ++ else ++ { ++ memset(&latency_sleep_mode_info, 0, sizeof(latency_sleep_mode_info)); ++ latency_sleep_mode_info.sType = 
VK_STRUCTURE_TYPE_LATENCY_SLEEP_MODE_INFO_NV; ++ latency_sleep_mode_info.pNext = NULL; ++ latency_sleep_mode_info.lowLatencyMode = low_latency_mode; ++ latency_sleep_mode_info.lowLatencyBoost = low_latency_boost; ++ latency_sleep_mode_info.minimumIntervalUs = minimum_interval_us; ++ ++ pthread_mutex_lock(&chain->present.low_latency_lock); ++ VK_CALL(vkSetLatencySleepModeNV(chain->queue->device->vk_device, chain->present.vk_swapchain, &latency_sleep_mode_info)); ++ pthread_mutex_unlock(&chain->present.low_latency_lock); ++ ++ chain->present.low_latency_state.mode = low_latency_mode; ++ chain->present.low_latency_state.boost = low_latency_boost; ++ chain->present.low_latency_state.minimum_interval_us = minimum_interval_us; ++ } ++ ++ return S_OK; ++} ++ ++HRESULT dxgi_vk_swap_chain_set_latency_marker(struct dxgi_vk_swap_chain* chain, uint64_t frameID, VkLatencyMarkerNV marker) ++{ ++ const struct vkd3d_vk_device_procs *vk_procs = &chain->queue->device->vk_procs; ++ ++ VkSetLatencyMarkerInfoNV latency_marker_info; ++ ++ if (chain->present.low_latency_state.mode) ++ { ++ memset(&latency_marker_info, 0, sizeof(latency_marker_info)); ++ latency_marker_info.sType = VK_STRUCTURE_TYPE_SET_LATENCY_MARKER_INFO_NV; ++ latency_marker_info.pNext = NULL; ++ latency_marker_info.presentID = frameID; ++ latency_marker_info.marker = marker; ++ ++ pthread_mutex_lock(&chain->present.low_latency_lock); ++ VK_CALL(vkSetLatencyMarkerNV(chain->queue->device->vk_device, chain->present.vk_swapchain, &latency_marker_info)); ++ pthread_mutex_unlock(&chain->present.low_latency_lock); ++ } ++ ++ return S_OK; ++} ++ ++HRESULT dxgi_vk_swap_chain_get_latency_info(struct dxgi_vk_swap_chain* chain, D3D12_LATENCY_RESULTS *latency_results) ++{ ++ const struct vkd3d_vk_device_procs *vk_procs = &chain->queue->device->vk_procs; ++ ++ VkGetLatencyMarkerInfoNV marker_info; ++ VkLatencyTimingsFrameReportNV* frame_reports; ++ uint32_t report_count; ++ uint32_t i; ++ ++ if 
(!chain->present.low_latency_state.mode) ++ { ++ memset(latency_results->frame_reports, 0, sizeof(latency_results->frame_reports)); ++ return S_OK; ++ } ++ ++ memset(&marker_info, 0, sizeof(marker_info)); ++ marker_info.sType = VK_STRUCTURE_TYPE_GET_LATENCY_MARKER_INFO_NV; ++ ++ pthread_mutex_lock(&chain->present.low_latency_lock); ++ ++ VK_CALL(vkGetLatencyTimingsNV(chain->queue->device->vk_device, chain->present.vk_swapchain, &report_count, &marker_info)); ++ ++ if (report_count >= 64) ++ { ++ report_count = 64; ++ frame_reports = vkd3d_calloc(report_count, sizeof(VkLatencyTimingsFrameReportNV)); ++ for (i = 0; i < report_count; i++) ++ frame_reports[i].sType = VK_STRUCTURE_TYPE_LATENCY_TIMINGS_FRAME_REPORT_NV; ++ ++ marker_info.pTimings = frame_reports; ++ ++ VK_CALL(vkGetLatencyTimingsNV(chain->queue->device->vk_device, chain->present.vk_swapchain, &report_count, &marker_info)); ++ ++ for (i = 0; i < report_count; i++) ++ { ++ latency_results->frame_reports[i].frameID = frame_reports[i].presentID - 1; ++ latency_results->frame_reports[i].inputSampleTime = frame_reports[i].inputSampleTimeUs; ++ latency_results->frame_reports[i].simStartTime = frame_reports[i].simStartTimeUs; ++ latency_results->frame_reports[i].simEndTime = frame_reports[i].simEndTimeUs; ++ latency_results->frame_reports[i].renderSubmitStartTime = frame_reports[i].renderSubmitStartTimeUs; ++ latency_results->frame_reports[i].renderSubmitEndTime = frame_reports[i].renderSubmitEndTimeUs; ++ latency_results->frame_reports[i].presentStartTime = frame_reports[i].presentStartTimeUs; ++ latency_results->frame_reports[i].presentEndTime = frame_reports[i].presentEndTimeUs; ++ latency_results->frame_reports[i].driverStartTime = frame_reports[i].driverStartTimeUs; ++ latency_results->frame_reports[i].driverEndTime = frame_reports[i].driverEndTimeUs; ++ latency_results->frame_reports[i].osRenderQueueStartTime = frame_reports[i].osRenderQueueStartTimeUs; ++ 
latency_results->frame_reports[i].osRenderQueueEndTime = frame_reports[i].osRenderQueueEndTimeUs; ++ latency_results->frame_reports[i].gpuRenderStartTime = frame_reports[i].gpuRenderStartTimeUs; ++ latency_results->frame_reports[i].gpuRenderEndTime = frame_reports[i].gpuRenderEndTimeUs; ++ latency_results->frame_reports[i].gpuActiveRenderTimeUs = ++ frame_reports[i].gpuRenderEndTimeUs - frame_reports[i].gpuRenderStartTimeUs; ++ latency_results->frame_reports[i].gpuFrameTimeUs = 0; ++ ++ if (i) { ++ latency_results->frame_reports[i].gpuFrameTimeUs = ++ frame_reports[i].gpuRenderEndTimeUs - frame_reports[i - 1].gpuRenderEndTimeUs; ++ } ++ } ++ ++ vkd3d_free(frame_reports); ++ } ++ else ++ { ++ // If there are less than 64 frame reports, zero out the frame report ++ // buffer returned to the app. ++ memset(latency_results->frame_reports, 0, sizeof(latency_results->frame_reports)); ++ } ++ ++ pthread_mutex_unlock(&chain->present.low_latency_lock); ++ ++ return S_OK; ++} ++ + HRESULT dxgi_vk_swap_chain_factory_init(struct d3d12_command_queue *queue, struct dxgi_vk_swap_chain_factory *chain) + { + chain->IDXGIVkSwapChainFactory_iface.lpVtbl = &dxgi_vk_swap_chain_factory_vtbl; +diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h +index 8c6a0ffd02..0c4929a258 100644 +--- a/libs/vkd3d/vkd3d_private.h ++++ b/libs/vkd3d/vkd3d_private.h +@@ -37,6 +37,7 @@ + #include "vkd3d_platform.h" + #include "vkd3d_swapchain_factory.h" + #include "vkd3d_command_list_vkd3d_ext.h" ++#include "vkd3d_command_queue_vkd3d_ext.h" + #include "vkd3d_device_vkd3d_ext.h" + #include "vkd3d_string.h" + #include "vkd3d_file_utils.h" +@@ -166,6 +167,7 @@ struct vkd3d_vulkan_info + bool NV_shader_subgroup_partitioned; + bool NV_memory_decompression; + bool NV_device_generated_commands_compute; ++ bool NV_low_latency2; + /* VALVE extensions */ + bool VALVE_mutable_descriptor_type; + bool VALVE_descriptor_set_host_mapping; +@@ -2991,6 +2993,7 @@ struct d3d12_command_queue_submission_execute 
+ LONG **outstanding_submissions_counters; + UINT cmd_count; + UINT outstanding_submissions_counter_count; ++ uint64_t frame_id; + + struct vkd3d_initial_transition *transitions; + size_t transition_count; +@@ -3046,12 +3049,30 @@ struct dxgi_vk_swap_chain_factory + struct d3d12_command_queue *queue; + }; + ++struct dxgi_vk_swap_chain; ++ ++bool dxgi_vk_swap_chain_low_latency_enabled(struct dxgi_vk_swap_chain* chain); ++HRESULT dxgi_vk_swap_chain_latency_sleep(struct dxgi_vk_swap_chain* chain); ++HRESULT dxgi_vk_swap_chain_set_latency_sleep_mode(struct dxgi_vk_swap_chain* chain, ++ bool low_latency_mode, bool low_latency_boost, uint32_t minimum_interval_us); ++HRESULT dxgi_vk_swap_chain_set_latency_marker(struct dxgi_vk_swap_chain* chain, ++ uint64_t frameID, VkLatencyMarkerNV marker); ++HRESULT dxgi_vk_swap_chain_get_latency_info(struct dxgi_vk_swap_chain* chain, ++ D3D12_LATENCY_RESULTS *latency_results); ++ + HRESULT dxgi_vk_swap_chain_factory_init(struct d3d12_command_queue *queue, struct dxgi_vk_swap_chain_factory *chain); + ++/* ID3D12CommandQueueExt */ ++typedef ID3D12CommandQueueExt d3d12_command_queue_vkd3d_ext_iface; ++ + /* ID3D12CommandQueue */ ++typedef ID3D12CommandQueue d3d12_command_queue_iface; ++ + struct d3d12_command_queue + { +- ID3D12CommandQueue ID3D12CommandQueue_iface; ++ d3d12_command_queue_iface ID3D12CommandQueue_iface; ++ d3d12_command_queue_vkd3d_ext_iface ID3D12CommandQueueExt_iface; ++ + LONG refcount; + + D3D12_COMMAND_QUEUE_DESC desc; +@@ -4203,6 +4224,19 @@ struct vkd3d_cached_command_allocator + uint32_t vk_family_index; + }; + ++struct vkd3d_device_swapchain_info ++{ ++ struct dxgi_vk_swap_chain* low_latency_swapchain; ++ uint32_t swapchain_count; ++}; ++ ++struct vkd3d_device_frame_markers ++{ ++ uint64_t simulation; ++ uint64_t render; ++ uint64_t present; ++}; ++ + /* ID3D12Device */ + typedef ID3D12Device12 d3d12_device_iface; + +@@ -4215,6 +4249,9 @@ typedef ID3D12DeviceExt d3d12_device_vkd3d_ext_iface; + /* 
ID3D12DXVKInteropDevice */ + typedef ID3D12DXVKInteropDevice d3d12_dxvk_interop_device_iface; + ++/* ID3DLowLatencyDevice */ ++typedef ID3DLowLatencyDevice d3d_low_latency_device_iface; ++ + struct d3d12_device_scratch_pool + { + struct vkd3d_scratch_buffer scratch_buffers[VKD3D_MAX_SCRATCH_BUFFER_COUNT]; +@@ -4229,6 +4266,7 @@ struct d3d12_device + d3d12_device_iface ID3D12Device_iface; + d3d12_device_vkd3d_ext_iface ID3D12DeviceExt_iface; + d3d12_dxvk_interop_device_iface ID3D12DXVKInteropDevice_iface; ++ d3d_low_latency_device_iface ID3DLowLatencyDevice_iface; + LONG refcount; + + VkDevice vk_device; +@@ -4299,6 +4337,9 @@ struct d3d12_device + #endif + uint64_t shader_interface_key; + uint32_t device_has_dgc_templates; ++ ++ struct vkd3d_device_swapchain_info swapchain_info; ++ struct vkd3d_device_frame_markers frame_markers; + }; + + HRESULT d3d12_device_create(struct vkd3d_instance *instance, +@@ -4521,6 +4562,31 @@ UINT d3d12_determine_shading_rate_image_tile_size(struct d3d12_device *device); + bool d3d12_device_supports_required_subgroup_size_for_stage( + struct d3d12_device *device, VkShaderStageFlagBits stage); + ++static inline void d3d12_device_register_swapchain(struct d3d12_device* device, struct dxgi_vk_swap_chain* swapchain) ++{ ++ if (!device->swapchain_info.low_latency_swapchain && ++ device->swapchain_info.swapchain_count == 0) ++ { ++ device->swapchain_info.low_latency_swapchain = swapchain; ++ } ++ else ++ { ++ device->swapchain_info.low_latency_swapchain = NULL; ++ } ++ ++ device->swapchain_info.swapchain_count++; ++} ++ ++static inline void d3d12_device_remove_swapchain(struct d3d12_device* device, struct dxgi_vk_swap_chain* swapchain) ++{ ++ if (device->swapchain_info.low_latency_swapchain == swapchain) ++ { ++ device->swapchain_info.low_latency_swapchain = NULL; ++ } ++ ++ device->swapchain_info.swapchain_count--; ++} ++ + /* ID3DBlob */ + struct d3d_blob + { +diff --git a/libs/vkd3d/vulkan_procs.h b/libs/vkd3d/vulkan_procs.h +index 
983c06f884..f4bc39d9a6 100644 +--- a/libs/vkd3d/vulkan_procs.h ++++ b/libs/vkd3d/vulkan_procs.h +@@ -346,6 +346,13 @@ VK_DEVICE_EXT_PFN(vkSetDeviceMemoryPriorityEXT) + VK_DEVICE_EXT_PFN(vkCmdDecompressMemoryNV) + VK_DEVICE_EXT_PFN(vkCmdDecompressMemoryIndirectCountNV) + ++/* VK_NV_low_latency2 */ ++VK_DEVICE_EXT_PFN(vkSetLatencySleepModeNV) ++VK_DEVICE_EXT_PFN(vkLatencySleepNV) ++VK_DEVICE_EXT_PFN(vkSetLatencyMarkerNV) ++VK_DEVICE_EXT_PFN(vkGetLatencyTimingsNV) ++VK_DEVICE_EXT_PFN(vkQueueNotifyOutOfBandNV) ++ + #undef VK_INSTANCE_PFN + #undef VK_INSTANCE_EXT_PFN + #undef VK_DEVICE_PFN diff --git a/patches/proton/82-nv_low_latency_dxvk_nvapi.patch b/patches/proton/82-nv_low_latency_dxvk_nvapi.patch new file mode 100644 index 0000000000..ce5b3ccfee --- /dev/null +++ b/patches/proton/82-nv_low_latency_dxvk_nvapi.patch @@ -0,0 +1,1202 @@ +From 0738bfdd9b9fbdcba019bfdbcd2e29ffc8fe557a Mon Sep 17 00:00:00 2001 +From: Eric Sullivan +Date: Fri, 13 Oct 2023 01:38:30 +0000 +Subject: [PATCH] nvapi: Add support for Reflex + +The intent of this commit is to enable Reflex for all D3D11, +and D3D12 titles using dxvk-nvapi. It does this through a new +device interface called ID3DLowLatencyDevice. This interface +will be implemented by ID3D12Device in vkd3d-proton, and +ID3D11Device in dxvk. + +To provide compatibility with LatencyFleX this change will +only use the ID3DLowLatencyDevice interface when LatencyFleX +is not detected. 
+--- + src/d3d/nvapi_d3d_instance.cpp | 40 ++- + src/d3d/nvapi_d3d_instance.h | 14 +- + src/d3d/nvapi_d3d_low_latency_device.cpp | 58 +++++ + src/d3d/nvapi_d3d_low_latency_device.h | 23 ++ + src/d3d12/nvapi_d3d12_device.cpp | 26 ++ + src/d3d12/nvapi_d3d12_device.h | 4 + + src/meson.build | 2 + + src/nvapi_d3d.cpp | 76 ++++-- + src/nvapi_d3d12.cpp | 52 ++++ + src/nvapi_interface.cpp | 2 + + src/shared/shared_interfaces.cpp | 3 + + src/shared/shared_interfaces.h | 61 +++++ + src/vkd3d-proton/vkd3d-proton_interfaces.cpp | 1 + + src/vkd3d-proton/vkd3d-proton_interfaces.h | 17 +- + tests/meson.build | 2 + + tests/nvapi_d3d.cpp | 252 ++++++++++++++++++- + tests/nvapi_d3d12.cpp | 126 ++++++++++ + tests/nvapi_d3d12_mocks.h | 25 ++ + tests/nvapi_d3d_mocks.h | 12 + + tests/nvapi_tests_private.h | 1 + + 20 files changed, 750 insertions(+), 47 deletions(-) + create mode 100644 src/d3d/nvapi_d3d_low_latency_device.cpp + create mode 100644 src/d3d/nvapi_d3d_low_latency_device.h + create mode 100644 src/shared/shared_interfaces.cpp + create mode 100644 src/shared/shared_interfaces.h + +diff --git a/src/d3d/nvapi_d3d_instance.cpp b/src/d3d/nvapi_d3d_instance.cpp +index 6cdf2609..e1687ad7 100644 +--- a/src/d3d/nvapi_d3d_instance.cpp ++++ b/src/d3d/nvapi_d3d_instance.cpp +@@ -1,4 +1,5 @@ + #include "../util/util_log.h" ++#include "nvapi_d3d_low_latency_device.h" + #include "nvapi_d3d_instance.h" + + namespace dxvk { +@@ -13,25 +14,40 @@ namespace dxvk { + log::write("LatencyFleX loaded and initialized successfully"); + } + +- bool NvapiD3dInstance::IsReflexAvailable() { +- return m_lfx->IsAvailable(); ++ bool NvapiD3dInstance::IsReflexAvailable(IUnknown* device) { ++ return NvapiD3dLowLatencyDevice::SupportsLowLatency(device) || m_lfx->IsAvailable(); + } + +- bool NvapiD3dInstance::IsReflexEnabled() const { +- return m_isLfxEnabled; ++ bool NvapiD3dInstance::IsLowLatencyEnabled() const { ++ return m_isLowLatencyEnabled; + } + +- void NvapiD3dInstance::SetReflexEnabled(bool value) { 
+- m_isLfxEnabled = value; ++ bool NvapiD3dInstance::IsUsingLfx() const { ++ return m_lfx->IsAvailable(); + } + +- void NvapiD3dInstance::Sleep() { +- if (m_isLfxEnabled) +- m_lfx->WaitAndBeginFrame(); ++ bool NvapiD3dInstance::SetReflexMode(IUnknown* device, bool enable, bool boost, uint32_t frameTimeUs) { ++ bool result = true; ++ ++ if (IsReflexAvailable(device)) ++ m_isLowLatencyEnabled = enable; ++ ++ if (m_lfx->IsAvailable() && enable) ++ m_lfx->SetTargetFrameTime(frameTimeUs * kNanoInMicro); ++ else if (NvapiD3dLowLatencyDevice::SupportsLowLatency(device)) ++ result = NvapiD3dLowLatencyDevice::SetLatencySleepMode(device, enable, boost, frameTimeUs); ++ ++ return result; + } + +- void NvapiD3dInstance::SetTargetFrameTime(uint64_t frameTimeUs) { +- constexpr uint64_t kNanoInMicro = 1000; +- m_lfx->SetTargetFrameTime(frameTimeUs * kNanoInMicro); ++ bool NvapiD3dInstance::Sleep(IUnknown* device) { ++ bool result = true; ++ ++ if (m_lfx->IsAvailable() && m_isLowLatencyEnabled) ++ m_lfx->WaitAndBeginFrame(); ++ else if (NvapiD3dLowLatencyDevice::SupportsLowLatency(device)) ++ result = NvapiD3dLowLatencyDevice::LatencySleep(device); ++ ++ return result; + } + } +\ No newline at end of file +diff --git a/src/d3d/nvapi_d3d_instance.h b/src/d3d/nvapi_d3d_instance.h +index 5a223714..eb7d9388 100644 +--- a/src/d3d/nvapi_d3d_instance.h ++++ b/src/d3d/nvapi_d3d_instance.h +@@ -10,15 +10,17 @@ namespace dxvk { + ~NvapiD3dInstance(); + + void Initialize(); +- [[nodiscard]] bool IsReflexAvailable(); +- [[nodiscard]] bool IsReflexEnabled() const; +- void SetReflexEnabled(bool value); +- void Sleep(); +- void SetTargetFrameTime(uint64_t frameTimeUs); ++ [[nodiscard]] bool IsReflexAvailable(IUnknown* device); ++ [[nodiscard]] bool IsLowLatencyEnabled() const; ++ [[nodiscard]] bool IsUsingLfx() const; ++ [[nodiscard]] bool SetReflexMode(IUnknown* device, bool enable, bool boost, uint32_t frameTimeUs); ++ [[nodiscard]] bool Sleep(IUnknown* device); + + private: ++ constexpr 
static uint64_t kNanoInMicro = 1000; ++ + ResourceFactory& m_resourceFactory; + std::unique_ptr m_lfx; +- bool m_isLfxEnabled = false; ++ bool m_isLowLatencyEnabled = false; + }; + } +\ No newline at end of file +diff --git a/src/d3d/nvapi_d3d_low_latency_device.cpp b/src/d3d/nvapi_d3d_low_latency_device.cpp +new file mode 100644 +index 00000000..37c8dd75 +--- /dev/null ++++ b/src/d3d/nvapi_d3d_low_latency_device.cpp +@@ -0,0 +1,58 @@ ++#include "nvapi_d3d_low_latency_device.h" ++ ++namespace dxvk { ++ bool NvapiD3dLowLatencyDevice::SupportsLowLatency(IUnknown* device) { ++ auto d3dLowLatencyDevice = GetLowLatencyDevice(device); ++ if (d3dLowLatencyDevice == nullptr) ++ return false; ++ ++ return d3dLowLatencyDevice->SupportsLowLatency(); ++ } ++ ++ bool NvapiD3dLowLatencyDevice::LatencySleep(IUnknown* device) { ++ auto d3dLowLatencyDevice = GetLowLatencyDevice(device); ++ if (d3dLowLatencyDevice == nullptr) ++ return false; ++ ++ return SUCCEEDED(d3dLowLatencyDevice->LatencySleep()); ++ } ++ ++ bool NvapiD3dLowLatencyDevice::SetLatencySleepMode(IUnknown* device, bool lowLatencyMode, bool lowLatencyBoost, uint32_t minimumIntervalUs) { ++ auto d3dLowLatencyDevice = GetLowLatencyDevice(device); ++ if (d3dLowLatencyDevice == nullptr) ++ return false; ++ ++ return SUCCEEDED(d3dLowLatencyDevice->SetLatencySleepMode(lowLatencyMode, lowLatencyBoost, minimumIntervalUs)); ++ } ++ ++ bool NvapiD3dLowLatencyDevice::GetLatencyInfo(IUnknown* device, D3D_LATENCY_RESULTS* latencyResults) { ++ auto d3dLowLatencyDevice = GetLowLatencyDevice(device); ++ if (d3dLowLatencyDevice == nullptr) ++ return false; ++ ++ return SUCCEEDED(d3dLowLatencyDevice->GetLatencyInfo(latencyResults)); ++ } ++ ++ bool NvapiD3dLowLatencyDevice::SetLatencyMarker(IUnknown* device, uint64_t frameID, uint32_t markerType) { ++ auto d3dLowLatencyDevice = GetLowLatencyDevice(device); ++ if (d3dLowLatencyDevice == nullptr) ++ return false; ++ ++ return SUCCEEDED(d3dLowLatencyDevice->SetLatencyMarker(frameID, 
markerType)); ++ } ++ ++ Com NvapiD3dLowLatencyDevice::GetLowLatencyDevice(IUnknown* device) { ++ std::scoped_lock lock(m_LowLatencyDeviceMutex); ++ auto it = m_lowLatencyDeviceMap.find(device); ++ if (it != m_lowLatencyDeviceMap.end()) ++ return it->second; ++ ++ Com d3dLowLatencyDevice; ++ if (FAILED(device->QueryInterface(IID_PPV_ARGS(&d3dLowLatencyDevice)))) ++ return nullptr; ++ ++ m_lowLatencyDeviceMap.emplace(device, d3dLowLatencyDevice.ptr()); ++ ++ return d3dLowLatencyDevice; ++ } ++} +diff --git a/src/d3d/nvapi_d3d_low_latency_device.h b/src/d3d/nvapi_d3d_low_latency_device.h +new file mode 100644 +index 00000000..77938340 +--- /dev/null ++++ b/src/d3d/nvapi_d3d_low_latency_device.h +@@ -0,0 +1,23 @@ ++#pragma once ++ ++#include "../nvapi_private.h" ++#include "../shared/shared_interfaces.h" ++#include "../util/com_pointer.h" ++ ++namespace dxvk { ++ class NvapiD3dLowLatencyDevice { ++ public: ++ static bool SupportsLowLatency(IUnknown* device); ++ static bool LatencySleep(IUnknown* device); ++ static bool SetLatencySleepMode(IUnknown* device, bool lowLatencyMode, bool lowLatencyBoost, uint32_t minimumIntervalUs); ++ static bool GetLatencyInfo(IUnknown* device, D3D_LATENCY_RESULTS* latencyResults); ++ static bool SetLatencyMarker(IUnknown* device, uint64_t frameID, uint32_t markerType); ++ ++ private: ++ inline static std::unordered_map m_lowLatencyDeviceMap; ++ ++ inline static std::mutex m_LowLatencyDeviceMutex; ++ ++ [[nodiscard]] static Com GetLowLatencyDevice(IUnknown* device); ++ }; ++} +\ No newline at end of file +diff --git a/src/d3d12/nvapi_d3d12_device.cpp b/src/d3d12/nvapi_d3d12_device.cpp +index 0d30d347..b69fa982 100644 +--- a/src/d3d12/nvapi_d3d12_device.cpp ++++ b/src/d3d12/nvapi_d3d12_device.cpp +@@ -81,6 +81,14 @@ namespace dxvk { + return SUCCEEDED(cubinDevice->GetCudaSurfaceObject(uavHandle, reinterpret_cast(cudaSurfaceHandle))); + } + ++ bool NvapiD3d12Device::NotifyOutOfBandCommandQueue(ID3D12CommandQueue* commandQueue, 
D3D12_OUT_OF_BAND_CQ_TYPE type) { ++ auto commandQueueExt = GetCommandQueueExt(commandQueue); ++ if (commandQueueExt == nullptr) ++ return false; ++ ++ return SUCCEEDED(commandQueueExt->NotifyOutOfBandCommandQueue(type)); ++ } ++ + bool NvapiD3d12Device::LaunchCubinShader(ID3D12GraphicsCommandList* commandList, NVDX_ObjectHandle pShader, NvU32 blockX, NvU32 blockY, NvU32 blockZ, const void* params, NvU32 paramSize) { + auto commandListExt = GetCommandListExt(commandList); + if (!commandListExt.has_value()) +@@ -146,6 +154,22 @@ namespace dxvk { + return deviceExt; + } + ++ Com NvapiD3d12Device::GetCommandQueueExt(ID3D12CommandQueue* commandQueue) { ++ std::scoped_lock lock(m_commandQueueMutex); ++ auto it = m_commandQueueMap.find(commandQueue); ++ if (it != m_commandQueueMap.end()) ++ return it->second; ++ ++ Com commandQueueExt; ++ if (FAILED(commandQueue->QueryInterface(IID_PPV_ARGS(&commandQueueExt)))) ++ return nullptr; ++ ++ if (commandQueueExt != nullptr) ++ m_commandQueueMap.emplace(commandQueue, commandQueueExt.ptr()); ++ ++ return commandQueueExt; ++ } ++ + std::optional NvapiD3d12Device::GetCommandListExt(ID3D12GraphicsCommandList* commandList) { + std::scoped_lock lock(m_commandListMutex); + auto it = m_commandListMap.find(commandList); +@@ -169,11 +193,13 @@ namespace dxvk { + } + + void NvapiD3d12Device::ClearCacheMaps() { ++ std::scoped_lock commandQueueLock(m_commandQueueMutex); + std::scoped_lock commandListLock(m_commandListMutex); + std::scoped_lock cubinDeviceLock(m_cubinDeviceMutex); + std::scoped_lock cubinSmemLock(m_cubinSmemMutex); + + m_cubinDeviceMap.clear(); ++ m_commandQueueMap.clear(); + m_commandListMap.clear(); + m_cubinSmemMap.clear(); + } +diff --git a/src/d3d12/nvapi_d3d12_device.h b/src/d3d12/nvapi_d3d12_device.h +index ef419d34..386de906 100644 +--- a/src/d3d12/nvapi_d3d12_device.h ++++ b/src/d3d12/nvapi_d3d12_device.h +@@ -24,6 +24,7 @@ namespace dxvk { + static bool DestroyCubinComputeShader(ID3D12Device* device, 
NVDX_ObjectHandle shader); + static bool GetCudaTextureObject(ID3D12Device* device, D3D12_CPU_DESCRIPTOR_HANDLE srvHandle, D3D12_CPU_DESCRIPTOR_HANDLE samplerHandle, NvU32* cudaTextureHandle); + static bool GetCudaSurfaceObject(ID3D12Device* device, D3D12_CPU_DESCRIPTOR_HANDLE uavHandle, NvU32* cudaSurfaceHandle); ++ static bool NotifyOutOfBandCommandQueue(ID3D12CommandQueue* commandQueue, D3D12_OUT_OF_BAND_CQ_TYPE type); + static bool LaunchCubinShader(ID3D12GraphicsCommandList* commandList, NVDX_ObjectHandle shader, NvU32 blockX, NvU32 blockY, NvU32 blockZ, const void* params, NvU32 paramSize); + static bool CaptureUAVInfo(ID3D12Device* device, NVAPI_UAV_INFO* uavInfo); + static bool IsFatbinPTXSupported(ID3D12Device* device); +@@ -32,15 +33,18 @@ namespace dxvk { + + private: + inline static std::unordered_map m_cubinDeviceMap; ++ inline static std::unordered_map m_commandQueueMap; + inline static std::unordered_map m_commandListMap; + inline static std::unordered_map m_cubinSmemMap; + + inline static std::mutex m_commandListMutex; ++ inline static std::mutex m_commandQueueMutex; + inline static std::mutex m_cubinDeviceMutex; + inline static std::mutex m_cubinSmemMutex; + + [[nodiscard]] static Com GetCubinDevice(ID3D12Device* device); + [[nodiscard]] static Com GetDeviceExt(ID3D12Device* device, D3D12_VK_EXTENSION extension); ++ [[nodiscard]] static Com GetCommandQueueExt(ID3D12CommandQueue* commandQueue); + [[nodiscard]] static std::optional GetCommandListExt(ID3D12GraphicsCommandList* commandList); + }; + } +diff --git a/src/meson.build b/src/meson.build +index 98410c0f..daabb3f8 100644 +--- a/src/meson.build ++++ b/src/meson.build +@@ -1,6 +1,7 @@ + nvapi_src = files([ + 'dxvk/dxvk_interfaces.cpp', + 'vkd3d-proton/vkd3d-proton_interfaces.cpp', ++ 'shared/shared_interfaces.cpp', + 'util/util_string.cpp', + 'util/util_env.cpp', + 'util/util_log.cpp', +@@ -12,6 +13,7 @@ nvapi_src = files([ + 'resource_factory.cpp', + 'd3d/lfx.cpp', + 
'd3d/nvapi_d3d_instance.cpp', ++ 'd3d/nvapi_d3d_low_latency_device.cpp', + 'd3d11/nvapi_d3d11_device.cpp', + 'd3d12/nvapi_d3d12_device.cpp', + 'nvapi_globals.cpp', +diff --git a/src/nvapi_d3d.cpp b/src/nvapi_d3d.cpp +index 7f1168bf..166abc06 100644 +--- a/src/nvapi_d3d.cpp ++++ b/src/nvapi_d3d.cpp +@@ -1,3 +1,5 @@ ++#include "dxvk/dxvk_interfaces.h" ++#include "d3d/nvapi_d3d_low_latency_device.h" + #include "nvapi_private.h" + #include "nvapi_globals.h" + #include "util/util_statuscode.h" +@@ -106,23 +108,26 @@ extern "C" { + + NvAPI_Status __cdecl NvAPI_D3D_Sleep(IUnknown* pDevice) { + constexpr auto n = __func__; ++ static bool alreadyLoggedNoReflex = false; ++ static bool alreadyLoggedError = false; + static bool alreadyLoggedOk = false; +- static bool alreadyLoggedNoLfx = false; + + if (nvapiAdapterRegistry == nullptr) + return ApiNotInitialized(n); + +- if (!nvapiD3dInstance->IsReflexAvailable()) +- return NoImplementation(n, alreadyLoggedNoLfx); ++ if (!nvapiD3dInstance->IsReflexAvailable(pDevice)) ++ return NoImplementation(n, alreadyLoggedNoReflex); + +- nvapiD3dInstance->Sleep(); ++ if (!nvapiD3dInstance->Sleep(pDevice)) ++ return Error(n, alreadyLoggedError); + + return Ok(n, alreadyLoggedOk); + } + + NvAPI_Status __cdecl NvAPI_D3D_SetSleepMode(IUnknown* pDevice, NV_SET_SLEEP_MODE_PARAMS* pSetSleepModeParams) { + constexpr auto n = __func__; +- static bool alreadyLoggedNoLfx = false; ++ static bool alreadyLoggedNoReflex = false; ++ static bool alreadyLoggedError = false; + + if (nvapiAdapterRegistry == nullptr) + return ApiNotInitialized(n); +@@ -130,19 +135,19 @@ extern "C" { + if (pSetSleepModeParams->version != NV_SET_SLEEP_MODE_PARAMS_VER1) + return IncompatibleStructVersion(n); + +- if (!nvapiD3dInstance->IsReflexAvailable()) +- return NoImplementation(n, alreadyLoggedNoLfx); ++ if (!nvapiD3dInstance->IsReflexAvailable(pDevice)) ++ return NoImplementation(n, alreadyLoggedNoReflex); + +- 
nvapiD3dInstance->SetReflexEnabled(pSetSleepModeParams->bLowLatencyMode); +- if (pSetSleepModeParams->bLowLatencyMode) +- nvapiD3dInstance->SetTargetFrameTime(pSetSleepModeParams->minimumIntervalUs); ++ if (!nvapiD3dInstance->SetReflexMode(pDevice, pSetSleepModeParams->bLowLatencyMode, pSetSleepModeParams->bLowLatencyBoost, pSetSleepModeParams->minimumIntervalUs)) ++ return Error(n, alreadyLoggedError); + + return Ok(str::format(n, " (", pSetSleepModeParams->bLowLatencyMode ? (str::format("Enabled/", pSetSleepModeParams->minimumIntervalUs, "us")) : "Disabled", ")")); + } + + NvAPI_Status __cdecl NvAPI_D3D_GetSleepStatus(IUnknown* pDevice, NV_GET_SLEEP_STATUS_PARAMS* pGetSleepStatusParams) { + constexpr auto n = __func__; +- static bool alreadyLoggedNoLfx = false; ++ static bool alreadyLoggedNoReflex = false; ++ static bool alreadyLoggedOk = false; + + if (nvapiAdapterRegistry == nullptr) + return ApiNotInitialized(n); +@@ -150,20 +155,53 @@ extern "C" { + if (pGetSleepStatusParams->version != NV_GET_SLEEP_STATUS_PARAMS_VER1) + return IncompatibleStructVersion(n); + +- if (!nvapiD3dInstance->IsReflexAvailable()) +- return NoImplementation(n, alreadyLoggedNoLfx); ++ if (!nvapiD3dInstance->IsReflexAvailable(pDevice)) ++ return NoImplementation(n, alreadyLoggedNoReflex); + +- pGetSleepStatusParams->bLowLatencyMode = nvapiD3dInstance->IsReflexEnabled(); +- return Ok(n); ++ pGetSleepStatusParams->bLowLatencyMode = nvapiD3dInstance->IsLowLatencyEnabled(); ++ ++ return Ok(n, alreadyLoggedOk); + } + + NvAPI_Status __cdecl NvAPI_D3D_GetLatency(IUnknown* pDev, NV_LATENCY_RESULT_PARAMS* pGetLatencyParams) { +- static bool alreadyLogged = false; +- return NoImplementation(__func__, alreadyLogged); ++ constexpr auto n = __func__; ++ static bool alreadyLoggedNoImpl = false; ++ static bool alreadyLoggedError = false; ++ static bool alreadyLoggedOk = false; ++ ++ if (nvapiAdapterRegistry == nullptr) ++ return ApiNotInitialized(n); ++ ++ if (pGetLatencyParams->version != 
NV_LATENCY_RESULT_PARAMS_VER1) ++ return IncompatibleStructVersion(n); ++ ++ if (nvapiD3dInstance->IsUsingLfx() || !NvapiD3dLowLatencyDevice::SupportsLowLatency(pDev)) ++ return NoImplementation(n, alreadyLoggedNoImpl); ++ ++ if (!NvapiD3dLowLatencyDevice::GetLatencyInfo(pDev, reinterpret_cast(pGetLatencyParams))) ++ return Error(n, alreadyLoggedError); ++ ++ return Ok(n, alreadyLoggedOk); + } + + NvAPI_Status __cdecl NvAPI_D3D_SetLatencyMarker(IUnknown* pDev, NV_LATENCY_MARKER_PARAMS* pSetLatencyMarkerParams) { +- static bool alreadyLogged = false; +- return NoImplementation(__func__, alreadyLogged); ++ constexpr auto n = __func__; ++ static bool alreadyLoggedNoImpl = false; ++ static bool alreadyLoggedError = false; ++ static bool alreadyLoggedOk = false; ++ ++ if (nvapiAdapterRegistry == nullptr) ++ return ApiNotInitialized(n); ++ ++ if (pSetLatencyMarkerParams->version != NV_LATENCY_MARKER_PARAMS_VER1) ++ return IncompatibleStructVersion(n); ++ ++ if (nvapiD3dInstance->IsUsingLfx() || !NvapiD3dLowLatencyDevice::SupportsLowLatency(pDev)) ++ return NoImplementation(n, alreadyLoggedNoImpl); ++ ++ if (!NvapiD3dLowLatencyDevice::SetLatencyMarker(pDev, pSetLatencyMarkerParams->frameID, pSetLatencyMarkerParams->markerType)) ++ return Error(n, alreadyLoggedError); ++ ++ return Ok(n, alreadyLoggedOk); + } + } +diff --git a/src/nvapi_d3d12.cpp b/src/nvapi_d3d12.cpp +index aadd7f22..c989cc42 100644 +--- a/src/nvapi_d3d12.cpp ++++ b/src/nvapi_d3d12.cpp +@@ -1,5 +1,6 @@ + #include "nvapi_private.h" + #include "nvapi_globals.h" ++#include "d3d/nvapi_d3d_low_latency_device.h" + #include "d3d12/nvapi_d3d12_device.h" + #include "util/util_statuscode.h" + #include "util/util_op_code.h" +@@ -397,4 +398,55 @@ extern "C" { + + return Ok(n, alreadyLoggedOk); + } ++ ++ NvAPI_Status __cdecl NvAPI_D3D12_NotifyOutOfBandCommandQueue(ID3D12CommandQueue* pCommandQueue, NV_OUT_OF_BAND_CQ_TYPE cqType) { ++ constexpr auto n = __func__; ++ static bool alreadyLoggedError = false; ++ static bool 
alreadyLoggedOk = false; ++ ++ if (nvapiAdapterRegistry == nullptr) ++ return ApiNotInitialized(n); ++ ++ if (pCommandQueue == nullptr) ++ return InvalidArgument(n); ++ ++ ID3D12Device* pDevice; ++ if (FAILED(pCommandQueue->GetDevice(IID_PPV_ARGS(&pDevice)))) ++ return InvalidArgument(n); ++ ++ if (nvapiD3dInstance->IsUsingLfx() || !NvapiD3dLowLatencyDevice::SupportsLowLatency(pDevice)) ++ return NoImplementation(n); ++ ++ if (!NvapiD3d12Device::NotifyOutOfBandCommandQueue(pCommandQueue, static_cast(cqType))) ++ return Error(n, alreadyLoggedError); ++ ++ return Ok(n, alreadyLoggedOk); ++ } ++ ++ NvAPI_Status __cdecl NvAPI_D3D12_SetAsyncFrameMarker(ID3D12CommandQueue* pCommandQueue, NV_LATENCY_MARKER_PARAMS* pSetLatencyMarkerParams) { ++ constexpr auto n = __func__; ++ static bool alreadyLoggedError = false; ++ static bool alreadyLoggedOk = false; ++ ++ if (nvapiAdapterRegistry == nullptr) ++ return ApiNotInitialized(n); ++ ++ if (pSetLatencyMarkerParams->version != NV_LATENCY_MARKER_PARAMS_VER1) ++ return IncompatibleStructVersion(n); ++ ++ if (pCommandQueue == nullptr) ++ return InvalidArgument(n); ++ ++ ID3D12Device* pDevice; ++ if (FAILED(pCommandQueue->GetDevice(IID_PPV_ARGS(&pDevice)))) ++ return InvalidArgument(n); ++ ++ if (nvapiD3dInstance->IsUsingLfx() || !NvapiD3dLowLatencyDevice::SupportsLowLatency(pDevice)) ++ return NoImplementation(n); ++ ++ if (!NvapiD3dLowLatencyDevice::SetLatencyMarker(pDevice, pSetLatencyMarkerParams->frameID, pSetLatencyMarkerParams->markerType)) ++ return Error(n, alreadyLoggedError); ++ ++ return Ok(n, alreadyLoggedOk); ++ } + } +diff --git a/src/nvapi_interface.cpp b/src/nvapi_interface.cpp +index b461166e..4b6f24b3 100644 +--- a/src/nvapi_interface.cpp ++++ b/src/nvapi_interface.cpp +@@ -69,6 +69,8 @@ extern "C" { + INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_GetRaytracingCaps) + INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_GetRaytracingAccelerationStructurePrebuildInfoEx) + 
INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_BuildRaytracingAccelerationStructureEx) ++ INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_NotifyOutOfBandCommandQueue) ++ INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_SetAsyncFrameMarker) + INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D_GetObjectHandleForResource) + INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D_SetResourceHint) + INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D_GetCurrentSLIState) +diff --git a/src/shared/shared_interfaces.cpp b/src/shared/shared_interfaces.cpp +new file mode 100644 +index 00000000..2d675ebc +--- /dev/null ++++ b/src/shared/shared_interfaces.cpp +@@ -0,0 +1,3 @@ ++#include "shared_interfaces.h" ++ ++const GUID ID3DLowLatencyDevice::guid = {0xf3112584, 0x41f9, 0x348d, {0xa5, 0x9b, 0x00, 0xb7, 0xe1, 0xd2, 0x85, 0xd6}}; +diff --git a/src/shared/shared_interfaces.h b/src/shared/shared_interfaces.h +new file mode 100644 +index 00000000..bc5e0baa +--- /dev/null ++++ b/src/shared/shared_interfaces.h +@@ -0,0 +1,61 @@ ++#pragma once ++ ++#include "../nvapi_private.h" ++ ++#ifdef __GNUC__ ++#pragma GCC diagnostic ignored "-Wnon-virtual-dtor" ++#endif // __GNUC__ ++ ++#define SHARED_DEFINE_GUID(iface) \ ++ template <> \ ++ inline GUID const& __mingw_uuidof() { return iface::guid; } ++ ++/** ++ * \brief Frame Report Info ++ */ ++typedef struct D3D_LATENCY_RESULTS { ++ UINT32 version; ++ struct D3D_FRAME_REPORT { ++ UINT64 frameID; ++ UINT64 inputSampleTime; ++ UINT64 simStartTime; ++ UINT64 simEndTime; ++ UINT64 renderSubmitStartTime; ++ UINT64 renderSubmitEndTime; ++ UINT64 presentStartTime; ++ UINT64 presentEndTime; ++ UINT64 driverStartTime; ++ UINT64 driverEndTime; ++ UINT64 osRenderQueueStartTime; ++ UINT64 osRenderQueueEndTime; ++ UINT64 gpuRenderStartTime; ++ UINT64 gpuRenderEndTime; ++ UINT32 gpuActiveRenderTimeUs; ++ UINT32 gpuFrameTimeUs; ++ UINT8 rsvd[120]; ++ } frame_reports[64]; ++ UINT8 rsvd[32]; ++} D3D_LATENCY_RESULTS; ++ ++MIDL_INTERFACE("f3112584-41f9-348d-a59b-00b7e1d285d6") ++ID3DLowLatencyDevice : public 
IUnknown { ++ static const GUID guid; ++ ++ virtual BOOL STDMETHODCALLTYPE SupportsLowLatency() = 0; ++ ++ virtual HRESULT STDMETHODCALLTYPE LatencySleep() = 0; ++ ++ virtual HRESULT STDMETHODCALLTYPE SetLatencySleepMode( ++ BOOL lowLatencyMode, ++ BOOL lowLatencyBoost, ++ uint32_t minimumIntervalUs) = 0; ++ ++ virtual HRESULT STDMETHODCALLTYPE SetLatencyMarker( ++ uint64_t frameID, ++ uint32_t markerType) = 0; ++ ++ virtual HRESULT STDMETHODCALLTYPE GetLatencyInfo( ++ D3D_LATENCY_RESULTS * latencyResults) = 0; ++}; ++ ++SHARED_DEFINE_GUID(ID3DLowLatencyDevice) +diff --git a/src/vkd3d-proton/vkd3d-proton_interfaces.cpp b/src/vkd3d-proton/vkd3d-proton_interfaces.cpp +index 03a4acd1..7b41fb7d 100644 +--- a/src/vkd3d-proton/vkd3d-proton_interfaces.cpp ++++ b/src/vkd3d-proton/vkd3d-proton_interfaces.cpp +@@ -20,3 +20,4 @@ + const GUID ID3D12DeviceExt::guid = {0x11ea7a1a, 0x0f6a, 0x49bf, {0xb6, 0x12, 0x3e, 0x30, 0xf8, 0xe2, 0x01, 0xdd}}; + const GUID ID3D12GraphicsCommandListExt::guid = {0x77a86b09, 0x2bea, 0x4801, {0xb8, 0x9a, 0x37, 0x64, 0x8e, 0x10, 0x4a, 0xf1}}; + const GUID ID3D12GraphicsCommandListExt1::guid = {0xd53b0028, 0xafb4, 0x4b65, {0xa4, 0xf1, 0x7b, 0x0d, 0xaa, 0xa6, 0x5b, 0x4f}}; ++const GUID ID3D12CommandQueueExt::guid = {0x40ed3f96, 0xe773, 0xe9bc, {0xfc, 0x0c, 0xe9, 0x55, 0x60, 0xc9, 0x9a, 0xd6}}; +diff --git a/src/vkd3d-proton/vkd3d-proton_interfaces.h b/src/vkd3d-proton/vkd3d-proton_interfaces.h +index 8f388ac6..b16f38d7 100644 +--- a/src/vkd3d-proton/vkd3d-proton_interfaces.h ++++ b/src/vkd3d-proton/vkd3d-proton_interfaces.h +@@ -29,7 +29,13 @@ + + enum D3D12_VK_EXTENSION : uint32_t { + D3D12_VK_NVX_BINARY_IMPORT = 0x1, +- D3D12_VK_NVX_IMAGE_VIEW_HANDLE = 0x2 ++ D3D12_VK_NVX_IMAGE_VIEW_HANDLE = 0x2, ++ D3D12_VK_NV_LOW_LATENCY_2 = 0x3 ++}; ++ ++enum D3D12_OUT_OF_BAND_CQ_TYPE : uint32_t { ++ D3D_OUT_OF_BAND_RENDER = 0x0, ++ D3D_OUT_OF_BAND_PRESENT = 0x1 + }; + + struct D3D12_CUBIN_DATA_HANDLE { +@@ -114,6 +120,15 @@ ID3D12GraphicsCommandListExt1 : 
public ID3D12GraphicsCommandListExt { + UINT32 raw_params_count) = 0; + }; + ++MIDL_INTERFACE("40ed3f96-e773-e9bc-fc0c-e95560c99ad6") ++ID3D12CommandQueueExt : public IUnknown { ++ static const GUID guid; ++ ++ virtual HRESULT STDMETHODCALLTYPE NotifyOutOfBandCommandQueue( ++ D3D12_OUT_OF_BAND_CQ_TYPE type) = 0; ++}; ++ + VKD3D_PROTON_GUID(ID3D12DeviceExt) + VKD3D_PROTON_GUID(ID3D12GraphicsCommandListExt) + VKD3D_PROTON_GUID(ID3D12GraphicsCommandListExt1) ++VKD3D_PROTON_GUID(ID3D12CommandQueueExt) +diff --git a/tests/meson.build b/tests/meson.build +index 5aca1d56..c967aed8 100644 +--- a/tests/meson.build ++++ b/tests/meson.build +@@ -1,6 +1,7 @@ + nvapi_src = files([ + '../src/dxvk/dxvk_interfaces.cpp', + '../src/vkd3d-proton/vkd3d-proton_interfaces.cpp', ++ '../src/shared/shared_interfaces.cpp', + '../src/util/util_string.cpp', + '../src/util/util_env.cpp', + '../src/util/util_log.cpp', +@@ -8,6 +9,7 @@ nvapi_src = files([ + '../src/sysinfo/nvml.cpp', + '../src/d3d/lfx.cpp', + '../src/d3d/nvapi_d3d_instance.cpp', ++ '../src/d3d/nvapi_d3d_low_latency_device.cpp', + '../src/sysinfo/nvapi_output.cpp', + '../src/sysinfo/nvapi_adapter.cpp', + '../src/sysinfo/nvapi_adapter_registry.cpp', +diff --git a/tests/nvapi_d3d.cpp b/tests/nvapi_d3d.cpp +index 2b32d69e..afc10fec 100644 +--- a/tests/nvapi_d3d.cpp ++++ b/tests/nvapi_d3d.cpp +@@ -116,11 +116,16 @@ TEST_CASE("D3D Reflex/LatencyFleX depending methods succeed", "[.d3d]") { + auto vulkan = std::make_unique(); + auto nvml = std::make_unique(); + auto lfx = std::make_unique(); ++ D3DLowLatencyDeviceMock lowLatencyDevice; + DXGIDxvkAdapterMock adapter; + DXGIOutput6Mock output; ++ auto lowLatencyDeviceRefCount = 0; + + auto e = ConfigureDefaultTestEnvironment(*dxgiFactory, *vulkan, *nvml, *lfx, adapter, output); + ++ ALLOW_CALL(unknown, QueryInterface(ID3DLowLatencyDevice::guid, _)) ++ .RETURN(E_NOINTERFACE); ++ + ALLOW_CALL(*lfx, IsAvailable()) + .RETURN(false); + +@@ -223,17 +228,246 @@ TEST_CASE("D3D Reflex/LatencyFleX 
depending methods succeed", "[.d3d]") { + SECTION("Sleep returns NoImplementation") { + REQUIRE(NvAPI_D3D_Sleep(&unknown) == NVAPI_NO_IMPLEMENTATION); + } +- } + +- SECTION("GetLatency returns no-implementation") { +- NV_LATENCY_RESULT_PARAMS params; +- params.version = NV_LATENCY_RESULT_PARAMS_VER; +- REQUIRE(NvAPI_D3D_GetLatency(&unknown, ¶ms) == NVAPI_NO_IMPLEMENTATION); ++ SECTION("GetLatency returns no-implementation") { ++ NV_LATENCY_RESULT_PARAMS params; ++ params.version = NV_LATENCY_RESULT_PARAMS_VER; ++ REQUIRE(NvAPI_D3D_GetLatency(&unknown, ¶ms) == NVAPI_NO_IMPLEMENTATION); ++ } ++ ++ SECTION("SetLatencyMarker returns no-implementation") { ++ NV_LATENCY_MARKER_PARAMS params; ++ params.version = NV_LATENCY_MARKER_PARAMS_VER; ++ REQUIRE(NvAPI_D3D_SetLatencyMarker(&unknown, ¶ms) == NVAPI_NO_IMPLEMENTATION); ++ } + } + +- SECTION("SetLatencyMarker returns no-implementation") { +- NV_LATENCY_MARKER_PARAMS params; +- params.version = NV_LATENCY_MARKER_PARAMS_VER; +- REQUIRE(NvAPI_D3D_SetLatencyMarker(&unknown, ¶ms) == NVAPI_NO_IMPLEMENTATION); ++ SECTION("Reflex depending methods succeed when D3DLowLatencyDevice is available") { ++ ALLOW_CALL(unknown, QueryInterface(ID3DLowLatencyDevice::guid, _)) ++ .LR_SIDE_EFFECT(*_2 = static_cast(&lowLatencyDevice)) ++ .LR_SIDE_EFFECT(lowLatencyDeviceRefCount++) ++ .RETURN(S_OK); ++ ALLOW_CALL(lowLatencyDevice, AddRef()) ++ .LR_SIDE_EFFECT(lowLatencyDeviceRefCount++) ++ .RETURN(lowLatencyDeviceRefCount); ++ ALLOW_CALL(lowLatencyDevice, Release()) ++ .LR_SIDE_EFFECT(lowLatencyDeviceRefCount--) ++ .RETURN(lowLatencyDeviceRefCount); ++ ++ ALLOW_CALL(lowLatencyDevice, SupportsLowLatency()) ++ .RETURN(false); ++ ++ SECTION("D3DLowLatencyDevice does not support low latency") { ++ SetupResourceFactory(std::move(dxgiFactory), std::move(vulkan), std::move(nvml), std::move(lfx)); ++ REQUIRE(NvAPI_Initialize() == NVAPI_OK); ++ ++ SECTION("GetSleepStatus returns NoImplementation") { ++ REQUIRE_CALL(lowLatencyDevice, 
SupportsLowLatency()) ++ .RETURN(false); ++ ++ NV_GET_SLEEP_STATUS_PARAMS_V1 params{}; ++ params.version = NV_GET_SLEEP_STATUS_PARAMS_VER1; ++ REQUIRE(NvAPI_D3D_GetSleepStatus(&unknown, ¶ms) == NVAPI_NO_IMPLEMENTATION); ++ } ++ ++ SECTION("SetSleepMode returns NoImplementation") { ++ NV_SET_SLEEP_MODE_PARAMS params{}; ++ params.version = NV_SET_SLEEP_MODE_PARAMS_VER; ++ REQUIRE(NvAPI_D3D_SetSleepMode(&unknown, ¶ms) == NVAPI_NO_IMPLEMENTATION); ++ } ++ ++ SECTION("Sleep returns NoImplementation") { ++ REQUIRE(NvAPI_D3D_Sleep(&unknown) == NVAPI_NO_IMPLEMENTATION); ++ } ++ ++ SECTION("GetLatency returns no-implementation") { ++ NV_LATENCY_RESULT_PARAMS params; ++ params.version = NV_LATENCY_RESULT_PARAMS_VER; ++ REQUIRE(NvAPI_D3D_GetLatency(&unknown, ¶ms) == NVAPI_NO_IMPLEMENTATION); ++ } ++ ++ SECTION("SetLatencyMarker returns no-implementation") { ++ NV_LATENCY_MARKER_PARAMS params; ++ params.version = NV_LATENCY_MARKER_PARAMS_VER; ++ REQUIRE(NvAPI_D3D_SetLatencyMarker(&unknown, ¶ms) == NVAPI_NO_IMPLEMENTATION); ++ } ++ } ++ ++ SECTION("D3DLowLatencyDevice supports low latency") { ++ ALLOW_CALL(lowLatencyDevice, SupportsLowLatency()) ++ .RETURN(true); ++ ++ SECTION("GetSleepStatus returns OK") { ++ REQUIRE_CALL(lowLatencyDevice, SupportsLowLatency()) ++ .RETURN(true); ++ ++ SetupResourceFactory(std::move(dxgiFactory), std::move(vulkan), std::move(nvml), std::move(lfx)); ++ REQUIRE(NvAPI_Initialize() == NVAPI_OK); ++ ++ NV_GET_SLEEP_STATUS_PARAMS_V1 params{}; ++ params.version = NV_GET_SLEEP_STATUS_PARAMS_VER1; ++ REQUIRE(NvAPI_D3D_GetSleepStatus(&unknown, ¶ms) == NVAPI_OK); ++ } ++ ++ SECTION("SetSleepMode calls ID3DLowLatencyDevice::SetLatencySleepMode returns OK") { ++ FORBID_CALL(*lfx, SetTargetFrameTime(_)); // NOLINT(bugprone-use-after-move) ++ ++ REQUIRE_CALL(lowLatencyDevice, SetLatencySleepMode(true, false, 250U)) ++ .RETURN(S_OK); ++ ++ SetupResourceFactory(std::move(dxgiFactory), std::move(vulkan), std::move(nvml), std::move(lfx)); ++ 
REQUIRE(NvAPI_Initialize() == NVAPI_OK); ++ ++ NV_SET_SLEEP_MODE_PARAMS_V1 params{}; ++ params.version = NV_SET_SLEEP_MODE_PARAMS_VER1; ++ params.bLowLatencyMode = true; ++ params.minimumIntervalUs = 250; ++ REQUIRE(NvAPI_D3D_SetSleepMode(&unknown, ¶ms) == NVAPI_OK); ++ } ++ ++ SECTION("Sleep calls ID3DLowLatencyDevice::LatencySleep and returns OK") { ++ FORBID_CALL(*lfx, SetTargetFrameTime(_)); // NOLINT(bugprone-use-after-move) ++ FORBID_CALL(*lfx, WaitAndBeginFrame()); ++ ++ REQUIRE_CALL(lowLatencyDevice, SetLatencySleepMode(true, false, 500U)) ++ .RETURN(S_OK); ++ REQUIRE_CALL(lowLatencyDevice, LatencySleep()) ++ .RETURN(S_OK); ++ ++ SetupResourceFactory(std::move(dxgiFactory), std::move(vulkan), std::move(nvml), std::move(lfx)); ++ REQUIRE(NvAPI_Initialize() == NVAPI_OK); ++ ++ NV_SET_SLEEP_MODE_PARAMS sleepModeParams{}; ++ sleepModeParams.version = NV_SET_SLEEP_MODE_PARAMS_VER; ++ sleepModeParams.bLowLatencyMode = true; ++ sleepModeParams.minimumIntervalUs = 500; ++ REQUIRE(NvAPI_D3D_SetSleepMode(&unknown, &sleepModeParams) == NVAPI_OK); ++ REQUIRE(NvAPI_D3D_Sleep(&unknown) == NVAPI_OK); ++ } ++ ++ SECTION("SetLatencyMarker calls ID3DLowLatencyDevice::SetLatencyMarker and returns OK") { ++ REQUIRE_CALL(lowLatencyDevice, SetLatencySleepMode(true, false, 750U)) ++ .RETURN(S_OK); ++ REQUIRE_CALL(lowLatencyDevice, LatencySleep()) ++ .RETURN(S_OK); ++ REQUIRE_CALL(lowLatencyDevice, SetLatencyMarker(123ULL, SIMULATION_START)) ++ .RETURN(S_OK); ++ ++ SetupResourceFactory(std::move(dxgiFactory), std::move(vulkan), std::move(nvml), std::move(lfx)); ++ REQUIRE(NvAPI_Initialize() == NVAPI_OK); ++ ++ NV_SET_SLEEP_MODE_PARAMS sleepModeParams{}; ++ sleepModeParams.version = NV_SET_SLEEP_MODE_PARAMS_VER; ++ sleepModeParams.bLowLatencyMode = true; ++ sleepModeParams.minimumIntervalUs = 750; ++ NV_LATENCY_MARKER_PARAMS latencyMarkerParams{}; ++ latencyMarkerParams.version = NV_LATENCY_MARKER_PARAMS_VER1; ++ latencyMarkerParams.frameID = 123ULL; ++ 
latencyMarkerParams.markerType = SIMULATION_START; ++ REQUIRE(NvAPI_D3D_SetSleepMode(&unknown, &sleepModeParams) == NVAPI_OK); ++ REQUIRE(NvAPI_D3D_Sleep(&unknown) == NVAPI_OK); ++ REQUIRE(NvAPI_D3D_SetLatencyMarker(&unknown, &latencyMarkerParams) == NVAPI_OK); ++ } ++ ++ SECTION("SetLatencyMarker with unknown struct version returns incompatible-struct-version") { ++ SetupResourceFactory(std::move(dxgiFactory), std::move(vulkan), std::move(nvml), std::move(lfx)); ++ ++ REQUIRE(NvAPI_Initialize() == NVAPI_OK); ++ ++ NV_LATENCY_MARKER_PARAMS params{}; ++ params.version = NV_LATENCY_MARKER_PARAMS_VER1 + 1; ++ REQUIRE(NvAPI_D3D_SetLatencyMarker(&unknown, ¶ms) == NVAPI_INCOMPATIBLE_STRUCT_VERSION); ++ } ++ ++ SECTION("SetLatencyMarker with current struct version returns not incompatible-struct-version") { ++ SetupResourceFactory(std::move(dxgiFactory), std::move(vulkan), std::move(nvml), std::move(lfx)); ++ ++ ALLOW_CALL(lowLatencyDevice, SetLatencyMarker(_, _)) ++ .RETURN(S_OK); ++ ++ REQUIRE(NvAPI_Initialize() == NVAPI_OK); ++ ++ NV_LATENCY_MARKER_PARAMS params{}; ++ params.version = NV_LATENCY_MARKER_PARAMS_VER; ++ REQUIRE(NvAPI_D3D_SetLatencyMarker(&unknown, ¶ms) != NVAPI_INCOMPATIBLE_STRUCT_VERSION); ++ } ++ ++ SECTION("GetLatency calls ID3DLowLatencyDevice::GetLatencyInfo and returns OK") { ++ REQUIRE_CALL(lowLatencyDevice, SetLatencySleepMode(true, false, 1000U)) ++ .RETURN(S_OK); ++ REQUIRE_CALL(lowLatencyDevice, GetLatencyInfo(_)) ++ .RETURN(S_OK); ++ ++ SetupResourceFactory(std::move(dxgiFactory), std::move(vulkan), std::move(nvml), std::move(lfx)); ++ REQUIRE(NvAPI_Initialize() == NVAPI_OK); ++ ++ NV_SET_SLEEP_MODE_PARAMS sleepModeParams{}; ++ sleepModeParams.version = NV_SET_SLEEP_MODE_PARAMS_VER; ++ sleepModeParams.bLowLatencyMode = true; ++ sleepModeParams.minimumIntervalUs = 1000; ++ NV_LATENCY_RESULT_PARAMS latencyResults{}; ++ latencyResults.version = NV_LATENCY_RESULT_PARAMS_VER1; ++ REQUIRE(NvAPI_D3D_SetSleepMode(&unknown, &sleepModeParams) == 
NVAPI_OK); ++ REQUIRE(NvAPI_D3D_GetLatency(&unknown, &latencyResults) == NVAPI_OK); ++ } ++ } ++ ++ SECTION("D3DLowLatencyDevice supports low latency and LFX is available") { ++ ALLOW_CALL(lowLatencyDevice, SupportsLowLatency()) ++ .RETURN(true); ++ ALLOW_CALL(*lfx, IsAvailable()) ++ .RETURN(true); // NOLINT(bugprone-use-after-move) ++ ++ SECTION("SetSleepMode calls Lfx::SetTargetFrameTime returns OK") { ++ REQUIRE_CALL(*lfx, SetTargetFrameTime(250ULL * 1000)); // NOLINT(bugprone-use-after-move) ++ ++ FORBID_CALL(lowLatencyDevice, SetLatencySleepMode(_, _, _)); ++ ++ SetupResourceFactory(std::move(dxgiFactory), std::move(vulkan), std::move(nvml), std::move(lfx)); ++ REQUIRE(NvAPI_Initialize() == NVAPI_OK); ++ ++ NV_SET_SLEEP_MODE_PARAMS_V1 params{}; ++ params.version = NV_SET_SLEEP_MODE_PARAMS_VER1; ++ params.bLowLatencyMode = true; ++ params.minimumIntervalUs = 250; ++ REQUIRE(NvAPI_D3D_SetSleepMode(&unknown, ¶ms) == NVAPI_OK); ++ } ++ ++ SECTION("Sleep calls Lfx::WaitAndBeginFrame and returns OK") { ++ REQUIRE_CALL(*lfx, SetTargetFrameTime(500ULL * 1000)); // NOLINT(bugprone-use-after-move) ++ REQUIRE_CALL(*lfx, WaitAndBeginFrame()); ++ ++ FORBID_CALL(lowLatencyDevice, SetLatencySleepMode(_, _, _)); ++ FORBID_CALL(lowLatencyDevice, LatencySleep()); ++ ++ SetupResourceFactory(std::move(dxgiFactory), std::move(vulkan), std::move(nvml), std::move(lfx)); ++ REQUIRE(NvAPI_Initialize() == NVAPI_OK); ++ ++ NV_SET_SLEEP_MODE_PARAMS params{}; ++ params.version = NV_SET_SLEEP_MODE_PARAMS_VER; ++ params.bLowLatencyMode = true; ++ params.minimumIntervalUs = 500; ++ REQUIRE(NvAPI_D3D_SetSleepMode(&unknown, ¶ms) == NVAPI_OK); ++ REQUIRE(NvAPI_D3D_Sleep(&unknown) == NVAPI_OK); ++ } ++ ++ SECTION("GetLatency returns no-implementation") { ++ SetupResourceFactory(std::move(dxgiFactory), std::move(vulkan), std::move(nvml), std::move(lfx)); ++ REQUIRE(NvAPI_Initialize() == NVAPI_OK); ++ ++ NV_LATENCY_RESULT_PARAMS params; ++ params.version = NV_LATENCY_RESULT_PARAMS_VER; ++ 
REQUIRE(NvAPI_D3D_GetLatency(&unknown, ¶ms) == NVAPI_NO_IMPLEMENTATION); ++ } ++ ++ SECTION("SetLatencyMarker returns no-implementation") { ++ SetupResourceFactory(std::move(dxgiFactory), std::move(vulkan), std::move(nvml), std::move(lfx)); ++ REQUIRE(NvAPI_Initialize() == NVAPI_OK); ++ ++ NV_LATENCY_MARKER_PARAMS params; ++ params.version = NV_LATENCY_MARKER_PARAMS_VER; ++ REQUIRE(NvAPI_D3D_SetLatencyMarker(&unknown, ¶ms) == NVAPI_NO_IMPLEMENTATION); ++ } ++ } + } + } +diff --git a/tests/nvapi_d3d12.cpp b/tests/nvapi_d3d12.cpp +index 7e587280..05bf0ba9 100644 +--- a/tests/nvapi_d3d12.cpp ++++ b/tests/nvapi_d3d12.cpp +@@ -2,6 +2,7 @@ + #include "resource_factory_util.h" + #include "nvapi_sysinfo_mocks.h" + #include "nvapi_d3d12_mocks.h" ++#include "nvapi_d3d_mocks.h" + + using namespace trompeloeil; + +@@ -21,9 +22,13 @@ typedef struct _NVAPI_D3D12_RAYTRACING_GEOMETRY_DESC_EX_R520 { + + TEST_CASE("D3D12 methods succeed", "[.d3d12]") { + D3D12Vkd3dDeviceMock device; ++ D3D12Vkd3dCommandQueueMock commandQueue; ++ D3DLowLatencyDeviceMock lowLatencyDevice; + D3D12Vkd3dGraphicsCommandListMock commandList; + auto deviceRefCount = 0; + auto commandListRefCount = 0; ++ auto commandQueueRefCount = 0; ++ auto lowLatencyDeviceRefCount = 0; + + ALLOW_CALL(device, QueryInterface(ID3D12DeviceExt::guid, _)) + .LR_SIDE_EFFECT(*_2 = static_cast(&device)) +@@ -36,6 +41,9 @@ TEST_CASE("D3D12 methods succeed", "[.d3d12]") { + .LR_SIDE_EFFECT(deviceRefCount--) + .RETURN(deviceRefCount); + ++ ALLOW_CALL(device, QueryInterface(ID3DLowLatencyDevice::guid, _)) ++ .RETURN(E_NOINTERFACE); ++ + ALLOW_CALL(device, GetExtensionSupport(_)) + .RETURN(true); + +@@ -58,6 +66,24 @@ TEST_CASE("D3D12 methods succeed", "[.d3d12]") { + .LR_SIDE_EFFECT(commandListRefCount--) + .RETURN(commandListRefCount); + ++ ALLOW_CALL(commandQueue, QueryInterface(__uuidof(ID3D12CommandQueue), _)) ++ .LR_SIDE_EFFECT(*_2 = static_cast(&commandQueue)) ++ .LR_SIDE_EFFECT(commandQueueRefCount++) ++ .RETURN(S_OK); ++ 
ALLOW_CALL(commandQueue, QueryInterface(ID3D12CommandQueueExt::guid, _)) ++ .LR_SIDE_EFFECT(*_2 = static_cast(&commandQueue)) ++ .LR_SIDE_EFFECT(commandQueueRefCount++) ++ .RETURN(S_OK); ++ ALLOW_CALL(commandQueue, AddRef()) ++ .LR_SIDE_EFFECT(commandQueueRefCount++) ++ .RETURN(commandQueueRefCount); ++ ALLOW_CALL(commandQueue, Release()) ++ .LR_SIDE_EFFECT(commandQueueRefCount--) ++ .RETURN(commandQueueRefCount); ++ ALLOW_CALL(commandQueue, GetDevice(__uuidof(ID3D12Device), _)) ++ .LR_SIDE_EFFECT(*_2 = static_cast(&device)) ++ .RETURN(S_OK); ++ + SECTION("CreateGraphicsPipelineState for other than SetDepthBounds returns not-supported") { + FORBID_CALL(device, CreateGraphicsPipelineState(_, _, _)); + +@@ -748,4 +774,104 @@ TEST_CASE("D3D12 methods succeed", "[.d3d12]") { + REQUIRE(NvAPI_D3D12_BuildRaytracingAccelerationStructureEx(static_cast(&commandList), &params) != NVAPI_INCOMPATIBLE_STRUCT_VERSION); + } + } ++ ++ SECTION("D3DLowLatencyDevice methods succeed") { ++ auto dxgiFactory = std::make_unique(); ++ auto vulkan = std::make_unique(); ++ auto nvml = std::make_unique(); ++ auto lfx = std::make_unique(); ++ DXGIDxvkAdapterMock adapter; ++ DXGIOutput6Mock output; ++ ++ auto e = ConfigureDefaultTestEnvironment(*dxgiFactory, *vulkan, *nvml, *lfx, adapter, output); ++ ++ ALLOW_CALL(device, QueryInterface(ID3DLowLatencyDevice::guid, _)) ++ .LR_SIDE_EFFECT(*_2 = static_cast(&lowLatencyDevice)) ++ .LR_SIDE_EFFECT(lowLatencyDeviceRefCount++) ++ .RETURN(S_OK); ++ ALLOW_CALL(lowLatencyDevice, AddRef()) ++ .LR_SIDE_EFFECT(lowLatencyDeviceRefCount++) ++ .RETURN(lowLatencyDeviceRefCount); ++ ALLOW_CALL(lowLatencyDevice, Release()) ++ .LR_SIDE_EFFECT(lowLatencyDeviceRefCount--) ++ .RETURN(lowLatencyDeviceRefCount); ++ ++ ALLOW_CALL(lowLatencyDevice, SupportsLowLatency()) ++ .RETURN(true); ++ ++ SECTION("NotifyOutOfBandCommandQueue succeeds") { ++ SECTION("NotifyOutOfBandCommandQueue returns OK") { ++ SetupResourceFactory(std::move(dxgiFactory), std::move(vulkan), 
std::move(nvml), std::move(lfx)); ++ ++ REQUIRE_CALL(commandQueue, NotifyOutOfBandCommandQueue(static_cast(OUT_OF_BAND_RENDER))) ++ .RETURN(S_OK); ++ ++ REQUIRE(NvAPI_Initialize() == NVAPI_OK); ++ REQUIRE(NvAPI_D3D12_NotifyOutOfBandCommandQueue(&commandQueue, OUT_OF_BAND_RENDER) == NVAPI_OK); ++ } ++ ++ SECTION("NotifyOutOfBandCommandQueue returns no-implementation with LFX") { ++ ALLOW_CALL(*lfx, IsAvailable()) ++ .RETURN(true); // NOLINT(bugprone-use-after-move) ++ ++ SetupResourceFactory(std::move(dxgiFactory), std::move(vulkan), std::move(nvml), std::move(lfx)); ++ ++ REQUIRE(NvAPI_Initialize() == NVAPI_OK); ++ REQUIRE(NvAPI_D3D12_NotifyOutOfBandCommandQueue(&commandQueue, OUT_OF_BAND_RENDER) == NVAPI_NO_IMPLEMENTATION); ++ } ++ } ++ ++ SECTION("SetAsyncFrameMarker succeeds") { ++ SECTION("SetAsyncFrameMarker returns OK") { ++ SetupResourceFactory(std::move(dxgiFactory), std::move(vulkan), std::move(nvml), std::move(lfx)); ++ ++ REQUIRE_CALL(lowLatencyDevice, SetLatencyMarker(123ULL, OUT_OF_BAND_RENDERSUBMIT_START)) ++ .RETURN(S_OK); ++ ++ REQUIRE(NvAPI_Initialize() == NVAPI_OK); ++ ++ NV_LATENCY_MARKER_PARAMS params{}; ++ params.version = NV_LATENCY_MARKER_PARAMS_VER1; ++ params.frameID = 123ULL; ++ params.markerType = OUT_OF_BAND_RENDERSUBMIT_START; ++ REQUIRE(NvAPI_D3D12_SetAsyncFrameMarker(&commandQueue, &params) == NVAPI_OK); ++ } ++ ++ SECTION("SetAsyncFrameMarker returns no-implementation with LFX") { ++ ALLOW_CALL(*lfx, IsAvailable()) ++ .RETURN(true); // NOLINT(bugprone-use-after-move) ++ ++ SetupResourceFactory(std::move(dxgiFactory), std::move(vulkan), std::move(nvml), std::move(lfx)); ++ ++ REQUIRE(NvAPI_Initialize() == NVAPI_OK); ++ ++ NV_LATENCY_MARKER_PARAMS params{}; ++ params.version = NV_LATENCY_MARKER_PARAMS_VER1; ++ REQUIRE(NvAPI_D3D12_SetAsyncFrameMarker(&commandQueue, &params) == NVAPI_NO_IMPLEMENTATION); ++ } ++ ++ SECTION("SetAsyncFrameMarker with unknown struct version returns incompatible-struct-version") { ++ 
SetupResourceFactory(std::move(dxgiFactory), std::move(vulkan), std::move(nvml), std::move(lfx)); ++ ++ REQUIRE(NvAPI_Initialize() == NVAPI_OK); ++ ++ NV_LATENCY_MARKER_PARAMS params{}; ++ params.version = NV_LATENCY_MARKER_PARAMS_VER1 + 1; ++ REQUIRE(NvAPI_D3D12_SetAsyncFrameMarker(&commandQueue, &params) == NVAPI_INCOMPATIBLE_STRUCT_VERSION); ++ } ++ ++ SECTION("SetAsyncFrameMarker with current struct version returns not incompatible-struct-version") { ++ SetupResourceFactory(std::move(dxgiFactory), std::move(vulkan), std::move(nvml), std::move(lfx)); ++ ++ ALLOW_CALL(lowLatencyDevice, SetLatencyMarker(_, _)) ++ .RETURN(S_OK); ++ ++ REQUIRE(NvAPI_Initialize() == NVAPI_OK); ++ ++ NV_LATENCY_MARKER_PARAMS params{}; ++ params.version = NV_LATENCY_MARKER_PARAMS_VER; ++ REQUIRE(NvAPI_D3D12_SetAsyncFrameMarker(&commandQueue, &params) != NVAPI_INCOMPATIBLE_STRUCT_VERSION); ++ } ++ } ++ } + } +diff --git a/tests/nvapi_d3d12_mocks.h b/tests/nvapi_d3d12_mocks.h +index a3cff713..cf6529f5 100644 +--- a/tests/nvapi_d3d12_mocks.h ++++ b/tests/nvapi_d3d12_mocks.h +@@ -164,3 +164,28 @@ class D3D12Vkd3dGraphicsCommandListMock final : public trompeloeil::mock_interfa + IMPLEMENT_MOCK6(LaunchCubinShader); + IMPLEMENT_MOCK9(LaunchCubinShaderEx); + }; ++ ++class ID3D12Vkd3dCommandQueue : public ID3D12CommandQueue, public ID3D12CommandQueueExt {}; ++ ++class D3D12Vkd3dCommandQueueMock final : public trompeloeil::mock_interface { ++ MAKE_MOCK2(QueryInterface, HRESULT(REFIID, void**), override); ++ MAKE_MOCK0(AddRef, ULONG(), override); ++ MAKE_MOCK0(Release, ULONG(), override); ++ IMPLEMENT_MOCK3(GetPrivateData); ++ IMPLEMENT_MOCK3(SetPrivateData); ++ IMPLEMENT_MOCK2(SetPrivateDataInterface); ++ IMPLEMENT_MOCK1(SetName); ++ IMPLEMENT_MOCK2(GetDevice); ++ IMPLEMENT_MOCK10(UpdateTileMappings); ++ IMPLEMENT_MOCK6(CopyTileMappings); ++ IMPLEMENT_MOCK2(ExecuteCommandLists); ++ IMPLEMENT_MOCK3(SetMarker); ++ IMPLEMENT_MOCK3(BeginEvent); ++ IMPLEMENT_MOCK0(EndEvent); ++ IMPLEMENT_MOCK2(Signal); ++ 
IMPLEMENT_MOCK2(Wait); ++ IMPLEMENT_MOCK1(GetTimestampFrequency); ++ IMPLEMENT_MOCK2(GetClockCalibration); ++ MAKE_MOCK1(GetDesc, D3D12_COMMAND_QUEUE_DESC*(D3D12_COMMAND_QUEUE_DESC*), override); ++ IMPLEMENT_MOCK1(NotifyOutOfBandCommandQueue); ++}; +\ No newline at end of file +diff --git a/tests/nvapi_d3d_mocks.h b/tests/nvapi_d3d_mocks.h +index d1e7ac72..472cfc54 100644 +--- a/tests/nvapi_d3d_mocks.h ++++ b/tests/nvapi_d3d_mocks.h +@@ -1,6 +1,7 @@ + #pragma once + + #include "nvapi_tests_private.h" ++#include "../src/shared/shared_interfaces.h" + #include "../src/d3d/lfx.h" + + class UnknownMock : public trompeloeil::mock_interface { +@@ -14,3 +15,14 @@ class LfxMock : public trompeloeil::mock_interface { + IMPLEMENT_MOCK0(WaitAndBeginFrame); + IMPLEMENT_MOCK1(SetTargetFrameTime); + }; ++ ++class D3DLowLatencyDeviceMock : public trompeloeil::mock_interface { ++ MAKE_MOCK2(QueryInterface, HRESULT(REFIID, void**), override); ++ MAKE_MOCK0(AddRef, ULONG(), override); ++ MAKE_MOCK0(Release, ULONG(), override); ++ IMPLEMENT_MOCK0(SupportsLowLatency); ++ IMPLEMENT_MOCK0(LatencySleep); ++ IMPLEMENT_MOCK3(SetLatencySleepMode); ++ IMPLEMENT_MOCK2(SetLatencyMarker); ++ IMPLEMENT_MOCK1(GetLatencyInfo); ++}; +\ No newline at end of file +diff --git a/tests/nvapi_tests_private.h b/tests/nvapi_tests_private.h +index e126e61b..ff1856ae 100644 +--- a/tests/nvapi_tests_private.h ++++ b/tests/nvapi_tests_private.h +@@ -2,6 +2,7 @@ + + #include "../src/nvapi_private.h" + #include "../src/nvapi_globals.h" ++#include "../src/d3d/nvapi_d3d_low_latency_device.h" + #include "../src/d3d11/nvapi_d3d11_device.h" + #include "../src/d3d12/nvapi_d3d12_device.h" + #include "../inc/catch_amalgamated.hpp" diff --git a/patches/proton/83-nv_low_latency_wine.patch b/patches/proton/83-nv_low_latency_wine.patch new file mode 100644 index 0000000000..07bba38313 --- /dev/null +++ b/patches/proton/83-nv_low_latency_wine.patch @@ -0,0 +1,95 @@ +From b7c8c08a4dfba8b7551f073790e8a2ae8b302312 Mon Sep 17 
00:00:00 2001 +From: Eric Sullivan +Date: Wed, 11 Oct 2023 15:43:36 -0700 +Subject: [PATCH] winevulkan: Add support for VK_NV_low_latency2 + +To support VK_NV_low_latency2 a frame ID should be provided for each +call to vkQueueSubmit. This commit adds a path for fshack to provide +a frame ID when it submits its upscaling work, when low latency mode +is enabled. +--- + dlls/winevulkan/make_vulkan | 3 +++ + dlls/winevulkan/vulkan.c | 29 +++++++++++++++++++++++++++++ + dlls/winevulkan/vulkan_private.h | 1 + + 3 files changed, 33 insertions(+) + +diff --git a/dlls/winevulkan/make_vulkan b/dlls/winevulkan/make_vulkan +index 73681dcaff1..2fda357e454 100755 +--- a/dlls/winevulkan/make_vulkan ++++ b/dlls/winevulkan/make_vulkan +@@ -297,6 +297,9 @@ FUNCTION_OVERRIDES = { + # VK_KHR_synchronization2 + "vkQueueSubmit2KHR" : {"dispatch": True, "driver": False, "thunk" : ThunkType.PRIVATE, "extra_param" : "pSubmits"}, + ++ # VK_NV_low_latency2 ++ "vkSetLatencySleepModeNV" : {"dispatch": True, "driver": False, "thunk" : ThunkType.NONE}, ++ + # Custom functions + "wine_vkAcquireKeyedMutex" : {"dispatch": True, "driver": False, "thunk" : ThunkType.PRIVATE}, + "wine_vkReleaseKeyedMutex" : {"dispatch": True, "driver": False, "thunk" : ThunkType.PRIVATE}, +diff --git a/dlls/winevulkan/vulkan.c b/dlls/winevulkan/vulkan.c +index a20ec837e2a..7c81a12d595 100644 +--- a/dlls/winevulkan/vulkan.c ++++ b/dlls/winevulkan/vulkan.c +@@ -896,6 +896,24 @@ static void wine_vk_instance_free(struct wine_instance *instance) + free(instance); + } + ++VkResult wine_vkSetLatencySleepModeNV(VkDevice device, VkSwapchainKHR swapchain, const VkLatencySleepModeInfoNV *pSleepModeInfo) ++{ ++ VkLatencySleepModeInfoNV sleep_mode_info_host; ++ ++ struct wine_device* wine_device = wine_device_from_handle(device); ++ struct wine_swapchain* wine_swapchain = wine_swapchain_from_handle(swapchain); ++ ++ wine_device->low_latency_enabled = pSleepModeInfo->lowLatencyMode; ++ ++ sleep_mode_info_host.sType = 
VK_STRUCTURE_TYPE_LATENCY_SLEEP_MODE_INFO_NV; ++ sleep_mode_info_host.pNext = NULL; ++ sleep_mode_info_host.lowLatencyMode = pSleepModeInfo->lowLatencyMode; ++ sleep_mode_info_host.lowLatencyBoost = pSleepModeInfo->lowLatencyBoost; ++ sleep_mode_info_host.minimumIntervalUs = pSleepModeInfo->minimumIntervalUs; ++ ++ return wine_device->funcs.p_vkSetLatencySleepModeNV(wine_device->device, wine_swapchain->swapchain, &sleep_mode_info_host); ++} ++ + VkResult wine_vkAllocateCommandBuffers(VkDevice handle, const VkCommandBufferAllocateInfo *allocate_info, + VkCommandBuffer *buffers ) + { +@@ -3921,6 +3939,8 @@ VkResult fshack_vk_queue_present(VkQueue queue_handle, const VkPresentInfoKHR *p + if (n_hacks > 0) + { + VkPipelineStageFlags waitStage, *waitStages, *waitStages_arr = NULL; ++ VkLatencySubmissionPresentIdNV latencySubmitInfo; ++ VkPresentIdKHR *present_id; + + if (pPresentInfo->waitSemaphoreCount > 1) + { +@@ -3944,6 +3964,15 @@ VkResult fshack_vk_queue_present(VkQueue queue_handle, const VkPresentInfoKHR *p + submitInfo.signalSemaphoreCount = 1; + submitInfo.pSignalSemaphores = &blit_sema; + ++ if ((queue->device->low_latency_enabled) && ++ (present_id = wine_vk_find_struct(&our_presentInfo, PRESENT_ID_KHR))) ++ { ++ latencySubmitInfo.sType = VK_STRUCTURE_TYPE_LATENCY_SUBMISSION_PRESENT_ID_NV; ++ latencySubmitInfo.pNext = NULL; ++ latencySubmitInfo.presentID = *present_id->pPresentIds; ++ submitInfo.pNext = &latencySubmitInfo; ++ } ++ + res = queue->device->funcs.p_vkQueueSubmit(queue->queue, 1, &submitInfo, VK_NULL_HANDLE); + if (res != VK_SUCCESS) + ERR("vkQueueSubmit: %d\n", res); +diff --git a/dlls/winevulkan/vulkan_private.h b/dlls/winevulkan/vulkan_private.h +index c9548e944d2..47b2a9a5903 100644 +--- a/dlls/winevulkan/vulkan_private.h ++++ b/dlls/winevulkan/vulkan_private.h +@@ -99,6 +99,7 @@ struct wine_device + uint64_t sem_poll_update_value; /* set to sem_poll_update.value by signaller thread once update is processed. 
*/ + unsigned int allocated_fence_ops_count; + BOOL keyed_mutexes_enabled; ++ BOOL low_latency_enabled; + }; + + static inline struct wine_device *wine_device_from_handle(VkDevice handle) diff --git a/patches/protonprep-valve-staging.sh b/patches/protonprep-valve-staging.sh index 4179d130d7..0c0ab6280f 100755 --- a/patches/protonprep-valve-staging.sh +++ b/patches/protonprep-valve-staging.sh @@ -375,5 +375,19 @@ #echo "WINE: -FSR- enable FSR flag by default (fixes broken fs hack scaling in some games like Apex and FFXIV)" #patch -Np1 < ../patches/proton/71-invert-fsr-logic.patch + echo "DXVK: -Nvidia Reflex- Add NV low latency support" + pushd dxvk; patch -Np1 < ../patches/proton/80-nv_low_latency_dxvk.patch; popd + + echo "VKD3D-PROTON: -Nvidia Reflex- Add NV low latency support" + pushd vkd3d-proton; patch -Np1 < ../patches/proton/81-nv_low_latency_vkd3d_proton.patch; popd + + echo "DXVK-NVAPI: -Nvidia Reflex- Add support for Reflex" + pushd dxvk-nvapi; patch -Np1 < ../patches/proton/82-nv_low_latency_dxvk_nvapi.patch; popd + + echo "WINE: -Nvidia Reflex- Support VK_NV_low_latency2" + pushd wine; patch -Np1 < ../patches/proton/83-nv_low_latency_wine.patch; popd + + git submodule update --recursive + ### END PROTON-GE ADDITIONAL CUSTOM PATCHES ### ### END WINE PATCHING ###