diff --git a/docs/fsr3.md b/docs/fsr3.md new file mode 100644 index 0000000000..5c32c1b0b1 --- /dev/null +++ b/docs/fsr3.md @@ -0,0 +1,7 @@ +# FSR3 + +1. Download the [FSR3 SDK](https://gpuopen.com/fidelityfx-super-resolution-3/). +2. Copy the `PrebuiltSignedDLL\amd_fidelityfx_dx12.dll` file from the SDK to the directory containing your executable (e.g., `scripts\tmp\vs2022\bin\Debug`). +3. The engine will automatically detect and utilize FSR3. + +Implementation can be found in [fsr3.cpp](../src/renderer/fsr3.cpp). \ No newline at end of file diff --git a/external/ffx_api/dx12/ffx_api_dx12.h b/external/ffx_api/dx12/ffx_api_dx12.h new file mode 100644 index 0000000000..503169dd56 --- /dev/null +++ b/external/ffx_api/dx12/ffx_api_dx12.h @@ -0,0 +1,298 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#pragma once +#include "../ffx_api.h" +#include "../ffx_api_types.h" +#include +#include +#include + +#define FFX_API_CREATE_CONTEXT_DESC_TYPE_BACKEND_DX12 0x0000002u +struct ffxCreateBackendDX12Desc +{ + ffxCreateContextDescHeader header; + ID3D12Device *device; ///< Device on which the backend will run. +}; + +#define FFX_API_EFFECT_ID_FRAMEGENERATIONSWAPCHAIN_DX12 0x00030000u + +#define FFX_API_CREATE_CONTEXT_DESC_TYPE_FRAMEGENERATIONSWAPCHAIN_WRAP_DX12 0x30001u +struct ffxCreateContextDescFrameGenerationSwapChainWrapDX12 +{ + ffxCreateContextDescHeader header; + IDXGISwapChain4** swapchain; ///< Input swap chain to wrap, output frame interpolation swapchain. + ID3D12CommandQueue* gameQueue; ///< Input command queue to be used for presentation. +}; + +#define FFX_API_CREATE_CONTEXT_DESC_TYPE_FRAMEGENERATIONSWAPCHAIN_NEW_DX12 0x30005u +struct ffxCreateContextDescFrameGenerationSwapChainNewDX12 +{ + ffxCreateContextDescHeader header; + IDXGISwapChain4** swapchain; ///< Output frame interpolation swapchain. + DXGI_SWAP_CHAIN_DESC* desc; ///< Swap chain creation parameters. + IDXGIFactory* dxgiFactory; ///< IDXGIFactory to use for DX12 swapchain creation. + ID3D12CommandQueue* gameQueue; ///< Input command queue to be used for presentation. +}; + +#define FFX_API_CREATE_CONTEXT_DESC_TYPE_FRAMEGENERATIONSWAPCHAIN_FOR_HWND_DX12 0x30006u +struct ffxCreateContextDescFrameGenerationSwapChainForHwndDX12 +{ + ffxCreateContextDescHeader header; + IDXGISwapChain4** swapchain; ///< Output frame interpolation swapchain. + HWND hwnd; ///< HWND handle for the calling application; + DXGI_SWAP_CHAIN_DESC1* desc; ///< Swap chain creation parameters. + DXGI_SWAP_CHAIN_FULLSCREEN_DESC* fullscreenDesc; ///< Fullscreen swap chain creation parameters. + IDXGIFactory* dxgiFactory; ///< IDXGIFactory to use for DX12 swapchain creation. + ID3D12CommandQueue* gameQueue; ///< Input command queue to be used for presentation. 
+}; + +#define FFX_API_CONFIGURE_DESC_TYPE_FRAMEGENERATIONSWAPCHAIN_REGISTERUIRESOURCE_DX12 0x30002u +struct ffxConfigureDescFrameGenerationSwapChainRegisterUiResourceDX12 +{ + ffxConfigureDescHeader header; + struct FfxApiResource uiResource; ///< Resource containing user interface for composition. May be empty. + uint32_t flags; ///< Zero or combination of values from FfxApiUiCompositionFlags. +}; + +#define FFX_API_QUERY_DESC_TYPE_FRAMEGENERATIONSWAPCHAIN_INTERPOLATIONCOMMANDLIST_DX12 0x30003u +struct ffxQueryDescFrameGenerationSwapChainInterpolationCommandListDX12 +{ + ffxQueryDescHeader header; + void** pOutCommandList; ///< Output command list (ID3D12GraphicsCommandList) to be used for frame generation dispatch. +}; + +#define FFX_API_QUERY_DESC_TYPE_FRAMEGENERATIONSWAPCHAIN_INTERPOLATIONTEXTURE_DX12 0x30004u +struct ffxQueryDescFrameGenerationSwapChainInterpolationTextureDX12 +{ + ffxQueryDescHeader header; + struct FfxApiResource *pOutTexture; ///< Output resource in which the frame interpolation result should be placed. +}; + +#define FFX_API_DISPATCH_DESC_TYPE_FRAMEGENERATIONSWAPCHAIN_WAIT_FOR_PRESENTS_DX12 0x30007u +struct ffxDispatchDescFrameGenerationSwapChainWaitForPresentsDX12 +{ + ffxDispatchDescHeader header; +}; + +#define FFX_API_CONFIGURE_DESC_TYPE_FRAMEGENERATIONSWAPCHAIN_KEYVALUE_DX12 0x30008u +struct ffxConfigureDescFrameGenerationSwapChainKeyValueDX12 +{ + ffxConfigureDescHeader header; + uint64_t key; ///< Configuration key, member of the FfxApiConfigureFrameGenerationSwapChainKeyDX12 enumeration. + uint64_t u64; ///< Integer value or enum value to set. + void* ptr; ///< Pointer to set or pointer to value to set. 
+}; + +enum FfxApiConfigureFrameGenerationSwapChainKeyDX12 +{ + FFX_API_CONFIGURE_FG_SWAPCHAIN_KEY_WAITCALLBACK = 0 ///< Sets FfxWaitCallbackFunc +}; + +#if defined(__cplusplus) + +static inline uint32_t ffxApiGetSurfaceFormatDX12(DXGI_FORMAT format) +{ + switch (format) + { + case DXGI_FORMAT_R32G32B32A32_TYPELESS: + return FFX_API_SURFACE_FORMAT_R32G32B32A32_TYPELESS; + case DXGI_FORMAT_R32G32B32A32_FLOAT: + return FFX_API_SURFACE_FORMAT_R32G32B32A32_FLOAT; + case DXGI_FORMAT_R32G32B32A32_UINT: + return FFX_API_SURFACE_FORMAT_R32G32B32A32_UINT; + //case DXGI_FORMAT_R32G32B32A32_SINT: + //case DXGI_FORMAT_R32G32B32_TYPELESS: + //case DXGI_FORMAT_R32G32B32_FLOAT: + //case DXGI_FORMAT_R32G32B32_UINT: + //case DXGI_FORMAT_R32G32B32_SINT: + + case DXGI_FORMAT_R16G16B16A16_TYPELESS: + case DXGI_FORMAT_R16G16B16A16_FLOAT: + return FFX_API_SURFACE_FORMAT_R16G16B16A16_FLOAT; + //case DXGI_FORMAT_R16G16B16A16_UNORM: + //case DXGI_FORMAT_R16G16B16A16_UINT: + //case DXGI_FORMAT_R16G16B16A16_SNORM: + //case DXGI_FORMAT_R16G16B16A16_SINT: + + case DXGI_FORMAT_R32G32_TYPELESS: + case DXGI_FORMAT_R32G32_FLOAT: + return FFX_API_SURFACE_FORMAT_R32G32_FLOAT; + //case DXGI_FORMAT_R32G32_FLOAT: + //case DXGI_FORMAT_R32G32_UINT: + //case DXGI_FORMAT_R32G32_SINT: + + case DXGI_FORMAT_R32G8X24_TYPELESS: + case DXGI_FORMAT_D32_FLOAT_S8X24_UINT: + case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS: + return FFX_API_SURFACE_FORMAT_R32_FLOAT; + + case DXGI_FORMAT_R24G8_TYPELESS: + case DXGI_FORMAT_D24_UNORM_S8_UINT: + case DXGI_FORMAT_R24_UNORM_X8_TYPELESS: + return FFX_API_SURFACE_FORMAT_R32_UINT; + + case DXGI_FORMAT_X32_TYPELESS_G8X24_UINT: + case DXGI_FORMAT_X24_TYPELESS_G8_UINT: + return FFX_API_SURFACE_FORMAT_R8_UINT; + + case DXGI_FORMAT_R10G10B10A2_TYPELESS: + case DXGI_FORMAT_R10G10B10A2_UNORM: + return FFX_API_SURFACE_FORMAT_R10G10B10A2_UNORM; + //case DXGI_FORMAT_R10G10B10A2_UINT: + + case DXGI_FORMAT_R11G11B10_FLOAT: + return FFX_API_SURFACE_FORMAT_R11G11B10_FLOAT; + + case 
DXGI_FORMAT_R8G8B8A8_TYPELESS: + return FFX_API_SURFACE_FORMAT_R8G8B8A8_TYPELESS; + case DXGI_FORMAT_R8G8B8A8_UNORM: + return FFX_API_SURFACE_FORMAT_R8G8B8A8_UNORM; + case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: + return FFX_API_SURFACE_FORMAT_R8G8B8A8_SRGB; + //case DXGI_FORMAT_R8G8B8A8_UINT: + case DXGI_FORMAT_R8G8B8A8_SNORM: + return FFX_API_SURFACE_FORMAT_R8G8B8A8_SNORM; + + case DXGI_FORMAT_B8G8R8A8_TYPELESS: + return FFX_API_SURFACE_FORMAT_B8G8R8A8_TYPELESS; + case DXGI_FORMAT_B8G8R8A8_UNORM: + return FFX_API_SURFACE_FORMAT_B8G8R8A8_UNORM; + case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB: + return FFX_API_SURFACE_FORMAT_B8G8R8A8_SRGB; + + case DXGI_FORMAT_R16G16_TYPELESS: + case DXGI_FORMAT_R16G16_FLOAT: + return FFX_API_SURFACE_FORMAT_R16G16_FLOAT; + //case DXGI_FORMAT_R16G16_UNORM: + case DXGI_FORMAT_R16G16_UINT: + return FFX_API_SURFACE_FORMAT_R16G16_UINT; + //case DXGI_FORMAT_R16G16_SNORM + //case DXGI_FORMAT_R16G16_SINT + + //case DXGI_FORMAT_R32_SINT: + case DXGI_FORMAT_R32_UINT: + return FFX_API_SURFACE_FORMAT_R32_UINT; + case DXGI_FORMAT_R32_TYPELESS: + case DXGI_FORMAT_D32_FLOAT: + case DXGI_FORMAT_R32_FLOAT: + return FFX_API_SURFACE_FORMAT_R32_FLOAT; + + case DXGI_FORMAT_R8G8_TYPELESS: + case DXGI_FORMAT_R8G8_UINT: + return FFX_API_SURFACE_FORMAT_R8G8_UINT; + //case DXGI_FORMAT_R8G8_UNORM: + //case DXGI_FORMAT_R8G8_SNORM: + //case DXGI_FORMAT_R8G8_SINT: + + case DXGI_FORMAT_R16_TYPELESS: + case DXGI_FORMAT_R16_FLOAT: + return FFX_API_SURFACE_FORMAT_R16_FLOAT; + case DXGI_FORMAT_R16_UINT: + return FFX_API_SURFACE_FORMAT_R16_UINT; + case DXGI_FORMAT_D16_UNORM: + case DXGI_FORMAT_R16_UNORM: + return FFX_API_SURFACE_FORMAT_R16_UNORM; + case DXGI_FORMAT_R16_SNORM: + return FFX_API_SURFACE_FORMAT_R16_SNORM; + //case DXGI_FORMAT_R16_SINT: + + case DXGI_FORMAT_R8_TYPELESS: + case DXGI_FORMAT_R8_UNORM: + case DXGI_FORMAT_A8_UNORM: + return FFX_API_SURFACE_FORMAT_R8_UNORM; + case DXGI_FORMAT_R8_UINT: + return FFX_API_SURFACE_FORMAT_R8_UINT; + //case DXGI_FORMAT_R8_SNORM: + 
//case DXGI_FORMAT_R8_SINT: + //case DXGI_FORMAT_R1_UNORM: + + case DXGI_FORMAT_R9G9B9E5_SHAREDEXP: + return FFX_API_SURFACE_FORMAT_R9G9B9E5_SHAREDEXP; + + case DXGI_FORMAT_UNKNOWN: + default: + return FFX_API_SURFACE_FORMAT_UNKNOWN; + } +} + +static inline FfxApiResource ffxApiGetResourceDX12(ID3D12Resource* pRes, uint32_t state = FFX_API_RESOURCE_STATE_COMPUTE_READ, uint32_t additionalUsages = 0) +{ + FfxApiResource res{}; + res.resource = pRes; + res.state = state; + if (!pRes) return res; + + D3D12_RESOURCE_DESC desc = pRes->GetDesc(); + if (desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) + { + res.description.flags = FFX_API_RESOURCE_FLAGS_NONE; + res.description.usage = FFX_API_RESOURCE_USAGE_UAV; + res.description.size = static_cast(desc.Width); + res.description.stride = static_cast(desc.Height); + res.description.type = FFX_API_RESOURCE_TYPE_BUFFER; + } + else + { + res.description.flags = FFX_API_RESOURCE_FLAGS_NONE; + if (desc.Format == DXGI_FORMAT_D16_UNORM || desc.Format == DXGI_FORMAT_D32_FLOAT || desc.Format == DXGI_FORMAT_D24_UNORM_S8_UINT || desc.Format == DXGI_FORMAT_D32_FLOAT_S8X24_UINT) + { + res.description.usage = FFX_API_RESOURCE_USAGE_DEPTHTARGET; + } + else + { + res.description.usage = FFX_API_RESOURCE_USAGE_READ_ONLY; + } + + if (desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS) + res.description.usage |= FFX_API_RESOURCE_USAGE_UAV; + + res.description.width = static_cast(desc.Width); + res.description.height = static_cast(desc.Height); + res.description.depth = static_cast(desc.DepthOrArraySize); + res.description.mipCount = static_cast(desc.MipLevels); + + switch (desc.Dimension) + { + case D3D12_RESOURCE_DIMENSION_TEXTURE1D: + res.description.type = FFX_API_RESOURCE_TYPE_TEXTURE1D; + break; + case D3D12_RESOURCE_DIMENSION_TEXTURE2D: + if (desc.DepthOrArraySize == 6) + res.description.type = FFX_API_RESOURCE_TYPE_TEXTURE_CUBE; + else + res.description.type = FFX_API_RESOURCE_TYPE_TEXTURE2D; + break; + case 
D3D12_RESOURCE_DIMENSION_TEXTURE3D: + res.description.type = FFX_API_RESOURCE_TYPE_TEXTURE3D; + break; + default: + break; + } + } + + res.description.format = ffxApiGetSurfaceFormatDX12(desc.Format); + res.description.usage |= additionalUsages; + return res; +} + +#endif diff --git a/external/ffx_api/ffx_api.h b/external/ffx_api/ffx_api.h new file mode 100644 index 0000000000..0b192d58cb --- /dev/null +++ b/external/ffx_api/ffx_api.h @@ -0,0 +1,150 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +#if defined(__cplusplus) +extern "C" { +#endif // #if defined(__cplusplus) + +#define FFX_API_ENTRY __declspec(dllexport) +#include + +enum FfxApiReturnCodes +{ + FFX_API_RETURN_OK = 0, ///< The operation was successful. + FFX_API_RETURN_ERROR = 1, ///< An error occurred that is not further specified. 
+ FFX_API_RETURN_ERROR_UNKNOWN_DESCTYPE = 2, ///< The structure type given was not recognized for the function or context with which it was used. This is likely a programming error. + FFX_API_RETURN_ERROR_RUNTIME_ERROR = 3, ///< The underlying runtime (e.g. D3D12, Vulkan) or effect returned an error code. + FFX_API_RETURN_NO_PROVIDER = 4, ///< No provider was found for the given structure type. This is likely a programming error. + FFX_API_RETURN_ERROR_MEMORY = 5, ///< A memory allocation failed. + FFX_API_RETURN_ERROR_PARAMETER = 6, ///< A parameter was invalid, e.g. a null pointer, empty resource or out-of-bounds enum value. +}; + +typedef void* ffxContext; +typedef uint32_t ffxReturnCode_t; + +#define FFX_API_EFFECT_MASK 0xffff0000u +#define FFX_API_EFFECT_ID_GENERAL 0x00000000u + +// Base Descriptor types +typedef uint64_t ffxStructType_t; +typedef struct ffxApiHeader +{ + ffxStructType_t type; ///< The structure type. Must always be set to the corresponding value for any structure (found nearby with a similar name). + struct ffxApiHeader* pNext; ///< Pointer to next structure, used for optional parameters and extensions. Can be null. 
+} ffxApiHeader; + +typedef ffxApiHeader ffxCreateContextDescHeader; +typedef ffxApiHeader ffxConfigureDescHeader; +typedef ffxApiHeader ffxQueryDescHeader; +typedef ffxApiHeader ffxDispatchDescHeader; + +// Extensions for global debug +#define FFX_API_CONFIGURE_GLOBALDEBUG_LEVEL_SILENCE 0x0000000u +#define FFX_API_CONFIGURE_GLOBALDEBUG_LEVEL_ERRORS 0x0000001u +#define FFX_API_CONFIGURE_GLOBALDEBUG_LEVEL_WARNINGS 0x0000002u +#define FFX_API_CONFIGURE_GLOBALDEBUG_LEVEL_VERBOSE 0xfffffffu + +enum FfxApiMsgType +{ + FFX_API_MESSAGE_TYPE_ERROR = 0, + FFX_API_MESSAGE_TYPE_WARNING = 1, + FFX_API_MESSAGE_TYPE_COUNT +}; + +typedef void (*ffxApiMessage)(uint32_t type, const wchar_t* message); + +#define FFX_API_CONFIGURE_DESC_TYPE_GLOBALDEBUG1 0x0000001u +struct ffxConfigureDescGlobalDebug1 +{ + ffxConfigureDescHeader header; + ffxApiMessage fpMessage; + uint32_t debugLevel; +}; + +#define FFX_API_QUERY_DESC_TYPE_GET_VERSIONS 4u +struct ffxQueryDescGetVersions +{ + ffxQueryDescHeader header; + uint64_t createDescType; ///< Create description for the effect whose versions should be enumerated. + void* device; ///< For DX12: pointer to ID3D12Device. + uint64_t *outputCount; ///< Input capacity of id and name arrays. Output number of returned versions. If initially zero, output is number of available versions. + uint64_t *versionIds; ///< Output array of version ids to be used as version overrides. If null, only names and count are returned. + const char** versionNames; ///< Output array of version names for display. If null, only ids and count are returned. If both this and versionIds are null, only count is returned. +}; + +#define FFX_API_DESC_TYPE_OVERRIDE_VERSION 5u +struct ffxOverrideVersion +{ + ffxApiHeader header; + uint64_t versionId; ///< Id of version to use. Must be a value returned from a query in ffxQueryDescGetVersions.versionIds array. +}; + +// Memory allocation function. Must return a valid pointer to at least size bytes of memory aligned to hold any type. 
+// May return null to indicate failure. Standard library malloc fulfills this requirement. +typedef void* (*ffxAlloc)(void* pUserData, uint64_t size); + +// Memory deallocation function. May be called with null pointer as second argument. +typedef void (*ffxDealloc)(void* pUserData, void* pMem); + +typedef struct ffxAllocationCallbacks +{ + void* pUserData; + ffxAlloc alloc; + ffxDealloc dealloc; +} ffxAllocationCallbacks; + +// Creates a FFX object context. +// Depending on the desc structures provided to this function, the context will be created with the desired version and attributes. +// Non-zero return indicates error code. +// Pointers passed in desc must remain live until ffxDestroyContext is called on the context. +// MemCb may be null; the system allocator (malloc/free) will be used in this case. +FFX_API_ENTRY ffxReturnCode_t ffxCreateContext(ffxContext* context, ffxCreateContextDescHeader* desc, const ffxAllocationCallbacks* memCb); +typedef ffxReturnCode_t (*PfnFfxCreateContext)(ffxContext* context, ffxCreateContextDescHeader* desc, const ffxAllocationCallbacks* memCb); + +// Destroys an FFX object context. +// Non-zero return indicates error code. +// MemCb must be compatible with the callbacks passed into ffxCreateContext. +FFX_API_ENTRY ffxReturnCode_t ffxDestroyContext(ffxContext* context, const ffxAllocationCallbacks* memCb); +typedef ffxReturnCode_t (*PfnFfxDestroyContext)(ffxContext* context, const ffxAllocationCallbacks* memCb); + +// Configures the provided FFX object context. +// If context is null, configure operates on any global state. +// Non-zero return indicates error code. +FFX_API_ENTRY ffxReturnCode_t ffxConfigure(ffxContext* context, const ffxConfigureDescHeader* desc); +typedef ffxReturnCode_t (*PfnFfxConfigure)(ffxContext* context, const ffxConfigureDescHeader* desc); + +// Queries the provided FFX object context. +// If context is null, query operates on any global state. +// Non-zero return indicates error code. 
+FFX_API_ENTRY ffxReturnCode_t ffxQuery(ffxContext* context, ffxQueryDescHeader* desc); +typedef ffxReturnCode_t (*PfnFfxQuery)(ffxContext* context, ffxQueryDescHeader* desc); + +// Dispatches work on the given FFX object context defined by the dispatch descriptor. +// Non-zero return indicates error code. +FFX_API_ENTRY ffxReturnCode_t ffxDispatch(ffxContext* context, const ffxDispatchDescHeader* desc); +typedef ffxReturnCode_t (*PfnFfxDispatch)(ffxContext* context, const ffxDispatchDescHeader* desc); + +#if defined(__cplusplus) +} +#endif // #if defined(__cplusplus) diff --git a/external/ffx_api/ffx_api_types.h b/external/ffx_api/ffx_api_types.h new file mode 100644 index 0000000000..5def9536cd --- /dev/null +++ b/external/ffx_api/ffx_api_types.h @@ -0,0 +1,176 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#pragma once + +#include + +/// An enumeration of surface formats. +enum FfxApiSurfaceFormat +{ + FFX_API_SURFACE_FORMAT_UNKNOWN, ///< Unknown format + FFX_API_SURFACE_FORMAT_R32G32B32A32_TYPELESS, ///< 32 bit per channel, 4 channel typeless format + FFX_API_SURFACE_FORMAT_R32G32B32A32_UINT, ///< 32 bit per channel, 4 channel uint format + FFX_API_SURFACE_FORMAT_R32G32B32A32_FLOAT, ///< 32 bit per channel, 4 channel float format + FFX_API_SURFACE_FORMAT_R16G16B16A16_FLOAT, ///< 16 bit per channel, 4 channel float format + FFX_API_SURFACE_FORMAT_R32G32B32_FLOAT, ///< 32 bit per channel, 3 channel float format + FFX_API_SURFACE_FORMAT_R32G32_FLOAT, ///< 32 bit per channel, 2 channel float format + FFX_API_SURFACE_FORMAT_R8_UINT, ///< 8 bit per channel, 1 channel unsigned int format + FFX_API_SURFACE_FORMAT_R32_UINT, ///< 32 bit per channel, 1 channel unsigned int format + FFX_API_SURFACE_FORMAT_R8G8B8A8_TYPELESS, ///< 8 bit per channel, 4 channel typeless format + FFX_API_SURFACE_FORMAT_R8G8B8A8_UNORM, ///< 8 bit per channel, 4 channel unsigned normalized format + FFX_API_SURFACE_FORMAT_R8G8B8A8_SNORM, ///< 8 bit per channel, 4 channel signed normalized format + FFX_API_SURFACE_FORMAT_R8G8B8A8_SRGB, ///< 8 bit per channel, 4 channel srgb normalized + FFX_API_SURFACE_FORMAT_B8G8R8A8_TYPELESS, ///< 8 bit per channel, 4 channel typeless format + FFX_API_SURFACE_FORMAT_B8G8R8A8_UNORM, ///< 8 bit per channel, 4 channel unsigned normalized format + FFX_API_SURFACE_FORMAT_B8G8R8A8_SRGB, ///< 8 bit per channel, 4 channel srgb normalized + FFX_API_SURFACE_FORMAT_R11G11B10_FLOAT, ///< 32 bit 3 channel float format + FFX_API_SURFACE_FORMAT_R10G10B10A2_UNORM, ///< 10 bit per 3 channel, 2 bit for 1 channel normalized format + FFX_API_SURFACE_FORMAT_R16G16_FLOAT, ///< 16 bit per channel, 2 channel float format + FFX_API_SURFACE_FORMAT_R16G16_UINT, ///< 16 bit per channel, 2 channel unsigned int format + FFX_API_SURFACE_FORMAT_R16G16_SINT, ///< 16 bit per channel, 2 channel signed int format 
+ FFX_API_SURFACE_FORMAT_R16_FLOAT, ///< 16 bit per channel, 1 channel float format + FFX_API_SURFACE_FORMAT_R16_UINT, ///< 16 bit per channel, 1 channel unsigned int format + FFX_API_SURFACE_FORMAT_R16_UNORM, ///< 16 bit per channel, 1 channel unsigned normalized format + FFX_API_SURFACE_FORMAT_R16_SNORM, ///< 16 bit per channel, 1 channel signed normalized format + FFX_API_SURFACE_FORMAT_R8_UNORM, ///< 8 bit per channel, 1 channel unsigned normalized format + FFX_API_SURFACE_FORMAT_R8G8_UNORM, ///< 8 bit per channel, 2 channel unsigned normalized format + FFX_API_SURFACE_FORMAT_R8G8_UINT, ///< 8 bit per channel, 2 channel unsigned integer format + FFX_API_SURFACE_FORMAT_R32_FLOAT, ///< 32 bit per channel, 1 channel float format + FFX_API_SURFACE_FORMAT_R9G9B9E5_SHAREDEXP, ///< 9 bit per channel, 5 bit exponent format +}; + +/// An enumeration of resource usage. +enum FfxApiResorceUsage +{ + FFX_API_RESOURCE_USAGE_READ_ONLY = 0, ///< No usage flags indicate a resource is read only. + FFX_API_RESOURCE_USAGE_RENDERTARGET = (1<<0), ///< Indicates a resource will be used as render target. + FFX_API_RESOURCE_USAGE_UAV = (1<<1), ///< Indicates a resource will be used as UAV. + FFX_API_RESOURCE_USAGE_DEPTHTARGET = (1<<2), ///< Indicates a resource will be used as depth target. + FFX_API_RESOURCE_USAGE_INDIRECT = (1<<3), ///< Indicates a resource will be used as indirect argument buffer + FFX_API_RESOURCE_USAGE_ARRAYVIEW = (1<<4), ///< Indicates a resource that will generate array views. Works on 2D and cubemap textures +}; + +/// An enumeration of resource states. +enum FfxApiResourceState +{ + FFX_API_RESOURCE_STATE_COMMON = (1 << 0), + FFX_API_RESOURCE_STATE_UNORDERED_ACCESS = (1 << 1), ///< Indicates a resource is in the state to be used as UAV. + FFX_API_RESOURCE_STATE_COMPUTE_READ = (1 << 2), ///< Indicates a resource is in the state to be read by compute shaders. 
+ FFX_API_RESOURCE_STATE_PIXEL_READ = (1 << 3), ///< Indicates a resource is in the state to be read by pixel shaders. + FFX_API_RESOURCE_STATE_PIXEL_COMPUTE_READ = (FFX_API_RESOURCE_STATE_PIXEL_READ | FFX_API_RESOURCE_STATE_COMPUTE_READ), ///< Indicates a resource is in the state to be read by pixel or compute shaders. + FFX_API_RESOURCE_STATE_COPY_SRC = (1 << 4), ///< Indicates a resource is in the state to be used as source in a copy command. + FFX_API_RESOURCE_STATE_COPY_DEST = (1 << 5), ///< Indicates a resource is in the state to be used as destination in a copy command. + FFX_API_RESOURCE_STATE_GENERIC_READ = (FFX_API_RESOURCE_STATE_COPY_SRC | FFX_API_RESOURCE_STATE_COMPUTE_READ), ///< Indicates a resource is in generic (slow) read state. + FFX_API_RESOURCE_STATE_INDIRECT_ARGUMENT = (1 << 6), ///< Indicates a resource is in the state to be used as an indirect command argument + FFX_API_RESOURCE_STATE_PRESENT = (1 << 7), ///< Indicates a resource is in the state to be used to present to the swap chain + FFX_API_RESOURCE_STATE_RENDER_TARGET = (1 << 8), ///< Indicates a resource is in the state to be used as render target +}; + +/// An enumeration of surface dimensions. +enum FfxApiResourceDimension +{ + FFX_API_RESOURCE_DIMENSION_TEXTURE_1D, ///< A resource with a single dimension. + FFX_API_RESOURCE_DIMENSION_TEXTURE_2D, ///< A resource with two dimensions. +}; + +/// An enumeration of resource flags. +enum FfxApiResourceFlags +{ + FFX_API_RESOURCE_FLAGS_NONE = 0, ///< No flags. + FFX_API_RESOURCE_FLAGS_ALIASABLE = (1 << 0), ///< A bit indicating a resource does not need to persist across frames. + FFX_API_RESOURCE_FLAGS_UNDEFINED = (1 << 1), ///< Special case flag used internally when importing resources that require additional setup +}; + +// An enumeration for different resource types +enum FfxApiResourceType +{ + FFX_API_RESOURCE_TYPE_BUFFER, ///< The resource is a buffer. + FFX_API_RESOURCE_TYPE_TEXTURE1D, ///< The resource is a 1-dimensional texture. 
+ FFX_API_RESOURCE_TYPE_TEXTURE2D, ///< The resource is a 2-dimensional texture. + FFX_API_RESOURCE_TYPE_TEXTURE_CUBE, ///< The resource is a cube map. + FFX_API_RESOURCE_TYPE_TEXTURE3D, ///< The resource is a 3-dimensional texture. +}; + +enum FfxApiBackbufferTransferFunction +{ + FFX_API_BACKBUFFER_TRANSFER_FUNCTION_SRGB, + FFX_API_BACKBUFFER_TRANSFER_FUNCTION_PQ, + FFX_API_BACKBUFFER_TRANSFER_FUNCTION_SCRGB +}; + +/// A structure encapsulating a 2-dimensional point, using 32bit unsigned integers. +struct FfxApiDimensions2D +{ + uint32_t width; ///< The width of a 2-dimensional range. + uint32_t height; ///< The height of a 2-dimensional range. +}; + +/// A structure encapsulating a 2-dimensional set of floating point coordinates. +struct FfxApiFloatCoords2D +{ + float x; ///< The x coordinate of a 2-dimensional point. + float y; ///< The y coordinate of a 2-dimensional point. +}; + +/// A structure encapsulating a 2-dimensional rect. +struct FfxApiRect2D +{ + int32_t left; + int32_t top; + int32_t width; + int32_t height; +}; + +/// A structure describing a resource. +/// +/// @ingroup SDKTypes +struct FfxApiResourceDescription +{ + uint32_t type; ///< The type of the resource. + uint32_t format; ///< The surface format. + union { + uint32_t width; ///< The width of the texture resource. + uint32_t size; ///< The size of the buffer resource. + }; + + union { + uint32_t height; ///< The height of the texture resource. + uint32_t stride; ///< The stride of the buffer resource. + }; + + union { + uint32_t depth; ///< The depth of the texture resource. + uint32_t alignment; ///< The alignment of the buffer resource. + }; + + uint32_t mipCount; ///< Number of mips (or 0 for full mipchain). + uint32_t flags; ///< A set of resource flags. + uint32_t usage; ///< Resource usage flags. 
+}; + +struct FfxApiResource +{ + void* resource; + struct FfxApiResourceDescription description; + uint32_t state; +}; diff --git a/external/ffx_api/ffx_upscale.h b/external/ffx_api/ffx_upscale.h new file mode 100644 index 0000000000..7e136c83f1 --- /dev/null +++ b/external/ffx_api/ffx_upscale.h @@ -0,0 +1,175 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once +#include "ffx_api.h" +#include "ffx_api_types.h" + +#ifdef __cplusplus +extern "C" { +#endif + +enum FfxApiUpscaleQualityMode +{ + FFX_UPSCALE_QUALITY_MODE_NATIVEAA = 0, ///< Perform upscaling with a per-dimension upscaling ratio of 1.0x. + FFX_UPSCALE_QUALITY_MODE_QUALITY = 1, ///< Perform upscaling with a per-dimension upscaling ratio of 1.5x. + FFX_UPSCALE_QUALITY_MODE_BALANCED = 2, ///< Perform upscaling with a per-dimension upscaling ratio of 1.7x. 
+ FFX_UPSCALE_QUALITY_MODE_PERFORMANCE = 3, ///< Perform upscaling with a per-dimension upscaling ratio of 2.0x. + FFX_UPSCALE_QUALITY_MODE_ULTRA_PERFORMANCE = 4 ///< Perform upscaling with a per-dimension upscaling ratio of 3.0x. +}; + +enum FfxApiCreateContextUpscaleFlags +{ + FFX_UPSCALE_ENABLE_HIGH_DYNAMIC_RANGE = (1<<0), ///< A bit indicating if the input color data provided is using a high-dynamic range. + FFX_UPSCALE_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS = (1<<1), ///< A bit indicating if the motion vectors are rendered at display resolution. + FFX_UPSCALE_ENABLE_MOTION_VECTORS_JITTER_CANCELLATION = (1<<2), ///< A bit indicating that the motion vectors have the jittering pattern applied to them. + FFX_UPSCALE_ENABLE_DEPTH_INVERTED = (1<<3), ///< A bit indicating that the input depth buffer data provided is inverted [1..0]. + FFX_UPSCALE_ENABLE_DEPTH_INFINITE = (1<<4), ///< A bit indicating that the input depth buffer data provided is using an infinite far plane. + FFX_UPSCALE_ENABLE_AUTO_EXPOSURE = (1<<5), ///< A bit indicating if automatic exposure should be applied to input color data. + FFX_UPSCALE_ENABLE_DYNAMIC_RESOLUTION = (1<<6), ///< A bit indicating that the application uses dynamic resolution scaling. + FFX_UPSCALE_ENABLE_DEBUG_CHECKING = (1<<7), ///< A bit indicating that the runtime should check some API values and report issues. + FFX_UPSCALE_ENABLE_NON_LINEAR_COLORSPACE = (1<<8), ///< A bit indicating that the color resource contains perceptual (gamma corrected) colors +}; + +enum FfxApiDispatchFsrUpscaleFlags +{ + FFX_UPSCALE_FLAG_DRAW_DEBUG_VIEW = (1 << 0), ///< A bit indicating that the output resource will contain debug views with relevant information. 
+}; + +enum FfxApiDispatchUpscaleAutoreactiveFlags +{ + FFX_UPSCALE_AUTOREACTIVEFLAGS_APPLY_TONEMAP = (1<<0), + FFX_UPSCALE_AUTOREACTIVEFLAGS_APPLY_INVERSETONEMAP = (1<<1), + FFX_UPSCALE_AUTOREACTIVEFLAGS_APPLY_THRESHOLD = (1<<2), + FFX_UPSCALE_AUTOREACTIVEFLAGS_USE_COMPONENTS_MAX = (1<<3), +}; + +#define FFX_API_EFFECT_ID_UPSCALE 0x00010000u + +#define FFX_API_CREATE_CONTEXT_DESC_TYPE_UPSCALE 0x00010000u +struct ffxCreateContextDescUpscale +{ + ffxCreateContextDescHeader header; + uint32_t flags; ///< Zero or a combination of values from FfxApiCreateContextFsrFlags. + struct FfxApiDimensions2D maxRenderSize; ///< The maximum size that rendering will be performed at. + struct FfxApiDimensions2D maxUpscaleSize; ///< The size of the presentation resolution targeted by the upscaling process. + ffxApiMessage fpMessage; ///< A pointer to a function that can receive messages from the runtime. May be null. +}; + +#define FFX_API_DISPATCH_DESC_TYPE_UPSCALE 0x00010001u +struct ffxDispatchDescUpscale +{ + ffxDispatchDescHeader header; + void* commandList; ///< Command list to record upscaling rendering commands into. + struct FfxApiResource color; ///< Color buffer for the current frame (at render resolution). + struct FfxApiResource depth; ///< 32bit depth values for the current frame (at render resolution). + struct FfxApiResource motionVectors; ///< 2-dimensional motion vectors (at render resolution if FFX_FSR_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS is not set). + struct FfxApiResource exposure; ///< Optional resource containing a 1x1 exposure value. + struct FfxApiResource reactive; ///< Optional resource containing alpha value of reactive objects in the scene. + struct FfxApiResource transparencyAndComposition; ///< Optional resource containing alpha value of special objects in the scene. + struct FfxApiResource output; ///< Output color buffer for the current frame (at presentation resolution). 
+ struct FfxApiFloatCoords2D jitterOffset; ///< The subpixel jitter offset applied to the camera. + struct FfxApiFloatCoords2D motionVectorScale; ///< The scale factor to apply to motion vectors. + struct FfxApiDimensions2D renderSize; ///< The resolution that was used for rendering the input resources. + struct FfxApiDimensions2D upscaleSize; ///< The resolution that the upscaler will upscale to (optional, assumed maxUpscaleSize otherwise). + bool enableSharpening; ///< Enable an additional sharpening pass. + float sharpness; ///< The sharpness value between 0 and 1, where 0 is no additional sharpness and 1 is maximum additional sharpness. + float frameTimeDelta; ///< The time elapsed since the last frame (expressed in milliseconds). + float preExposure; ///< The pre exposure value (must be > 0.0f) + bool reset; ///< A boolean value which when set to true, indicates the camera has moved discontinuously. + float cameraNear; ///< The distance to the near plane of the camera. + float cameraFar; ///< The distance to the far plane of the camera. + float cameraFovAngleVertical; ///< The camera angle field of view in the vertical direction (expressed in radians). + float viewSpaceToMetersFactor; ///< The scale factor to convert view space units to meters + uint32_t flags; ///< Zero or a combination of values from FfxApiDispatchFsrUpscaleFlags. +}; + +#define FFX_API_QUERY_DESC_TYPE_UPSCALE_GETUPSCALERATIOFROMQUALITYMODE 0x00010002u +struct ffxQueryDescUpscaleGetUpscaleRatioFromQualityMode +{ + ffxQueryDescHeader header; + uint32_t qualityMode; ///< The desired quality mode for FSR upscaling. + float* pOutUpscaleRatio; ///< A pointer to a float which will hold the upscaling the per-dimension upscaling ratio. +}; + +#define FFX_API_QUERY_DESC_TYPE_UPSCALE_GETRENDERRESOLUTIONFROMQUALITYMODE 0x00010003u +struct ffxQueryDescUpscaleGetRenderResolutionFromQualityMode +{ + ffxQueryDescHeader header; + uint32_t displayWidth; ///< The target display resolution width. 
+ uint32_t displayHeight; ///< The target display resolution height. + uint32_t qualityMode; ///< The desired quality mode for FSR upscaling. + uint32_t* pOutRenderWidth; ///< A pointer to a uint32_t which will hold the calculated render resolution width. + uint32_t* pOutRenderHeight; ///< A pointer to a uint32_t which will hold the calculated render resolution height. +}; + +#define FFX_API_QUERY_DESC_TYPE_UPSCALE_GETJITTERPHASECOUNT 0x00010004u +struct ffxQueryDescUpscaleGetJitterPhaseCount +{ + ffxQueryDescHeader header; + uint32_t renderWidth; ///< The render resolution width. + uint32_t displayWidth; ///< The output resolution width. + int32_t* pOutPhaseCount; ///< A pointer to a int32_t which will hold the jitter phase count for the scaling factor between renderWidth and displayWidth. +}; + +#define FFX_API_QUERY_DESC_TYPE_UPSCALE_GETJITTEROFFSET 0x00010005u +struct ffxQueryDescUpscaleGetJitterOffset +{ + ffxQueryDescHeader header; + int32_t index; ///< The index within the jitter sequence. + int32_t phaseCount; ///< The length of jitter phase. See ffxQueryDescFsrGetJitterPhaseCount. + float* pOutX; ///< A pointer to a float which will contain the subpixel jitter offset for the x dimension. + float* pOutY; ///< A pointer to a float which will contain the subpixel jitter offset for the y dimension. +}; + +#define FFX_API_DISPATCH_DESC_TYPE_UPSCALE_GENERATEREACTIVEMASK 0x00010006u +struct ffxDispatchDescUpscaleGenerateReactiveMask +{ + ffxDispatchDescHeader header; + void* commandList; ///< The FfxCommandList to record FSRUPSCALE rendering commands into. + struct FfxApiResource colorOpaqueOnly; ///< A FfxResource containing the opaque only color buffer for the current frame (at render resolution). + struct FfxApiResource colorPreUpscale; ///< A FfxResource containing the opaque+translucent color buffer for the current frame (at render resolution). + struct FfxApiResource outReactive; ///< A FfxResource containing the surface to generate the reactive mask into. 
+ struct FfxApiDimensions2D renderSize; ///< The resolution that was used for rendering the input resources. + float scale; ///< A value to scale the output + float cutoffThreshold; ///< A threshold value to generate a binary reactive mask + float binaryValue; ///< A value to set for the binary reactive mask + uint32_t flags; ///< Flags to determine how to generate the reactive mask +}; + +#define FFX_API_CONFIGURE_DESC_TYPE_UPSCALE_KEYVALUE 0x00010007u +struct ffxConfigureDescUpscaleKeyValue +{ + ffxConfigureDescHeader header; + uint64_t key; ///< Configuration key, member of the FfxApiConfigureUpscaleKey enumeration. + uint64_t u64; ///< Integer value or enum value to set. + void* ptr; ///< Pointer to set or pointer to value to set. +}; + +enum FfxApiConfigureUpscaleKey +{ + FFX_API_CONFIGURE_UPSCALE_KEY_FVELOCITYFACTOR = 0 //Override constant buffer fVelocityFactor (from 1.0f at context creation) to floating point value casted from void * ptr. Value of 0.0f can improve temporal stability of bright pixels. Value is clamped to [0.0f, 1.0f]. 
+}; + +#ifdef __cplusplus +} +#endif diff --git a/src/core/math.h b/src/core/math.h index 8b34a6c0eb..31cf51ae1a 100644 --- a/src/core/math.h +++ b/src/core/math.h @@ -25,6 +25,7 @@ struct LUMIX_CORE_API IVec2 { IVec2 operator / (const IVec2& rhs) const { return IVec2(x / rhs.x, y / rhs.y); } inline Vec2 operator / (const Vec2& rhs) const; bool operator !=(const IVec2& rhs) const { return rhs.x != x || rhs.y != y; } + bool operator ==(const IVec2& rhs) const { return rhs.x == x && rhs.y == y; } i32 x; i32 y; diff --git a/src/renderer/draw_stream.cpp b/src/renderer/draw_stream.cpp index 3740d85626..ea7f8b4818 100644 --- a/src/renderer/draw_stream.cpp +++ b/src/renderer/draw_stream.cpp @@ -39,10 +39,8 @@ enum class DrawStream::Instruction : u8 { DRAW_INDEXED_INSTANCED, MEMORY_BARRIER, MEMORY_BARRIER_TEXTURE, - BARRIER_READ, - BARRIER_WRITE, - BARRIER_READ_BUF, - BARRIER_WRITE_BUF, + TEXTURE_BARRIER, + BUFFER_BARRIER, DRAW_INDIRECT, BIND_SHADER_BUFFER, DISPATCH, @@ -134,6 +132,17 @@ struct DrawIndexedInstancedDat { u32 instances_count; gpu::DataType index_type; }; + +struct TextureBarrierData { + gpu::TextureHandle texture; + gpu::BarrierType type; +}; + +struct BufferBarrierData { + gpu::BufferHandle buffer; + gpu::BarrierType type; +}; + struct DrawIndirectData { gpu::DataType index_type; u32 indirect_buffer_offset; @@ -537,20 +546,14 @@ void DrawStream::drawIndirect(gpu::DataType index_type, u32 indirect_buffer_offs write(Instruction::DRAW_INDIRECT, data); } -void DrawStream::barrierRead(gpu::TextureHandle texture) { - write(Instruction::BARRIER_READ, texture); +void DrawStream::barrier(gpu::TextureHandle texture, gpu::BarrierType type) { + TextureBarrierData data = {texture, type}; + write(Instruction::TEXTURE_BARRIER, data); } -void DrawStream::barrierWrite(gpu::TextureHandle texture) { - write(Instruction::BARRIER_WRITE, texture); -} - -void DrawStream::barrierRead(gpu::BufferHandle buffer) { - write(Instruction::BARRIER_READ_BUF, buffer); -} - -void 
DrawStream::barrierWrite(gpu::BufferHandle buffer) { - write(Instruction::BARRIER_WRITE_BUF, buffer); +void DrawStream::barrier(gpu::BufferHandle buffer, gpu::BarrierType type) { + BufferBarrierData data = {buffer, type}; + write(Instruction::BUFFER_BARRIER, data); } void DrawStream::memoryBarrier(gpu::BufferHandle buffer) { @@ -743,24 +746,14 @@ void DrawStream::run() { gpu::memoryBarrier(texture); break; } - case Instruction::BARRIER_READ: { - READ(gpu::TextureHandle, texture); - gpu::barrierRead(texture); - break; - } - case Instruction::BARRIER_WRITE: { - READ(gpu::TextureHandle, texture); - gpu::barrierWrite(texture); + case Instruction::TEXTURE_BARRIER: { + READ(TextureBarrierData, data); + gpu::barrier(data.texture, data.type); break; } - case Instruction::BARRIER_READ_BUF: { - READ(gpu::BufferHandle, buffer); - gpu::barrierRead(buffer); - break; - } - case Instruction::BARRIER_WRITE_BUF: { - READ(gpu::BufferHandle, buffer); - gpu::barrierWrite(buffer); + case Instruction::BUFFER_BARRIER: { + READ(BufferBarrierData, data); + gpu::barrier(data.buffer, data.type); break; } case Instruction::POP_DEBUG_GROUP: diff --git a/src/renderer/draw_stream.h b/src/renderer/draw_stream.h index 987065d0b7..8baaabd7eb 100644 --- a/src/renderer/draw_stream.h +++ b/src/renderer/draw_stream.h @@ -54,10 +54,8 @@ struct DrawStream { void drawIndexedInstanced(u32 indices_count, u32 instances_count, gpu::DataType index_type); void dispatch(u32 num_groups_x, u32 num_groups_y, u32 num_groups_z); - void barrierRead(gpu::TextureHandle texture); - void barrierWrite(gpu::TextureHandle texture); - void barrierRead(gpu::BufferHandle buffer); - void barrierWrite(gpu::BufferHandle buffer); + void barrier(gpu::TextureHandle texture, gpu::BarrierType type); + void barrier(gpu::BufferHandle buffer, gpu::BarrierType type); void memoryBarrier(gpu::BufferHandle buffer); void memoryBarrier(gpu::TextureHandle texture); diff --git a/src/renderer/editor/render_plugins.cpp 
b/src/renderer/editor/render_plugins.cpp index f981a7e177..c7f6f1cc1b 100644 --- a/src/renderer/editor/render_plugins.cpp +++ b/src/renderer/editor/render_plugins.cpp @@ -3729,7 +3729,7 @@ struct EnvironmentProbePlugin final : PropertyGrid::IPlugin { // radiance filter enum { roughness_levels = 5 }; stream.useProgram(m_ibl_filter_program); - stream.barrierRead(cubemap); + stream.barrier(cubemap, gpu::BarrierType::READ); gpu::TextureHandle filtered = gpu::allocTextureHandle(); stream.createTexture(filtered, job.reflection_probe.size, job.reflection_probe.size, 1, gpu::TextureFormat::RGBA32F, gpu::TextureFlags::IS_CUBE | gpu::TextureFlags::RENDER_TARGET | gpu::TextureFlags::COMPUTE_WRITE, "probe_filtered"); diff --git a/src/renderer/fsr3.cpp b/src/renderer/fsr3.cpp new file mode 100644 index 0000000000..aa0b5391e6 --- /dev/null +++ b/src/renderer/fsr3.cpp @@ -0,0 +1,251 @@ +#include "core/job_system.h" +#include "core/log.h" +#include "core/os.h" +#include "core/tag_allocator.h" +#include "engine/engine.h" +#include "ffx_api/ffx_api.h" +#include "ffx_api/ffx_upscale.h" +#include "ffx_api/dx12/ffx_api_dx12.h" +#undef near +#undef far +#undef NEAR +#undef RELATIVE +#undef FAR +#include "pipeline.h" +#include "renderer.h" + +namespace Lumix { + +namespace gpu { + void* getDX12Device(); + void* getDX12CommandList(); + void* getDX12Resource(TextureHandle h); + void resetCommandList(); +} + +enum class FfxResourceStates { + FFX_RESOURCE_STATE_COMMON = (1 << 0), + FFX_RESOURCE_STATE_UNORDERED_ACCESS = (1 << 1), + FFX_RESOURCE_STATE_COMPUTE_READ = (1 << 2), + FFX_RESOURCE_STATE_PIXEL_READ = (1 << 3), + FFX_RESOURCE_STATE_PIXEL_COMPUTE_READ = (FFX_RESOURCE_STATE_PIXEL_READ | FFX_RESOURCE_STATE_COMPUTE_READ), + FFX_RESOURCE_STATE_COPY_SRC = (1 << 4), + FFX_RESOURCE_STATE_COPY_DEST = (1 << 5), + FFX_RESOURCE_STATE_GENERIC_READ = (FFX_RESOURCE_STATE_COPY_SRC | FFX_RESOURCE_STATE_COMPUTE_READ), + FFX_RESOURCE_STATE_INDIRECT_ARGUMENT = (1 << 6), + FFX_RESOURCE_STATE_PRESENT = (1 
<< 7), + FFX_RESOURCE_STATE_RENDER_TARGET = (1 << 8), +}; + +static PfnFfxCreateContext api_ffxCreateContext; +static PfnFfxDestroyContext api_ffxDestroyContext; +static PfnFfxDispatch api_ffxDispatch; +static PfnFfxConfigure api_ffxConfigure; +static PfnFfxQuery api_ffxQuery; + +struct FSR3Plugin : RenderPlugin { + FSR3Plugin(IAllocator& allocator) + : m_allocator(allocator, "FSR3") + , m_contexts(allocator) + { + m_alloc_callbacks.pUserData = this; + m_alloc_callbacks.alloc = [](void* user_data, uint64_t size) -> void* { + return static_cast(user_data)->m_allocator.allocate(size, 8); + }; + m_alloc_callbacks.dealloc = [](void* user_data, void* ptr) { + static_cast(user_data)->m_allocator.deallocate(ptr); + }; + } + + static void ffxMessageCallback(uint32_t type, const wchar_t* message) { + ASSERT(false); + } + + struct Context { + IVec2 size; + Pipeline* pipeline; + ffxContext fsr; + u32 frames_since_last_use = 0; + }; + + void frame(Renderer&) override { + for (i32 i = m_contexts.size() - 1; i >= 0; --i) { + UniquePtr& ctx = m_contexts[i]; + ++ctx->frames_since_last_use; + if (ctx->frames_since_last_use < 6) continue; + + ffxReturnCode_t retCode = api_ffxDestroyContext(&ctx->fsr, &m_alloc_callbacks); + m_contexts.swapAndPop(i); + if (retCode != FFX_API_RETURN_OK) { + logError("Failed to destroy FSR3 context"); + } + } + } + + Context& getOrCreateContext(Pipeline& pipeline) { + const Viewport& vp = pipeline.getViewport(); + IVec2 size = {vp.w, vp.h}; + // look for existing context + for (const UniquePtr& ctx : m_contexts) { + if (ctx->pipeline == &pipeline) { + ctx->frames_since_last_use = 0; + if (ctx->size == size) return *ctx.get(); + + // we found context with different size, mark it for deletion + // we can't delete it here because gpu might still be using it + ctx->pipeline = nullptr; // set to nullptr so we don't reuse it + break; + } + } + + // new context + UniquePtr ctx = UniquePtr::create(m_allocator); + ctx->pipeline = &pipeline; + ctx->size = size; + 
Context* ctx_ptr = ctx.get(); + m_contexts.push(ctx.move()); + + DrawStream& stream = pipeline.getRenderer().getDrawStream(); + // init gpu resources + stream.pushLambda([ctx_ptr, size, this](){ + ffxCreateBackendDX12Desc createBackend = { + .header = { + .type = FFX_API_CREATE_CONTEXT_DESC_TYPE_BACKEND_DX12, + }, + .device = (ID3D12Device*)gpu::getDX12Device(), + }; + + ffxCreateContextDescUpscale createUpscale = { + .flags = FFX_UPSCALE_ENABLE_DEPTH_INVERTED | FFX_UPSCALE_ENABLE_DEPTH_INFINITE | FFX_UPSCALE_ENABLE_HIGH_DYNAMIC_RANGE, + .maxRenderSize = { (u32)size.x, (u32)size.y }, + .maxUpscaleSize = { (u32)size.x, (u32)size.y }, + }; + + #ifdef LUMIX_DEBUG + createUpscale.flags |= FFX_UPSCALE_ENABLE_DEBUG_CHECKING; + createUpscale.fpMessage = &ffxMessageCallback; + #endif + createUpscale.header.type = FFX_API_CREATE_CONTEXT_DESC_TYPE_UPSCALE; + createUpscale.header.pNext = &createBackend.header; + + ffxReturnCode_t retCode = api_ffxCreateContext(&ctx_ptr->fsr, &createUpscale.header, &m_alloc_callbacks); + if (retCode != FFX_API_RETURN_OK) { + logError("Failed to create FSR3 context"); + } + }); + + return *ctx_ptr; + } + + RenderBufferHandle renderAA(const GBuffer& gbuffer, RenderBufferHandle input, Pipeline& pipeline) override { + DrawStream& stream = pipeline.getRenderer().getDrawStream(); + + Context& ctx = getOrCreateContext(pipeline); + + gpu::TextureHandle color = pipeline.toTexture(input); + gpu::TextureHandle depth = pipeline.toTexture(gbuffer.DS); + gpu::TextureHandle motion_vectors = pipeline.toTexture(gbuffer.D); + + RenderBufferHandle output = pipeline.createRenderbuffer({ + .format = gpu::TextureFormat::RGBA16F, + .flags = gpu::TextureFlags::RENDER_TARGET | gpu::TextureFlags::NO_MIPS | gpu::TextureFlags::COMPUTE_WRITE, + .debug_name = "fsr3_output" + }); + const float time_delta = pipeline.getRenderer().getEngine().getLastTimeDelta(); + gpu::TextureHandle output_tex = pipeline.toTexture(output); + const Viewport& vp = pipeline.getViewport(); + + 
pipeline.enablePixelJitter(true); + pipeline.beginBlock("FSR3 Upscale"); + stream.pushLambda([color, depth, motion_vectors, output_tex, vp, time_delta, ctx_ptr = &ctx](){ + dispatch(color, depth, motion_vectors, output_tex, vp, time_delta, *ctx_ptr); + }); + pipeline.endBlock(); + + return output; + } + + static FfxApiResource toFFXResource(gpu::TextureHandle texture, FfxResourceStates state, bool is_depth, IVec2 size) { + FfxApiResource res = {}; + res.resource = gpu::getDX12Resource(texture); + res.state = (u32)state; + res.description = { + .type = FFX_API_RESOURCE_TYPE_TEXTURE2D, + .flags = 0, + .usage = u32(is_depth ? FFX_API_RESOURCE_USAGE_DEPTHTARGET : FFX_API_RESOURCE_USAGE_READ_ONLY), + }; + + res.description.mipCount = 1; + res.description.depth = 1; + res.description.width = size.x; + res.description.height = size.y; + + return res; + } + + static void dispatch(gpu::TextureHandle color, gpu::TextureHandle depth, gpu::TextureHandle motion_vectors, gpu::TextureHandle output, const Viewport& vp, float time_delta, Context& ctx) { + const IVec2 size = { (int)vp.w, (int)vp.h }; + gpu::barrier(color, gpu::BarrierType::COMMON); + gpu::barrier(depth, gpu::BarrierType::COMMON); + gpu::barrier(motion_vectors, gpu::BarrierType::COMMON); + gpu::barrier(output, gpu::BarrierType::COMMON); + ffxDispatchDescUpscale desc = { + .header = { .type = FFX_API_DISPATCH_DESC_TYPE_UPSCALE }, + .commandList = gpu::getDX12CommandList(), + .color = toFFXResource(color, FfxResourceStates::FFX_RESOURCE_STATE_COMMON, false, size), + .depth = toFFXResource(depth, FfxResourceStates::FFX_RESOURCE_STATE_COMMON, true, size), + .motionVectors = toFFXResource(motion_vectors, FfxResourceStates::FFX_RESOURCE_STATE_COMMON, false, size), + .output = toFFXResource(output, FfxResourceStates::FFX_RESOURCE_STATE_COMMON, false, size), + .jitterOffset = { vp.pixel_offset.x, vp.pixel_offset.y }, + .motionVectorScale = { 0.5f * vp.w, -0.5f * vp.h }, + .renderSize = {(u32)vp.w, (u32)vp.h}, // The 
resolution that was used for rendering the input resources. + .upscaleSize = {(u32)vp.w, (u32)vp.h}, // The resolution that the upscaler will upscale to + .enableSharpening = false, + .frameTimeDelta = time_delta * 1000.f, + .preExposure = 1.f, + .reset = false, + .cameraNear = FLT_MAX, + .cameraFar = FLT_MAX, + .cameraFovAngleVertical = vp.fov, + .viewSpaceToMetersFactor = 1.f, + .flags = 0 //FFX_UPSCALE_FLAG_DRAW_DEBUG_VIEW + }; + ffxReturnCode_t retCode = api_ffxDispatch(&ctx.fsr, &desc.header); + ASSERT(retCode == FFX_API_RETURN_OK); + gpu::resetCommandList(); + } + + TagAllocator m_allocator; + Array> m_contexts; + ffxAllocationCallbacks m_alloc_callbacks; +}; + +// init FSR3 if available +void initFSR3(Renderer& renderer, IAllocator& allocator) { + void* ffx_lib = os::loadLibrary("amd_fidelityfx_dx12.dll"); + if (!ffx_lib) return; + + logInfo("Loaded amd_fidelityfx_dx12.dll"); + + #define LOAD_FN(name) \ + api_##name = (decltype(api_##name))os::getLibrarySymbol(ffx_lib, #name); \ + if (!api_##name) { \ + logError("Failed to load ", #name, " from amd_fidelityfx_dx12.dll"); \ + os::unloadLibrary(ffx_lib); \ + return; \ + } + + LOAD_FN(ffxCreateContext); + LOAD_FN(ffxDestroyContext); + LOAD_FN(ffxConfigure); + LOAD_FN(ffxQuery); + LOAD_FN(ffxDispatch); + + #undef LOAD_FN + + static FSR3Plugin plugin(allocator); + renderer.addPlugin(plugin); + renderer.enableBuiltinTAA(false); +} + +} \ No newline at end of file diff --git a/src/renderer/gpu/gpu.h b/src/renderer/gpu/gpu.h index 97bd0688c3..2569e174e4 100644 --- a/src/renderer/gpu/gpu.h +++ b/src/renderer/gpu/gpu.h @@ -44,6 +44,12 @@ enum class InitFlags : u32 { STABLE_POWER_STATE = 1 << 1 }; +enum class BarrierType : u8 { + READ, + WRITE, + COMMON, +}; + enum class FramebufferFlags : u32 { NONE = 0, SRGB = 1 << 0, @@ -291,10 +297,8 @@ void createTextureView(TextureHandle view, TextureHandle texture, u32 layer, u32 void memoryBarrier(BufferHandle buffer); void memoryBarrier(TextureHandle texture); -void 
barrierWrite(TextureHandle texture); -void barrierRead(TextureHandle texture); -void barrierWrite(BufferHandle buffer); -void barrierRead(BufferHandle buffer); +void barrier(TextureHandle texture, BarrierType type); +void barrier(BufferHandle buffer, BarrierType type); void destroy(TextureHandle texture); void destroy(BufferHandle buffer); diff --git a/src/renderer/gpu/gpu_dx12.cpp b/src/renderer/gpu/gpu_dx12.cpp index 9b95ee0386..e2822ae26a 100644 --- a/src/renderer/gpu/gpu_dx12.cpp +++ b/src/renderer/gpu/gpu_dx12.cpp @@ -1120,6 +1120,13 @@ struct D3D { static Local d3d; +void resetCommandList() { + d3d->cmd_list->SetGraphicsRootSignature(d3d->root_signature); + d3d->cmd_list->SetComputeRootSignature(d3d->root_signature); + ID3D12DescriptorHeap* heaps[] = {d3d->srv_heap.heap, d3d->sampler_heap.heap}; + d3d->cmd_list->SetDescriptorHeaps(lengthOf(heaps), heaps); +} + void* getDX12CommandList() { return d3d->cmd_list; } @@ -1132,24 +1139,26 @@ void* getDX12Resource(TextureHandle h) { return h->resource; } -void barrierWrite(BufferHandle buffer) { - buffer->setState(d3d->cmd_list, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); -} - -void barrierRead(BufferHandle buffer) { - buffer->setState(d3d->cmd_list, D3D12_RESOURCE_STATE_GENERIC_READ); -} - -void barrierWrite(TextureHandle texture) { - texture->setState(d3d->cmd_list, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); +void barrier(BufferHandle buffer, BarrierType type) { + switch(type) { + case BarrierType::WRITE: buffer->setState(d3d->cmd_list, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); break; + case BarrierType::READ: buffer->setState(d3d->cmd_list, D3D12_RESOURCE_STATE_GENERIC_READ); break; + case BarrierType::COMMON: buffer->setState(d3d->cmd_list, D3D12_RESOURCE_STATE_COMMON); break; + } } -void barrierRead(TextureHandle texture) { - if (isDepthFormat(texture->dxgi_format)) { - texture->setState(d3d->cmd_list, D3D12_RESOURCE_STATE_DEPTH_READ | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE | 
D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); - } - else { - texture->setState(d3d->cmd_list, D3D12_RESOURCE_STATE_GENERIC_READ); +void barrier(TextureHandle texture, BarrierType type) { + switch(type) { + case BarrierType::WRITE: texture->setState(d3d->cmd_list, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); break; + case BarrierType::READ: + if (isDepthFormat(texture->dxgi_format)) { + texture->setState(d3d->cmd_list, D3D12_RESOURCE_STATE_DEPTH_READ | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + } + else { + texture->setState(d3d->cmd_list, D3D12_RESOURCE_STATE_GENERIC_READ); + } + break; + case BarrierType::COMMON: texture->setState(d3d->cmd_list, D3D12_RESOURCE_STATE_COMMON); break; } } @@ -1921,10 +1930,8 @@ bool init(void* hwnd, InitFlags flags) { d3d->frame->stats_query_buffer->Map(0, nullptr, (void**)&d3d->frame->stats_query_buffer_ptr); d3d->frame->cmd_allocator->Reset(); d3d->cmd_list->Reset(d3d->frame->cmd_allocator, nullptr); - d3d->cmd_list->SetGraphicsRootSignature(d3d->root_signature); - d3d->cmd_list->SetComputeRootSignature(d3d->root_signature); - ID3D12DescriptorHeap* heaps[] = {d3d->srv_heap.heap, d3d->sampler_heap.heap }; - d3d->cmd_list->SetDescriptorHeaps(lengthOf(heaps), heaps); + + resetCommandList(); if (!createSwapchain((HWND)hwnd, d3d->windows[0], d3d->vsync)) return false; diff --git a/src/renderer/pipeline.cpp b/src/renderer/pipeline.cpp index 1f55d723e8..7570e4481f 100644 --- a/src/renderer/pipeline.cpp +++ b/src/renderer/pipeline.cpp @@ -1098,7 +1098,7 @@ struct PipelineImpl final : Pipeline { }); setRenderTargets({}, shadowmap_rb); clear(gpu::ClearFlags::DEPTH, 0, 0, 0, 0, 0); - stream.barrierRead(m_renderbuffers[shadowmap_rb].handle); + stream.barrier(m_renderbuffers[shadowmap_rb].handle, gpu::BarrierType::READ); endBlock(); return shadowmap_rb; } @@ -1129,7 +1129,7 @@ struct PipelineImpl final : Pipeline { renderTerrains(view_params, shadow_state, "DEPTH"); } endBlock(); - 
stream.barrierRead(m_renderbuffers[shadowmap_rb].handle); + stream.barrier(m_renderbuffers[shadowmap_rb].handle, gpu::BarrierType::READ); return shadowmap_rb; } @@ -1369,7 +1369,7 @@ struct PipelineImpl final : Pipeline { gpu::BindlessHandle toBindless(RenderBufferHandle rb_idx, DrawStream& stream) override { if (rb_idx == INVALID_RENDERBUFFER) return gpu::INVALID_BINDLESS_HANDLE; - stream.barrierRead(m_renderbuffers[rb_idx].handle); + stream.barrier(m_renderbuffers[rb_idx].handle, gpu::BarrierType::READ); return gpu::getBindlessHandle(m_renderbuffers[rb_idx].handle); } @@ -1413,7 +1413,7 @@ struct PipelineImpl final : Pipeline { gpu::getRWBindlessHandle(mip_views[4]), }; - stream.barrierWrite(toTexture(m_downscaled_depth)); + stream.barrier(toTexture(m_downscaled_depth), gpu::BarrierType::WRITE); setUniform(udata); dispatch(*m_downscale_depth_shader, (m_viewport.w + 7) / 8, (m_viewport.h + 7) / 8, 1); @@ -1424,7 +1424,7 @@ struct PipelineImpl final : Pipeline { gpu::RWBindlessHandle toRWBindless(RenderBufferHandle rb_idx, DrawStream& stream) override { if (rb_idx == INVALID_RENDERBUFFER) return gpu::INVALID_RW_BINDLESS_HANDLE; - stream.barrierWrite(m_renderbuffers[rb_idx].handle); + stream.barrier(m_renderbuffers[rb_idx].handle, gpu::BarrierType::WRITE); return gpu::getRWBindlessHandle(m_renderbuffers[rb_idx].handle); } @@ -2358,20 +2358,20 @@ struct PipelineImpl final : Pipeline { stream.bindUniformBuffer(UniformBuffer::DRAWCALL, drawcall_ub.buffer, drawcall_ub.offset, sizeof(UBValues)); - stream.barrierWrite(m_instanced_meshes_buffer); - stream.barrierWrite(m_indirect_buffer); - stream.barrierWrite(culled_buffer); + stream.barrier(m_instanced_meshes_buffer, gpu::BarrierType::WRITE); + stream.barrier(m_indirect_buffer, gpu::BarrierType::WRITE); + stream.barrier(culled_buffer, gpu::BarrierType::WRITE); //stream.bindShaderBuffer(culled_buffer, 1, gpu::BindShaderBufferFlags::OUTPUT); stream.useProgram(init_shader); stream.dispatch(1, 1, 1); 
stream.memoryBarrier(culled_buffer); if (view.cp.is_shadow) { - stream.barrierRead(im.gpu_data); + stream.barrier(im.gpu_data, gpu::BarrierType::READ); //stream.bindShaderBuffer(im.gpu_data, 0, gpu::BindShaderBufferFlags::NONE); } else { - stream.barrierWrite(im.gpu_data); + stream.barrier(im.gpu_data, gpu::BarrierType::WRITE); //stream.bindShaderBuffer(im.gpu_data, 0, gpu::BindShaderBufferFlags::OUTPUT); stream.useProgram(update_lods_shader); for (u32 i = 0; i < cell_count; ++i) { @@ -2392,7 +2392,7 @@ struct PipelineImpl final : Pipeline { stream.memoryBarrier(culled_buffer); if (!view.cp.is_shadow) { - stream.barrierRead(im.gpu_data); + stream.barrier(im.gpu_data, gpu::BarrierType::READ); //stream.bindShaderBuffer(im.gpu_data, 0, gpu::BindShaderBufferFlags::NONE); } @@ -3012,7 +3012,7 @@ struct PipelineImpl final : Pipeline { buffer.capacity = capacity; } stream.update(buffer.buffer, data, size); - stream.barrierRead(buffer.buffer); + stream.barrier(buffer.buffer, gpu::BarrierType::READ); }; const Span module_refl_probes = m_module->getReflectionProbes(); const Span module_env_probes = m_module->getEnvironmentProbes(); diff --git a/src/renderer/postprocess.h b/src/renderer/postprocess.h index e18f711656..96dd6c8267 100644 --- a/src/renderer/postprocess.h +++ b/src/renderer/postprocess.h @@ -97,15 +97,15 @@ struct Atmo : public RenderPlugin { gpu::INVALID_BINDLESS_HANDLE }; - stream.barrierWrite(m_optical_depth_precomputed); + stream.barrier(m_optical_depth_precomputed, gpu::BarrierType::WRITE); pipeline.beginBlock("precompute_transmittance"); pipeline.setUniform(ub_data); pipeline.dispatch(*m_optical_depth_shader, 128 / 16, 128 / 16, 1); pipeline.endBlock(); - stream.barrierWrite(m_inscatter_precomputed); + stream.barrier(m_inscatter_precomputed, gpu::BarrierType::WRITE); stream.memoryBarrier(m_optical_depth_precomputed); - stream.barrierRead(m_optical_depth_precomputed); + stream.barrier(m_optical_depth_precomputed, gpu::BarrierType::READ); 
pipeline.beginBlock("precompute_inscatter"); ub_data.resolution = Vec4(64, 128, 1, 0); @@ -115,7 +115,7 @@ struct Atmo : public RenderPlugin { pipeline.endBlock(); stream.memoryBarrier(m_inscatter_precomputed); - stream.barrierRead(m_inscatter_precomputed); + stream.barrier(m_inscatter_precomputed, gpu::BarrierType::READ); ub_data.inscatter_precomputed = gpu::getBindlessHandle(m_inscatter_precomputed); ub_data.optical_depth = gpu::getBindlessHandle(m_optical_depth_precomputed); @@ -354,7 +354,7 @@ struct Bloom : public RenderPlugin { gpu::getRWBindlessHandle(m_lum_buf) }; pipeline.setUniform(ubdata); - stream.barrierWrite(m_lum_buf); + stream.barrier(m_lum_buf, gpu::BarrierType::WRITE); stream.memoryBarrier(m_lum_buf); pipeline.dispatch(*m_avg_luminance_shader, 1, 1, 1, "PASS0"); stream.memoryBarrier(m_lum_buf); @@ -362,7 +362,7 @@ struct Bloom : public RenderPlugin { stream.memoryBarrier(m_lum_buf); pipeline.dispatch(*m_avg_luminance_shader, 1, 1, 1, "PASS2"); stream.memoryBarrier(m_lum_buf); - stream.barrierRead(m_lum_buf); + stream.barrier(m_lum_buf, gpu::BarrierType::READ); pipeline.endBlock(); } @@ -486,7 +486,7 @@ struct Bloom : public RenderPlugin { pipeline.toBindless(input, stream), pipeline.toRWBindless(bloom_rb, stream) }; - stream.barrierRead(m_lum_buf); + stream.barrier(m_lum_buf, gpu::BarrierType::READ); pipeline.setUniform(ubdata); pipeline.dispatch(*m_extract_shader, ((vp.w >> 1) + 15) / 16, ((vp.h >> 1) + 15) / 16, 1); m_extracted_rt = bloom_rb; @@ -572,7 +572,7 @@ struct Bloom : public RenderPlugin { pipeline.toRWBindless(rb, stream) }; - stream.barrierRead(m_lum_buf); + stream.barrier(m_lum_buf, gpu::BarrierType::READ); pipeline.setUniform(ubdata); const Viewport& vp = pipeline.getViewport(); pipeline.dispatch(*m_tonemap_shader, (vp.w + 15) / 16, (vp.h + 15) / 16, 1); @@ -914,7 +914,7 @@ struct SSAO : public RenderPlugin { pipeline.beginBlock("ssao_blit"); pipeline.setUniform(udata2); - stream.barrierWrite(pipeline.toTexture(gbuffer.B)); + 
stream.barrier(pipeline.toTexture(gbuffer.B), gpu::BarrierType::WRITE); pipeline.dispatch(*m_blit_shader, (vp.w + 15) / 16, (vp.h + 15) / 16, 1); pipeline.endBlock(); pipeline.endBlock(); @@ -1098,7 +1098,7 @@ struct TAA : public RenderPlugin { if (!m_enabled) { data->history_rb = INVALID_RENDERBUFFER; pipeline.enablePixelJitter(false); - return hdr_buffer; + return INVALID_RENDERBUFFER; } pipeline.enablePixelJitter(true); diff --git a/src/renderer/renderer.cpp b/src/renderer/renderer.cpp index 988efbbf3b..1ba03e86ad 100644 --- a/src/renderer/renderer.cpp +++ b/src/renderer/renderer.cpp @@ -33,7 +33,6 @@ namespace Lumix { - static const ComponentType MODEL_INSTANCE_TYPE = reflection::getComponentType("model_instance"); RenderBufferHandle RenderPlugin::renderBeforeTonemap(const GBuffer& gbuffer, RenderBufferHandle input, Pipeline& pipeline) { return input; } @@ -43,6 +42,7 @@ bool RenderPlugin::tonemap(RenderBufferHandle input, RenderBufferHandle& output, bool RenderPlugin::debugOutput(RenderBufferHandle input, Pipeline& pipeline) { return false; } RenderBufferHandle RenderPlugin::renderAA(const GBuffer& gbuffer, RenderBufferHandle input, Pipeline& pipeline) { return INVALID_RENDERBUFFER; } +void initFSR3(Renderer& renderer, IAllocator& allocator); template struct TransientBuffer { @@ -615,6 +615,8 @@ struct RendererImpl final : Renderer { m_font_manager = LUMIX_NEW(m_allocator, FontManager)(*this, m_allocator); m_font_manager->create(FontResource::TYPE, manager); m_layers.emplace("default"); + + initFSR3(*this, m_allocator); } @@ -758,6 +760,10 @@ struct RendererImpl final : Renderer { return m_layers.size() - 1; } + void enableBuiltinTAA(bool enable) { + m_taa.m_enabled = enable; + } + const Mesh** getSortKeyToMeshMap() const override { return m_sort_key_to_mesh_map.begin(); } diff --git a/src/renderer/renderer.h b/src/renderer/renderer.h index b0d95bcf42..6777f962bd 100644 --- a/src/renderer/renderer.h +++ b/src/renderer/renderer.h @@ -87,6 +87,7 @@ struct 
LUMIX_RENDERER_API Renderer : ISystem { virtual void freeSortKey(u32 key) = 0; virtual u32 getMaxSortKey() const = 0; virtual const Mesh** getSortKeyToMeshMap() const = 0; + virtual void enableBuiltinTAA(bool enable) = 0; virtual const char* getSemanticDefines(Span attributes) = 0;