diff --git a/Common/GPU/D3D11/thin3d_d3d11.cpp b/Common/GPU/D3D11/thin3d_d3d11.cpp index ddbdecb0aa1f..428e86fdb3f4 100644 --- a/Common/GPU/D3D11/thin3d_d3d11.cpp +++ b/Common/GPU/D3D11/thin3d_d3d11.cpp @@ -95,16 +95,13 @@ class D3D11DrawContext : public DrawContext { // These functions should be self explanatory. void BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp, const char *tag) override; - Framebuffer *GetCurrentRenderTarget() override { - return curRenderTarget_; - } - void BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit) override; + void BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int layer) override; void GetFramebufferDimensions(Framebuffer *fbo, int *w, int *h) override; void InvalidateCachedState() override; - void BindTextures(int start, int count, Texture **textures) override; + void BindTextures(int start, int count, Texture **textures, TextureBindFlags flags) override; void BindNativeTexture(int index, void *nativeTexture) override; void BindSamplerStates(int start, int count, SamplerState **states) override; void BindVertexBuffers(int start, int count, Buffer **buffers, const int *offsets) override; @@ -1306,35 +1303,38 @@ class D3D11Framebuffer : public Framebuffer { Framebuffer *D3D11DrawContext::CreateFramebuffer(const FramebufferDesc &desc) { HRESULT hr; D3D11Framebuffer *fb = new D3D11Framebuffer(desc.width, desc.height); - if (desc.numColorAttachments) { - fb->colorFormat = DXGI_FORMAT_R8G8B8A8_UNORM; - D3D11_TEXTURE2D_DESC descColor{}; - descColor.Width = desc.width; - descColor.Height = desc.height; - descColor.MipLevels = 1; - descColor.ArraySize = 1; - descColor.Format = fb->colorFormat; - descColor.SampleDesc.Count = 1; - descColor.SampleDesc.Quality = 0; - descColor.Usage = D3D11_USAGE_DEFAULT; - descColor.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE; - descColor.CPUAccessFlags = 0; - descColor.MiscFlags = 0; - hr = 
device_->CreateTexture2D(&descColor, nullptr, &fb->colorTex); - if (FAILED(hr)) { - delete fb; - return nullptr; - } - hr = device_->CreateRenderTargetView(fb->colorTex, nullptr, &fb->colorRTView); - if (FAILED(hr)) { - delete fb; - return nullptr; - } - hr = device_->CreateShaderResourceView(fb->colorTex, nullptr, &fb->colorSRView); - if (FAILED(hr)) { - delete fb; - return nullptr; - } + + // We don't (yet?) support multiview for D3D11. Not sure if there's a way to do it. + // Texture arrays are supported but we don't have any other use cases yet. + _dbg_assert_(desc.numLayers == 1); + + fb->colorFormat = DXGI_FORMAT_R8G8B8A8_UNORM; + D3D11_TEXTURE2D_DESC descColor{}; + descColor.Width = desc.width; + descColor.Height = desc.height; + descColor.MipLevels = 1; + descColor.ArraySize = 1; + descColor.Format = fb->colorFormat; + descColor.SampleDesc.Count = 1; + descColor.SampleDesc.Quality = 0; + descColor.Usage = D3D11_USAGE_DEFAULT; + descColor.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE; + descColor.CPUAccessFlags = 0; + descColor.MiscFlags = 0; + hr = device_->CreateTexture2D(&descColor, nullptr, &fb->colorTex); + if (FAILED(hr)) { + delete fb; + return nullptr; + } + hr = device_->CreateRenderTargetView(fb->colorTex, nullptr, &fb->colorRTView); + if (FAILED(hr)) { + delete fb; + return nullptr; + } + hr = device_->CreateShaderResourceView(fb->colorTex, nullptr, &fb->colorSRView); + if (FAILED(hr)) { + delete fb; + return nullptr; } if (desc.z_stencil) { @@ -1381,7 +1381,7 @@ Framebuffer *D3D11DrawContext::CreateFramebuffer(const FramebufferDesc &desc) { return fb; } -void D3D11DrawContext::BindTextures(int start, int count, Texture **textures) { +void D3D11DrawContext::BindTextures(int start, int count, Texture **textures, TextureBindFlags flags) { // Collect the resource views from the textures. 
ID3D11ShaderResourceView *views[MAX_BOUND_TEXTURES]; _assert_(start + count <= ARRAY_SIZE(views)); @@ -1701,8 +1701,9 @@ void D3D11DrawContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const Ren stepId_++; } -void D3D11DrawContext::BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit) { - _assert_(binding < MAX_BOUND_TEXTURES); +void D3D11DrawContext::BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int layer) { + _dbg_assert_(binding < MAX_BOUND_TEXTURES); + _dbg_assert_(layer == ALL_LAYERS || layer == 0); // No multiple layer support on D3D D3D11Framebuffer *fb = (D3D11Framebuffer *)fbo; switch (channelBit) { case FBChannel::FB_COLOR_BIT: diff --git a/Common/GPU/D3D9/thin3d_d3d9.cpp b/Common/GPU/D3D9/thin3d_d3d9.cpp index 162c8febec67..8de75b09d0a0 100644 --- a/Common/GPU/D3D9/thin3d_d3d9.cpp +++ b/Common/GPU/D3D9/thin3d_d3d9.cpp @@ -532,16 +532,13 @@ class D3D9Context : public DrawContext { // These functions should be self explanatory. 
void BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp, const char *tag) override; - Framebuffer *GetCurrentRenderTarget() override { - return curRenderTarget_; - } - void BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit) override; - + void BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int layer) override; + uintptr_t GetFramebufferAPITexture(Framebuffer *fbo, int channelBits, int attachment) override; void GetFramebufferDimensions(Framebuffer *fbo, int *w, int *h) override; - void BindTextures(int start, int count, Texture **textures) override; + void BindTextures(int start, int count, Texture **textures, TextureBindFlags flags) override; void BindNativeTexture(int index, void *nativeTexture) override; void BindSamplerStates(int start, int count, SamplerState **states) override { @@ -749,7 +746,6 @@ D3D9Context::D3D9Context(IDirect3D9 *d3d, IDirect3D9Ex *d3dEx, int adapterId, ID } caps_.deviceID = identifier_.DeviceId; - caps_.multiViewport = false; caps_.depthRangeMinusOneToOne = false; caps_.preferredDepthBufferFormat = DataFormat::D24_S8; caps_.dualSourceBlend = false; @@ -915,7 +911,7 @@ Texture *D3D9Context::CreateTexture(const TextureDesc &desc) { return tex; } -void D3D9Context::BindTextures(int start, int count, Texture **textures) { +void D3D9Context::BindTextures(int start, int count, Texture **textures, TextureBindFlags flags) { _assert_(start + count <= MAX_BOUND_TEXTURES); for (int i = start; i < start + count; i++) { D3D9Texture *tex = static_cast(textures[i - start]); @@ -1244,6 +1240,9 @@ class D3D9Framebuffer : public Framebuffer { }; Framebuffer *D3D9Context::CreateFramebuffer(const FramebufferDesc &desc) { + // Don't think D3D9 does array layers. 
+ _dbg_assert_(desc.numLayers == 1); + static uint32_t id = 0; D3D9Framebuffer *fbo = new D3D9Framebuffer(desc.width, desc.height); @@ -1348,8 +1347,9 @@ uintptr_t D3D9Context::GetFramebufferAPITexture(Framebuffer *fbo, int channelBit } } -void D3D9Context::BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit) { - _assert_(binding < MAX_BOUND_TEXTURES); +void D3D9Context::BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int layer) { + _dbg_assert_(binding < MAX_BOUND_TEXTURES); + _dbg_assert_(layer == ALL_LAYERS || layer == 0); // No stereo support D3D9Framebuffer *fb = (D3D9Framebuffer *)fbo; switch (channelBit) { case FB_DEPTH_BIT: diff --git a/Common/GPU/OpenGL/thin3d_gl.cpp b/Common/GPU/OpenGL/thin3d_gl.cpp index 91f89e780703..bd8a252e1043 100644 --- a/Common/GPU/OpenGL/thin3d_gl.cpp +++ b/Common/GPU/OpenGL/thin3d_gl.cpp @@ -355,10 +355,7 @@ class OpenGLContext : public DrawContext { // These functions should be self explanatory. void BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp, const char *tag) override; - Framebuffer *GetCurrentRenderTarget() override { - return curRenderTarget_; - } - void BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit) override; + void BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int layer) override; void GetFramebufferDimensions(Framebuffer *fbo, int *w, int *h) override; @@ -400,7 +397,7 @@ class OpenGLContext : public DrawContext { curPipeline_->depthStencil->stencilPass); } - void BindTextures(int start, int count, Texture **textures) override; + void BindTextures(int start, int count, Texture **textures, TextureBindFlags flags) override; void BindNativeTexture(int sampler, void *nativeTexture) override; void BindPipeline(Pipeline *pipeline) override; @@ -1130,7 +1127,7 @@ Pipeline *OpenGLContext::CreateGraphicsPipeline(const PipelineDesc &desc, const } } -void OpenGLContext::BindTextures(int 
start, int count, Texture **textures) { +void OpenGLContext::BindTextures(int start, int count, Texture **textures, TextureBindFlags flags) { _assert_(start + count <= MAX_TEXTURE_SLOTS); for (int i = start; i < start + count; i++) { OpenGLTexture *glTex = static_cast(textures[i - start]); @@ -1390,6 +1387,9 @@ void OpenGLInputLayout::Compile(const InputLayoutDesc &desc) { Framebuffer *OpenGLContext::CreateFramebuffer(const FramebufferDesc &desc) { CheckGLExtensions(); + // TODO: Support multiview later. (It's our only use case for multi layers). + _dbg_assert_(desc.numLayers == 1); + GLRFramebuffer *framebuffer = renderManager_.CreateFramebuffer(desc.width, desc.height, desc.z_stencil); OpenGLFramebuffer *fbo = new OpenGLFramebuffer(&renderManager_, framebuffer); return fbo; @@ -1436,7 +1436,7 @@ bool OpenGLContext::BlitFramebuffer(Framebuffer *fbsrc, int srcX1, int srcY1, in return true; } -void OpenGLContext::BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit) { +void OpenGLContext::BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int layer) { OpenGLFramebuffer *fb = (OpenGLFramebuffer *)fbo; _assert_(binding < MAX_TEXTURE_SLOTS); diff --git a/Common/GPU/Shader.h b/Common/GPU/Shader.h index 4020a7b25302..6e294457347d 100644 --- a/Common/GPU/Shader.h +++ b/Common/GPU/Shader.h @@ -4,9 +4,12 @@ #include #include // for size_t +#include "Common/Common.h" + // GLSL_1xx and GLSL_3xx each cover a lot of sub variants. All the little quirks // that differ are covered in ShaderLanguageDesc. // Defined as a bitmask so stuff like GetSupportedShaderLanguages can return combinations. +// TODO: We can probably move away from this distinction soon, now that we mostly generate/translate shaders. 
enum ShaderLanguage { GLSL_1xx = 1, GLSL_3xx = 2, @@ -30,7 +33,6 @@ enum class ShaderStage { const char *ShaderStageAsString(ShaderStage lang); - struct ShaderLanguageDesc { ShaderLanguageDesc() {} explicit ShaderLanguageDesc(ShaderLanguage lang); @@ -91,13 +93,18 @@ struct UniformDef { int index; }; +enum class SamplerFlags { + ARRAY_ON_VULKAN = 1, +}; +ENUM_CLASS_BITOPS(SamplerFlags); + struct SamplerDef { int binding; // Might only be used by some backends. const char *name; + SamplerFlags flags; // TODO: Might need unsigned samplers, 3d samplers, or other types in the future. }; - // For passing error messages from shader compilation (and other critical issues) back to the host. // This can run on any thread - be aware! // TODO: See if we can find a less generic name for this. diff --git a/Common/GPU/ShaderWriter.cpp b/Common/GPU/ShaderWriter.cpp index e00a468e3591..ed7a3612f9d3 100644 --- a/Common/GPU/ShaderWriter.cpp +++ b/Common/GPU/ShaderWriter.cpp @@ -114,6 +114,9 @@ void ShaderWriter::Preamble(Slice extensions) { switch (lang_.shaderLanguage) { case GLSL_VULKAN: C("#version 450\n"); + if (flags_ & ShaderWriterFlags::FS_AUTO_STEREO) { + C("#extension GL_EXT_multiview : enable\n"); + } // IMPORTANT! Extensions must be the first thing after #version. for (size_t i = 0; i < extensions.size(); i++) { F("%s\n", extensions[i]); @@ -462,6 +465,10 @@ void ShaderWriter::DeclareSamplers(Slice samplers) { samplerDefs_ = samplers; } +void ShaderWriter::ApplySamplerMetadata(Slice samplers) { + samplerDefs_ = samplers; +} + void ShaderWriter::DeclareTexture2D(const SamplerDef &def) { switch (lang_.shaderLanguage) { case HLSL_D3D11: @@ -471,8 +478,12 @@ void ShaderWriter::DeclareTexture2D(const SamplerDef &def) { F("sampler %s: register(s%d);\n", def.name, def.binding); break; case GLSL_VULKAN: - // In the thin3d descriptor set layout, textures start at 1 in set 0. Hence the +1. 
- F("layout(set = 0, binding = %d) uniform sampler2D %s;\n", def.binding + texBindingBase_, def.name); + // texBindingBase_ is used for the thin3d descriptor set layout, where they start at 1. + if (def.flags & SamplerFlags::ARRAY_ON_VULKAN) { + F("layout(set = 0, binding = %d) uniform sampler2DArray %s;\n", def.binding + texBindingBase_, def.name); + } else { + F("layout(set = 0, binding = %d) uniform sampler2D %s;\n", def.binding + texBindingBase_, def.name); + } break; default: F("uniform sampler2D %s;\n", def.name); @@ -492,6 +503,7 @@ void ShaderWriter::DeclareSampler2D(const SamplerDef &def) { } ShaderWriter &ShaderWriter::SampleTexture2D(const char *sampName, const char *uv) { + const SamplerDef *samp = GetSamplerDef(sampName); switch (lang_.shaderLanguage) { case HLSL_D3D11: F("%s.Sample(%sSamp, %s)", sampName, sampName, uv); @@ -501,13 +513,20 @@ ShaderWriter &ShaderWriter::SampleTexture2D(const char *sampName, const char *uv break; default: // Note: we ignore the sampler. make sure you bound samplers to the textures correctly. - F("%s(%s, %s)", lang_.texture, sampName, uv); + if (samp && (samp->flags & SamplerFlags::ARRAY_ON_VULKAN) && lang_.shaderLanguage == GLSL_VULKAN) { + const char *index = (flags_ & ShaderWriterFlags::FS_AUTO_STEREO) ? "float(gl_ViewIndex)" : "0.0"; + F("%s(%s, vec3(%s, %s))", lang_.texture, sampName, uv, index); + } else { + F("%s(%s, %s)", lang_.texture, sampName, uv); + } break; } return *this; } ShaderWriter &ShaderWriter::SampleTexture2DOffset(const char *sampName, const char *uv, int offX, int offY) { + const SamplerDef *samp = GetSamplerDef(sampName); + switch (lang_.shaderLanguage) { case HLSL_D3D11: F("%s.Sample(%sSamp, %s, int2(%d, %d))", sampName, sampName, uv, offX, offY); @@ -518,13 +537,20 @@ ShaderWriter &ShaderWriter::SampleTexture2DOffset(const char *sampName, const ch break; default: // Note: we ignore the sampler. make sure you bound samplers to the textures correctly. 
- F("%sOffset(%s, %s, ivec2(%d, %d))", lang_.texture, sampName, uv, offX, offY); + if (samp && (samp->flags & SamplerFlags::ARRAY_ON_VULKAN) && lang_.shaderLanguage == GLSL_VULKAN) { + const char *index = (flags_ & ShaderWriterFlags::FS_AUTO_STEREO) ? "float(gl_ViewIndex)" : "0.0"; + F("%sOffset(%s, vec3(%s, %s), ivec2(%d, %d))", lang_.texture, sampName, uv, index, offX, offY); + } else { + F("%sOffset(%s, %s, ivec2(%d, %d))", lang_.texture, sampName, uv, offX, offY); + } break; } return *this; } ShaderWriter &ShaderWriter::LoadTexture2D(const char *sampName, const char *uv, int level) { + const SamplerDef *samp = GetSamplerDef(sampName); + switch (lang_.shaderLanguage) { case HLSL_D3D11: F("%s.Load(ivec3(%s, %d))", sampName, uv, level); @@ -535,7 +561,12 @@ ShaderWriter &ShaderWriter::LoadTexture2D(const char *sampName, const char *uv, break; default: // Note: we ignore the sampler. make sure you bound samplers to the textures correctly. - F("texelFetch(%s, %s, %d)", sampName, uv, level); + if (samp && (samp->flags & SamplerFlags::ARRAY_ON_VULKAN) && lang_.shaderLanguage == GLSL_VULKAN) { + const char *index = (flags_ & ShaderWriterFlags::FS_AUTO_STEREO) ? "gl_ViewIndex" : "0"; + F("texelFetch(%s, ivec3(%s, %s), %d)", sampName, uv, index, level); + } else { + F("texelFetch(%s, %s, %d)", sampName, uv, level); + } break; } return *this; diff --git a/Common/GPU/ShaderWriter.h b/Common/GPU/ShaderWriter.h index 6eb42719f028..379e70431962 100644 --- a/Common/GPU/ShaderWriter.h +++ b/Common/GPU/ShaderWriter.h @@ -33,13 +33,15 @@ struct VaryingDef { enum class ShaderWriterFlags { NONE = 0, FS_WRITE_DEPTH = 1, + FS_AUTO_STEREO = 2, // Automatically indexes samplers flagged with SamplerFlags::ARRAY_ON_VULKAN by gl_ViewIndex. Useful for stereo rendering. }; ENUM_CLASS_BITOPS(ShaderWriterFlags); class ShaderWriter { public: // Extensions are supported for both OpenGL ES and Vulkan (though of course, they're different). 
- ShaderWriter(char *buffer, const ShaderLanguageDesc &lang, ShaderStage stage, Slice extensions = Slice(), ShaderWriterFlags flags = ShaderWriterFlags::NONE) : p_(buffer), lang_(lang), stage_(stage) { + ShaderWriter(char *buffer, const ShaderLanguageDesc &lang, ShaderStage stage, Slice extensions = Slice(), ShaderWriterFlags flags = ShaderWriterFlags::NONE) : p_(buffer), lang_(lang), stage_(stage), flags_(flags) { + buffer[0] = '\0'; Preamble(extensions); } ShaderWriter(const ShaderWriter &) = delete; @@ -78,6 +80,10 @@ class ShaderWriter { // NOTE: samplers must live for the rest of ShaderWriter's lifetime. No way to express that in C++ though :( void DeclareSamplers(Slice samplers); + // Same as DeclareSamplers, but doesn't actually declare them. + // This is currently only required by FragmentShaderGenerator. + void ApplySamplerMetadata(Slice samplers); + void ConstFloat(const char *name, float value); void SetFlags(ShaderWriterFlags flags) { flags_ |= flags; } void SetTexBindingBase(int base) { texBindingBase_ = base; } @@ -121,6 +127,7 @@ class ShaderWriter { char *p_; const ShaderLanguageDesc &lang_; const ShaderStage stage_; + Slice samplers_; ShaderWriterFlags flags_ = ShaderWriterFlags::NONE; Slice samplerDefs_; int texBindingBase_ = 1; diff --git a/Common/GPU/Vulkan/VulkanBarrier.h b/Common/GPU/Vulkan/VulkanBarrier.h index 19edc548ba77..1fefb5adfa6a 100644 --- a/Common/GPU/Vulkan/VulkanBarrier.h +++ b/Common/GPU/Vulkan/VulkanBarrier.h @@ -14,7 +14,7 @@ class VulkanContext; class VulkanBarrier { public: void TransitionImage( - VkImage image, int baseMip, int numMipLevels, VkImageAspectFlags aspectMask, + VkImage image, int baseMip, int numMipLevels, int numLayers, VkImageAspectFlags aspectMask, VkImageLayout oldImageLayout, VkImageLayout newImageLayout, VkAccessFlags srcAccessMask, VkAccessFlags dstAccessMask, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask @@ -36,7 +36,7 @@ class VulkanBarrier { 
imageBarrier.subresourceRange.aspectMask = aspectMask; imageBarrier.subresourceRange.baseMipLevel = baseMip; imageBarrier.subresourceRange.levelCount = numMipLevels; - imageBarrier.subresourceRange.layerCount = 1; // We never use more than one layer, and old Mali drivers have problems with VK_REMAINING_ARRAY_LAYERS/VK_REMAINING_MIP_LEVELS. + imageBarrier.subresourceRange.layerCount = numLayers; // NOTE: We could usually use VK_REMAINING_ARRAY_LAYERS/VK_REMAINING_MIP_LEVELS, but really old Mali drivers have problems with those. imageBarrier.subresourceRange.baseArrayLayer = 0; imageBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; imageBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; @@ -46,7 +46,7 @@ class VulkanBarrier { // Automatically determines access and stage masks from layouts. // Not universally usable, but works for PPSSPP's use. void TransitionImageAuto( - VkImage image, int baseMip, int numMipLevels, VkImageAspectFlags aspectMask, VkImageLayout oldImageLayout, VkImageLayout newImageLayout + VkImage image, int baseMip, int numMipLevels, int numLayers, VkImageAspectFlags aspectMask, VkImageLayout oldImageLayout, VkImageLayout newImageLayout ) { _dbg_assert_(image != VK_NULL_HANDLE); @@ -104,7 +104,7 @@ class VulkanBarrier { imageBarrier.subresourceRange.aspectMask = aspectMask; imageBarrier.subresourceRange.baseMipLevel = baseMip; imageBarrier.subresourceRange.levelCount = numMipLevels; - imageBarrier.subresourceRange.layerCount = 1; // We never use more than one layer, and old Mali drivers have problems with VK_REMAINING_ARRAY_LAYERS/VK_REMAINING_MIP_LEVELS. + imageBarrier.subresourceRange.layerCount = numLayers; // NOTE: We could usually use VK_REMAINING_ARRAY_LAYERS/VK_REMAINING_MIP_LEVELS, but really old Mali drivers have problems with those. 
imageBarrier.subresourceRange.baseArrayLayer = 0; imageBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; imageBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; diff --git a/Common/GPU/Vulkan/VulkanContext.cpp b/Common/GPU/Vulkan/VulkanContext.cpp index a3bbf1f0ea86..8a69c0f03721 100644 --- a/Common/GPU/Vulkan/VulkanContext.cpp +++ b/Common/GPU/Vulkan/VulkanContext.cpp @@ -608,8 +608,7 @@ void VulkanContext::ChooseDevice(int physical_device) { deviceFeatures_.enabled.standard.geometryShader = deviceFeatures_.available.standard.geometryShader; deviceFeatures_.enabled.multiview = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES }; - // Don't yet enable these. - // deviceFeatures_.enabled.multiview.multiview = deviceFeatures_.available.multiview.multiview; + deviceFeatures_.enabled.multiview.multiview = deviceFeatures_.available.multiview.multiview; // deviceFeatures_.enabled.multiview.multiviewGeometryShader = deviceFeatures_.available.multiview.multiviewGeometryShader; GetDeviceLayerExtensionList(nullptr, device_extension_properties_); @@ -1246,7 +1245,7 @@ bool VulkanContext::CreateShaderModule(const std::vector &spirv, VkSha } } -void TransitionImageLayout2(VkCommandBuffer cmd, VkImage image, int baseMip, int numMipLevels, VkImageAspectFlags aspectMask, +void TransitionImageLayout2(VkCommandBuffer cmd, VkImage image, int baseMip, int numMipLevels, int numLayers, VkImageAspectFlags aspectMask, VkImageLayout oldImageLayout, VkImageLayout newImageLayout, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask, VkAccessFlags srcAccessMask, VkAccessFlags dstAccessMask) { @@ -1259,7 +1258,7 @@ void TransitionImageLayout2(VkCommandBuffer cmd, VkImage image, int baseMip, int image_memory_barrier.subresourceRange.aspectMask = aspectMask; image_memory_barrier.subresourceRange.baseMipLevel = baseMip; image_memory_barrier.subresourceRange.levelCount = numMipLevels; - image_memory_barrier.subresourceRange.layerCount = 1; // We never use more than 
one layer, and old Mali drivers have problems with VK_REMAINING_ARRAY_LAYERS/VK_REMAINING_MIP_LEVELS. + image_memory_barrier.subresourceRange.layerCount = numLayers; // NOTE: We could usually use VK_REMAINING_ARRAY_LAYERS/VK_REMAINING_MIP_LEVELS, but really old Mali drivers have problems with those. image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; vkCmdPipelineBarrier(cmd, srcStageMask, dstStageMask, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); diff --git a/Common/GPU/Vulkan/VulkanContext.h b/Common/GPU/Vulkan/VulkanContext.h index 60ef6d373ed8..7baf795717ea 100644 --- a/Common/GPU/Vulkan/VulkanContext.h +++ b/Common/GPU/Vulkan/VulkanContext.h @@ -222,6 +222,9 @@ class VulkanContext { SetDebugNameImpl((uint64_t)handle, type, name); } } + bool DebugLayerEnabled() const { + return extensionsLookup_.EXT_debug_utils; + } bool MemoryTypeFromProperties(uint32_t typeBits, VkFlags requirements_mask, uint32_t *typeIndex); @@ -441,7 +444,7 @@ class VulkanContext { }; // Detailed control. 
-void TransitionImageLayout2(VkCommandBuffer cmd, VkImage image, int baseMip, int mipLevels, VkImageAspectFlags aspectMask, +void TransitionImageLayout2(VkCommandBuffer cmd, VkImage image, int baseMip, int mipLevels, int numLayers, VkImageAspectFlags aspectMask, VkImageLayout oldImageLayout, VkImageLayout newImageLayout, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask, VkAccessFlags srcAccessMask, VkAccessFlags dstAccessMask); diff --git a/Common/GPU/Vulkan/VulkanImage.cpp b/Common/GPU/Vulkan/VulkanImage.cpp index 72eb339c121b..47b143236089 100644 --- a/Common/GPU/Vulkan/VulkanImage.cpp +++ b/Common/GPU/Vulkan/VulkanImage.cpp @@ -87,7 +87,7 @@ bool VulkanTexture::CreateDirect(VkCommandBuffer cmd, int w, int h, int depth, i switch (initialLayout) { case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: case VK_IMAGE_LAYOUT_GENERAL: - TransitionImageLayout2(cmd, image_, 0, numMips, VK_IMAGE_ASPECT_COLOR_BIT, + TransitionImageLayout2(cmd, image_, 0, numMips, 1, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_LAYOUT_UNDEFINED, initialLayout, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, VK_ACCESS_TRANSFER_WRITE_BIT); @@ -123,6 +123,16 @@ bool VulkanTexture::CreateDirect(VkCommandBuffer cmd, int w, int h, int depth, i _assert_(res == VK_ERROR_OUT_OF_HOST_MEMORY || res == VK_ERROR_OUT_OF_DEVICE_MEMORY || res == VK_ERROR_TOO_MANY_OBJECTS); return false; } + vulkan_->SetDebugName(view_, VK_OBJECT_TYPE_IMAGE_VIEW, tag_.c_str()); + + // Additionally, create an array view, but only if it's a 2D texture. 
+ if (view_info.viewType == VK_IMAGE_VIEW_TYPE_2D) { + view_info.viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY; + res = vkCreateImageView(vulkan_->GetDevice(), &view_info, NULL, &arrayView_); + _assert_(res == VK_SUCCESS); + vulkan_->SetDebugName(arrayView_, VK_OBJECT_TYPE_IMAGE_VIEW, tag_.c_str()); + } + return true; } @@ -164,7 +174,7 @@ void VulkanTexture::GenerateMips(VkCommandBuffer cmd, int firstMipToGenerate, bo _assert_msg_(firstMipToGenerate < numMips_, "Can't generate levels beyond storage"); // Transition the pre-set levels to GENERAL. - TransitionImageLayout2(cmd, image_, 0, firstMipToGenerate, VK_IMAGE_ASPECT_COLOR_BIT, + TransitionImageLayout2(cmd, image_, 0, firstMipToGenerate, 1, VK_IMAGE_ASPECT_COLOR_BIT, fromCompute ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL, fromCompute ? VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT : VK_PIPELINE_STAGE_TRANSFER_BIT, @@ -173,7 +183,7 @@ void VulkanTexture::GenerateMips(VkCommandBuffer cmd, int firstMipToGenerate, bo VK_ACCESS_TRANSFER_READ_BIT); // Do the same with the uninitialized levels. 
- TransitionImageLayout2(cmd, image_, firstMipToGenerate, numMips_ - firstMipToGenerate, + TransitionImageLayout2(cmd, image_, firstMipToGenerate, numMips_ - firstMipToGenerate, 1, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, @@ -206,7 +216,7 @@ void VulkanTexture::GenerateMips(VkCommandBuffer cmd, int firstMipToGenerate, bo vkCmdBlitImage(cmd, image_, VK_IMAGE_LAYOUT_GENERAL, image_, VK_IMAGE_LAYOUT_GENERAL, 1, &blit, VK_FILTER_LINEAR); - TransitionImageLayout2(cmd, image_, mip, 1, VK_IMAGE_ASPECT_COLOR_BIT, + TransitionImageLayout2(cmd, image_, mip, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT); @@ -214,7 +224,7 @@ void VulkanTexture::GenerateMips(VkCommandBuffer cmd, int firstMipToGenerate, bo } void VulkanTexture::EndCreate(VkCommandBuffer cmd, bool vertexTexture, VkPipelineStageFlags prevStage, VkImageLayout layout) { - TransitionImageLayout2(cmd, image_, 0, numMips_, + TransitionImageLayout2(cmd, image_, 0, numMips_, 1, VK_IMAGE_ASPECT_COLOR_BIT, layout, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, prevStage, vertexTexture ? 
VK_PIPELINE_STAGE_VERTEX_SHADER_BIT : VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, @@ -237,6 +247,7 @@ VkImageView VulkanTexture::CreateViewForMip(int mip) { view_info.subresourceRange.layerCount = 1; VkImageView view; VkResult res = vkCreateImageView(vulkan_->GetDevice(), &view_info, NULL, &view); + vulkan_->SetDebugName(view, VK_OBJECT_TYPE_IMAGE_VIEW, "mipview"); _assert_(res == VK_SUCCESS); return view; } @@ -245,6 +256,9 @@ void VulkanTexture::Destroy() { if (view_ != VK_NULL_HANDLE) { vulkan_->Delete().QueueDeleteImageView(view_); } + if (arrayView_ != VK_NULL_HANDLE) { + vulkan_->Delete().QueueDeleteImageView(arrayView_); + } if (image_ != VK_NULL_HANDLE) { _dbg_assert_(allocation_ != VK_NULL_HANDLE); vulkan_->Delete().QueueDeleteImageAllocation(image_, allocation_); diff --git a/Common/GPU/Vulkan/VulkanImage.h b/Common/GPU/Vulkan/VulkanImage.h index 50d7e0c7f586..e0aa32d385f1 100644 --- a/Common/GPU/Vulkan/VulkanImage.h +++ b/Common/GPU/Vulkan/VulkanImage.h @@ -51,6 +51,9 @@ class VulkanTexture { // Used for sampling, generally. VkImageView GetImageView() const { return view_; } + // For use with some shaders, we might want to view it as a single entry array for convenience. 
+ VkImageView GetImageArrayView() const { return arrayView_; } + int32_t GetWidth() const { return width_; } int32_t GetHeight() const { return height_; } int32_t GetNumMips() const { return numMips_; } @@ -62,6 +65,7 @@ class VulkanTexture { VulkanContext *vulkan_; VkImage image_ = VK_NULL_HANDLE; VkImageView view_ = VK_NULL_HANDLE; + VkImageView arrayView_ = VK_NULL_HANDLE; VmaAllocation allocation_ = VK_NULL_HANDLE; int16_t width_ = 0; diff --git a/Common/GPU/Vulkan/VulkanMemory.cpp b/Common/GPU/Vulkan/VulkanMemory.cpp index 8206c80c912e..d8184098a552 100644 --- a/Common/GPU/Vulkan/VulkanMemory.cpp +++ b/Common/GPU/Vulkan/VulkanMemory.cpp @@ -153,7 +153,7 @@ void VulkanDescSetPool::Create(VulkanContext *vulkan, const VkDescriptorPoolCrea _assert_msg_(res == VK_SUCCESS, "Could not create VulkanDescSetPool %s", tag_); } -VkDescriptorSet VulkanDescSetPool::Allocate(int n, const VkDescriptorSetLayout *layouts) { +VkDescriptorSet VulkanDescSetPool::Allocate(int n, const VkDescriptorSetLayout *layouts, const char *tag) { if (descPool_ == VK_NULL_HANDLE || usage_ + n >= info_.maxSets) { // Missing or out of space, need to recreate. VkResult res = Recreate(grow_); @@ -180,9 +180,12 @@ VkDescriptorSet VulkanDescSetPool::Allocate(int n, const VkDescriptorSetLayout * _assert_msg_(result == VK_SUCCESS, "Ran out of descriptor space (frag?) and failed to allocate after recreating a descriptor pool. 
res=%d", (int)result); } - if (result == VK_SUCCESS) - return desc; - return VK_NULL_HANDLE; + if (result != VK_SUCCESS) { + return VK_NULL_HANDLE; + } + + vulkan_->SetDebugName(desc, VK_OBJECT_TYPE_DESCRIPTOR_SET, tag); + return desc; } void VulkanDescSetPool::Reset() { diff --git a/Common/GPU/Vulkan/VulkanMemory.h b/Common/GPU/Vulkan/VulkanMemory.h index 79a71d4ce6fd..275ca4bb78e7 100644 --- a/Common/GPU/Vulkan/VulkanMemory.h +++ b/Common/GPU/Vulkan/VulkanMemory.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -121,6 +122,15 @@ class VulkanPushBuffer { return writePtr_ + off; } + template + void PushUBOData(const T &data, VkDescriptorBufferInfo *info) { + uint32_t bindOffset; + void *ptr = PushAligned(sizeof(T), &bindOffset, &info->buffer, vulkan_->GetPhysicalDeviceProperties().properties.limits.minUniformBufferOffsetAlignment); + memcpy(ptr, &data, sizeof(T)); + info->offset = bindOffset; + info->range = sizeof(T); + } + size_t GetTotalSize() const; private: @@ -153,7 +163,7 @@ class VulkanDescSetPool { void Create(VulkanContext *vulkan, const VkDescriptorPoolCreateInfo &info, const std::vector &sizes); // Allocate a new set, which may resize and empty the current sets. // Use only for the current frame, unless in a cache cleared by clear_. - VkDescriptorSet Allocate(int n, const VkDescriptorSetLayout *layouts); + VkDescriptorSet Allocate(int n, const VkDescriptorSetLayout *layouts, const char *tag); void Reset(); void Destroy(); diff --git a/Common/GPU/Vulkan/VulkanQueueRunner.cpp b/Common/GPU/Vulkan/VulkanQueueRunner.cpp index 98bd2e777359..1e31ca8525c3 100644 --- a/Common/GPU/Vulkan/VulkanQueueRunner.cpp +++ b/Common/GPU/Vulkan/VulkanQueueRunner.cpp @@ -37,6 +37,8 @@ RenderPassType MergeRPTypes(RenderPassType a, RenderPassType b) { return a; } + _dbg_assert_((a & RP_TYPE_MULTIVIEW_COLOR) == (b & RP_TYPE_MULTIVIEW_COLOR)); + // The rest we can just OR together to get the maximum feature set. 
return (RenderPassType)((u32)a | (u32)b); } @@ -166,7 +168,7 @@ bool VulkanQueueRunner::CreateSwapchain(VkCommandBuffer cmdInit) { color_image_view.subresourceRange.baseMipLevel = 0; color_image_view.subresourceRange.levelCount = 1; color_image_view.subresourceRange.baseArrayLayer = 0; - color_image_view.subresourceRange.layerCount = 1; + color_image_view.subresourceRange.layerCount = 1; // TODO: Investigate hw-assisted stereo. color_image_view.viewType = VK_IMAGE_VIEW_TYPE_2D; color_image_view.flags = 0; color_image_view.image = sc_buffer.image; @@ -176,6 +178,7 @@ bool VulkanQueueRunner::CreateSwapchain(VkCommandBuffer cmdInit) { // Also, turns out it's illegal to transition un-acquired images, thanks Hans-Kristian. See #11417. res = vkCreateImageView(vulkan_->GetDevice(), &color_image_view, nullptr, &sc_buffer.view); + vulkan_->SetDebugName(sc_buffer.view, VK_OBJECT_TYPE_IMAGE_VIEW, "swapchain_view"); swapchainImages_.push_back(sc_buffer); _dbg_assert_(res == VK_SUCCESS); } @@ -249,7 +252,7 @@ bool VulkanQueueRunner::InitDepthStencilBuffer(VkCommandBuffer cmd) { vulkan_->SetDebugName(depth_.image, VK_OBJECT_TYPE_IMAGE, "BackbufferDepth"); - TransitionImageLayout2(cmd, depth_.image, 0, 1, + TransitionImageLayout2(cmd, depth_.image, 0, 1, 1, aspectMask, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, @@ -274,6 +277,7 @@ bool VulkanQueueRunner::InitDepthStencilBuffer(VkCommandBuffer cmd) { VkDevice device = vulkan_->GetDevice(); res = vkCreateImageView(device, &depth_view_info, NULL, &depth_.view); + vulkan_->SetDebugName(depth_.view, VK_OBJECT_TYPE_IMAGE_VIEW, "depth_stencil_backbuffer"); _dbg_assert_(res == VK_SUCCESS); if (res != VK_SUCCESS) return false; @@ -326,9 +330,14 @@ static VkAttachmentStoreOp ConvertStoreAction(VKRRenderPassStoreAction action) { // Also see 
https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-pipeline-barriers-subpass-self-dependencies VkRenderPass CreateRenderPass(VulkanContext *vulkan, const RPKey &key, RenderPassType rpType) { - bool selfDependency = rpType == RP_TYPE_COLOR_INPUT || rpType == RP_TYPE_COLOR_DEPTH_INPUT; + bool selfDependency = RenderPassTypeHasInput(rpType); bool isBackbuffer = rpType == RP_TYPE_BACKBUFFER; - bool hasDepth = rpType == RP_TYPE_BACKBUFFER || rpType == RP_TYPE_COLOR_DEPTH || rpType == RP_TYPE_COLOR_DEPTH_INPUT; + bool hasDepth = RenderPassTypeHasDepth(rpType); + bool multiview = RenderPassTypeHasMultiView(rpType); + + if (multiview) { + // TODO: Assert that the device has multiview support enabled. + } VkAttachmentDescription attachments[2] = {}; attachments[0].format = isBackbuffer ? vulkan->GetSwapchainFormat() : VK_FORMAT_R8G8B8A8_UNORM; @@ -390,6 +399,19 @@ VkRenderPass CreateRenderPass(VulkanContext *vulkan, const RPKey &key, RenderPas rp.subpassCount = 1; rp.pSubpasses = &subpass; + VkRenderPassMultiviewCreateInfoKHR mv{ VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO_KHR }; + uint32_t viewMask = 0x3; // Must be outside the 'if (multiview)' scope! 
+ int viewOffset = 0; + if (multiview) { + rp.pNext = &mv; + mv.subpassCount = 1; + mv.pViewMasks = &viewMask; + mv.dependencyCount = 0; + mv.pCorrelationMasks = &viewMask; // same masks + mv.correlationMaskCount = 1; + mv.pViewOffsets = &viewOffset; + } + if (isBackbuffer) { deps[numDeps].srcSubpass = VK_SUBPASS_EXTERNAL; deps[numDeps].dstSubpass = 0; @@ -457,6 +479,7 @@ void VulkanQueueRunner::SelfDependencyBarrier(VKRImage &img, VkImageAspectFlags img.image, 0, 1, + img.numLayers, aspect, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL, @@ -891,6 +914,10 @@ std::string VulkanQueueRunner::StepToString(const VKRStep &step) const { case RP_TYPE_COLOR_DEPTH: renderCmd = "RENDER_DEPTH"; break; case RP_TYPE_COLOR_INPUT: renderCmd = "RENDER_INPUT"; break; case RP_TYPE_COLOR_DEPTH_INPUT: renderCmd = "RENDER_DEPTH_INPUT"; break; + case RP_TYPE_MULTIVIEW_COLOR: renderCmd = "MV_RENDER"; break; + case RP_TYPE_MULTIVIEW_COLOR_DEPTH: renderCmd = "MV_RENDER_DEPTH"; break; + case RP_TYPE_MULTIVIEW_COLOR_INPUT: renderCmd = "MV_RENDER_INPUT"; break; + case RP_TYPE_MULTIVIEW_COLOR_DEPTH_INPUT: renderCmd = "MV_RENDER_DEPTH_INPUT"; break; default: renderCmd = "N/A"; } snprintf(buffer, sizeof(buffer), "%s %s %s (draws: %d, %dx%d/%dx%d)", renderCmd, step.tag, step.render.framebuffer ? 
step.render.framebuffer->Tag() : "", step.render.numDraws, actual_w, actual_h, w, h); @@ -1145,7 +1172,7 @@ void VulkanQueueRunner::LogReadbackImage(const VKRStep &step) { INFO_LOG(G3D, "%s", StepToString(step).c_str()); } -void TransitionToOptimal(VkCommandBuffer cmd, VkImage colorImage, VkImageLayout colorLayout, VkImage depthStencilImage, VkImageLayout depthStencilLayout, VulkanBarrier *recordBarrier) { +void TransitionToOptimal(VkCommandBuffer cmd, VkImage colorImage, VkImageLayout colorLayout, VkImage depthStencilImage, VkImageLayout depthStencilLayout, int numLayers, VulkanBarrier *recordBarrier) { if (colorLayout != VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) { VkPipelineStageFlags srcStageMask = 0; VkAccessFlags srcAccessMask = 0; @@ -1178,7 +1205,7 @@ void TransitionToOptimal(VkCommandBuffer cmd, VkImage colorImage, VkImageLayout break; } recordBarrier->TransitionImage( - colorImage, 0, 1, VK_IMAGE_ASPECT_COLOR_BIT, + colorImage, 0, 1, numLayers, VK_IMAGE_ASPECT_COLOR_BIT, colorLayout, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, srcAccessMask, @@ -1214,7 +1241,7 @@ void TransitionToOptimal(VkCommandBuffer cmd, VkImage colorImage, VkImageLayout break; } recordBarrier->TransitionImage( - depthStencilImage, 0, 1, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, + depthStencilImage, 0, 1, numLayers, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, depthStencilLayout, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, srcAccessMask, @@ -1224,7 +1251,7 @@ void TransitionToOptimal(VkCommandBuffer cmd, VkImage colorImage, VkImageLayout } } -void TransitionFromOptimal(VkCommandBuffer cmd, VkImage colorImage, VkImageLayout colorLayout, VkImage depthStencilImage, VkImageLayout depthStencilLayout) { +void TransitionFromOptimal(VkCommandBuffer cmd, VkImage colorImage, VkImageLayout colorLayout, VkImage depthStencilImage, int numLayers, VkImageLayout depthStencilLayout) { VkPipelineStageFlags srcStageMask = 0; VkPipelineStageFlags dstStageMask = 0; @@ -1266,7 
+1293,7 @@ void TransitionFromOptimal(VkCommandBuffer cmd, VkImage colorImage, VkImageLayou barrier[0].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; barrier[0].subresourceRange.baseMipLevel = 0; barrier[0].subresourceRange.levelCount = 1; - barrier[0].subresourceRange.layerCount = 1; + barrier[0].subresourceRange.layerCount = numLayers; barrier[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barrier[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barrierCount++; @@ -1305,7 +1332,7 @@ void TransitionFromOptimal(VkCommandBuffer cmd, VkImage colorImage, VkImageLayou barrier[barrierCount].subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; barrier[barrierCount].subresourceRange.baseMipLevel = 0; barrier[barrierCount].subresourceRange.levelCount = 1; - barrier[barrierCount].subresourceRange.layerCount = 1; + barrier[barrierCount].subresourceRange.layerCount = numLayers; barrier[barrierCount].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barrier[barrierCount].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barrierCount++; @@ -1323,6 +1350,7 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c iter.fb->color.image, 0, 1, + iter.fb->numLayers, VK_IMAGE_ASPECT_COLOR_BIT, iter.fb->color.layout, iter.targetLayout @@ -1333,6 +1361,7 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c iter.fb->depth.image, 0, 1, + iter.fb->numLayers, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, iter.fb->depth.layout, iter.targetLayout @@ -1362,6 +1391,7 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c step.render.framebuffer->color.image, 0, 1, + step.render.framebuffer->numLayers, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, @@ -1376,6 +1406,7 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c step.render.framebuffer->depth.image, 0, 1, + 
step.render.framebuffer->numLayers, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, @@ -1415,7 +1446,8 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c const RenderPassType rpType = step.render.renderPassType; - for (const auto &c : commands) { + for (size_t i = 0; i < commands.size(); i++) { + const VkRenderData &c = commands[i]; switch (c.cmd) { case VKRRenderCommand::REMOVED: break; @@ -1557,7 +1589,7 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c int numAttachments = 0; VkClearRect rc{}; rc.baseArrayLayer = 0; - rc.layerCount = 1; + rc.layerCount = 1; // In multiview mode, 1 means to replicate to all the active layers. rc.rect.extent.width = (uint32_t)curWidth; rc.rect.extent.height = (uint32_t)curHeight; VkClearAttachment attachments[2]{}; @@ -1595,13 +1627,14 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c default: ERROR_LOG(G3D, "Unimpl queue command"); + break; } } vkCmdEndRenderPass(cmd); if (fb) { // If the desired final layout aren't the optimal layout for rendering, transition. 
- TransitionFromOptimal(cmd, fb->color.image, step.render.finalColorLayout, fb->depth.image, step.render.finalDepthStencilLayout); + TransitionFromOptimal(cmd, fb->color.image, step.render.finalColorLayout, fb->depth.image, fb->numLayers, step.render.finalDepthStencilLayout); fb->color.layout = step.render.finalColorLayout; fb->depth.layout = step.render.finalDepthStencilLayout; @@ -1630,6 +1663,7 @@ VKRRenderPass *VulkanQueueRunner::PerformBindFramebufferAsRenderTarget(const VKR VKRFramebuffer *fb = step.render.framebuffer; framebuf = fb->Get(renderPass, step.render.renderPassType); + _dbg_assert_(framebuf != VK_NULL_HANDLE); w = fb->width; h = fb->height; @@ -1641,7 +1675,7 @@ VKRRenderPass *VulkanQueueRunner::PerformBindFramebufferAsRenderTarget(const VKR step.render.colorLoad == VKRRenderPassLoadAction::CLEAR && vulkan_->GetPhysicalDeviceProperties().properties.driverVersion == 0xaa9c4b29; if (maliBugWorkaround) { - recordBarrier_.TransitionImage(step.render.framebuffer->color.image, 0, 1, VK_IMAGE_ASPECT_COLOR_BIT, + recordBarrier_.TransitionImage(fb->color.image, 0, 1, fb->numLayers, VK_IMAGE_ASPECT_COLOR_BIT, fb->color.layout, VK_IMAGE_LAYOUT_GENERAL, VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, @@ -1649,7 +1683,7 @@ VKRRenderPass *VulkanQueueRunner::PerformBindFramebufferAsRenderTarget(const VKR fb->color.layout = VK_IMAGE_LAYOUT_GENERAL; } - TransitionToOptimal(cmd, fb->color.image, fb->color.layout, fb->depth.image, fb->depth.layout, &recordBarrier_); + TransitionToOptimal(cmd, fb->color.image, fb->color.layout, fb->depth.image, fb->depth.layout, fb->numLayers, &recordBarrier_); // The transition from the optimal format happens after EndRenderPass, now that we don't // do it as part of the renderpass itself anymore. 
@@ -1719,20 +1753,11 @@ void VulkanQueueRunner::PerformCopy(const VKRStep &step, VkCommandBuffer cmd) { VKRFramebuffer *src = step.copy.src; VKRFramebuffer *dst = step.copy.dst; - VkImageCopy copy{}; - copy.srcOffset.x = step.copy.srcRect.offset.x; - copy.srcOffset.y = step.copy.srcRect.offset.y; - copy.srcOffset.z = 0; - copy.srcSubresource.mipLevel = 0; - copy.srcSubresource.layerCount = 1; - copy.dstOffset.x = step.copy.dstPos.x; - copy.dstOffset.y = step.copy.dstPos.y; - copy.dstOffset.z = 0; - copy.dstSubresource.mipLevel = 0; - copy.dstSubresource.layerCount = 1; - copy.extent.width = step.copy.srcRect.extent.width; - copy.extent.height = step.copy.srcRect.extent.height; - copy.extent.depth = 1; + int layerCount = std::min(step.copy.src->numLayers, step.copy.dst->numLayers); + _dbg_assert_(step.copy.src->numLayers >= step.copy.dst->numLayers); + + // TODO: If dst covers exactly the whole destination, we can set up a UNDEFINED->TRANSFER_DST_OPTIMAL transition, + // which can potentially be more efficient. // First source barriers. 
if (step.copy.aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { @@ -1763,6 +1788,21 @@ void VulkanQueueRunner::PerformCopy(const VKRStep &step, VkCommandBuffer cmd) { recordBarrier_.Flush(cmd); + VkImageCopy copy{}; + copy.srcOffset.x = step.copy.srcRect.offset.x; + copy.srcOffset.y = step.copy.srcRect.offset.y; + copy.srcOffset.z = 0; + copy.srcSubresource.mipLevel = 0; + copy.srcSubresource.layerCount = layerCount; + copy.dstOffset.x = step.copy.dstPos.x; + copy.dstOffset.y = step.copy.dstPos.y; + copy.dstOffset.z = 0; + copy.dstSubresource.mipLevel = 0; + copy.dstSubresource.layerCount = layerCount; + copy.extent.width = step.copy.srcRect.extent.width; + copy.extent.height = step.copy.srcRect.extent.height; + copy.extent.depth = 1; + if (step.copy.aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { copy.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; copy.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; @@ -1781,35 +1821,12 @@ void VulkanQueueRunner::PerformBlit(const VKRStep &step, VkCommandBuffer cmd) { // The barrier code doesn't handle this case. We'd need to transition to GENERAL to do an intra-image copy. _dbg_assert_(step.blit.src != step.blit.dst); + int layerCount = std::min(step.blit.src->numLayers, step.blit.dst->numLayers); + _dbg_assert_(step.blit.src->numLayers >= step.blit.dst->numLayers); + VKRFramebuffer *src = step.blit.src; VKRFramebuffer *dst = step.blit.dst; - // If any validation needs to be performed here, it should probably have been done - // already when the blit was queued. So don't validate here. 
- VkImageBlit blit{}; - blit.srcOffsets[0].x = step.blit.srcRect.offset.x; - blit.srcOffsets[0].y = step.blit.srcRect.offset.y; - blit.srcOffsets[0].z = 0; - blit.srcOffsets[1].x = step.blit.srcRect.offset.x + step.blit.srcRect.extent.width; - blit.srcOffsets[1].y = step.blit.srcRect.offset.y + step.blit.srcRect.extent.height; - blit.srcOffsets[1].z = 1; - blit.srcSubresource.mipLevel = 0; - blit.srcSubresource.layerCount = 1; - blit.dstOffsets[0].x = step.blit.dstRect.offset.x; - blit.dstOffsets[0].y = step.blit.dstRect.offset.y; - blit.dstOffsets[0].z = 0; - blit.dstOffsets[1].x = step.blit.dstRect.offset.x + step.blit.dstRect.extent.width; - blit.dstOffsets[1].y = step.blit.dstRect.offset.y + step.blit.dstRect.extent.height; - blit.dstOffsets[1].z = 1; - blit.dstSubresource.mipLevel = 0; - blit.dstSubresource.layerCount = 1; - - VkPipelineStageFlags srcStage = 0; - VkPipelineStageFlags dstStage = 0; - - int srcCount = 0; - int dstCount = 0; - // First source barriers. if (step.blit.aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { if (src->color.layout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) { @@ -1835,6 +1852,26 @@ void VulkanQueueRunner::PerformBlit(const VKRStep &step, VkCommandBuffer cmd) { recordBarrier_.Flush(cmd); + // If any validation needs to be performed here, it should probably have been done + // already when the blit was queued. So don't validate here. 
+ VkImageBlit blit{}; + blit.srcOffsets[0].x = step.blit.srcRect.offset.x; + blit.srcOffsets[0].y = step.blit.srcRect.offset.y; + blit.srcOffsets[0].z = 0; + blit.srcOffsets[1].x = step.blit.srcRect.offset.x + step.blit.srcRect.extent.width; + blit.srcOffsets[1].y = step.blit.srcRect.offset.y + step.blit.srcRect.extent.height; + blit.srcOffsets[1].z = 1; + blit.srcSubresource.mipLevel = 0; + blit.srcSubresource.layerCount = layerCount; + blit.dstOffsets[0].x = step.blit.dstRect.offset.x; + blit.dstOffsets[0].y = step.blit.dstRect.offset.y; + blit.dstOffsets[0].z = 0; + blit.dstOffsets[1].x = step.blit.dstRect.offset.x + step.blit.dstRect.extent.width; + blit.dstOffsets[1].y = step.blit.dstRect.offset.y + step.blit.dstRect.extent.height; + blit.dstOffsets[1].z = 1; + blit.dstSubresource.mipLevel = 0; + blit.dstSubresource.layerCount = layerCount; + if (step.blit.aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { blit.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; blit.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; @@ -1895,6 +1932,7 @@ void VulkanQueueRunner::SetupTransitionToTransferSrc(VKRImage &img, VkImageAspec img.image, 0, 1, + img.numLayers, imageAspect, img.layout, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, @@ -1943,6 +1981,7 @@ void VulkanQueueRunner::SetupTransitionToTransferDst(VKRImage &img, VkImageAspec img.image, 0, 1, + img.numLayers, aspect, img.layout, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, @@ -1972,6 +2011,7 @@ void VulkanQueueRunner::SetupTransferDstWriteAfterWrite(VKRImage &img, VkImageAs img.image, 0, 1, + img.numLayers, aspect, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, @@ -2000,7 +2040,8 @@ void VulkanQueueRunner::PerformReadback(const VKRStep &step, VkCommandBuffer cmd if (step.readback.src == nullptr) { // We only take screenshots after the main render pass (anything else would be stupid) so we need to transition out of PRESENT, // and then back into it. 
- TransitionImageLayout2(cmd, backbufferImage_, 0, 1, VK_IMAGE_ASPECT_COLOR_BIT, + // Regarding layers, backbuffer currently only has one layer. + TransitionImageLayout2(cmd, backbufferImage_, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, VK_ACCESS_TRANSFER_READ_BIT); @@ -2034,7 +2075,8 @@ void VulkanQueueRunner::PerformReadback(const VKRStep &step, VkCommandBuffer cmd if (step.readback.src == nullptr) { // We only take screenshots after the main render pass (anything else would be stupid) so we need to transition out of PRESENT, // and then back into it. - TransitionImageLayout2(cmd, backbufferImage_, 0, 1, VK_IMAGE_ASPECT_COLOR_BIT, + // Regarding layers, backbuffer currently only has one layer. + TransitionImageLayout2(cmd, backbufferImage_, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_ACCESS_TRANSFER_READ_BIT, 0); @@ -2065,7 +2107,7 @@ void VulkanQueueRunner::PerformReadbackImage(const VKRStep &step, VkCommandBuffe vkCmdCopyImageToBuffer(cmd, step.readback_image.image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, readbackBuffer_, 1, &region); // Now transfer it back to a texture. - TransitionImageLayout2(cmd, step.readback_image.image, 0, 1, + TransitionImageLayout2(cmd, step.readback_image.image, 0, 1, 1, // I don't think we have any multilayer cases for regular textures. Above in PerformReadback, though.. 
VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, diff --git a/Common/GPU/Vulkan/VulkanQueueRunner.h b/Common/GPU/Vulkan/VulkanQueueRunner.h index ab0bc3ac7191..effc3645bc72 100644 --- a/Common/GPU/Vulkan/VulkanQueueRunner.h +++ b/Common/GPU/Vulkan/VulkanQueueRunner.h @@ -42,24 +42,30 @@ enum class VKRRenderCommand : uint8_t { NUM_RENDER_COMMANDS, }; -enum class PipelineFlags { +enum class PipelineFlags : u8 { NONE = 0, USES_BLEND_CONSTANT = (1 << 1), USES_DEPTH_STENCIL = (1 << 2), // Reads or writes the depth or stencil buffers. USES_INPUT_ATTACHMENT = (1 << 3), USES_GEOMETRY_SHADER = (1 << 4), + USES_MULTIVIEW = (1 << 5), // Inherited from the render pass it was created with. }; ENUM_CLASS_BITOPS(PipelineFlags); // Pipelines need to be created for the right type of render pass. enum RenderPassType { - // These four are organized so that bit 0 is DEPTH and bit 1 is INPUT, so + // These eight are organized so that bit 0 is DEPTH and bit 1 is INPUT and bit 2 is MULTIVIEW, so // they can be OR-ed together in MergeRPTypes. RP_TYPE_COLOR, RP_TYPE_COLOR_DEPTH, RP_TYPE_COLOR_INPUT, RP_TYPE_COLOR_DEPTH_INPUT, + RP_TYPE_MULTIVIEW_COLOR, + RP_TYPE_MULTIVIEW_COLOR_DEPTH, + RP_TYPE_MULTIVIEW_COLOR_INPUT, + RP_TYPE_MULTIVIEW_COLOR_DEPTH_INPUT, + // This is the odd one out, and gets special handling in MergeRPTypes. RP_TYPE_BACKBUFFER, // For the backbuffer we can always use CLEAR/DONT_CARE, so bandwidth cost for a depth channel is negligible. @@ -67,12 +73,18 @@ enum RenderPassType { RP_TYPE_COUNT, }; +// Hm, soon time to exploit the bit properties in these.. 
+ inline bool RenderPassTypeHasDepth(RenderPassType type) { - return type == RP_TYPE_BACKBUFFER || type == RP_TYPE_COLOR_DEPTH || type == RP_TYPE_COLOR_DEPTH_INPUT; + return type == RP_TYPE_BACKBUFFER || type == RP_TYPE_COLOR_DEPTH || type == RP_TYPE_COLOR_DEPTH_INPUT || type == RP_TYPE_MULTIVIEW_COLOR_DEPTH || type == RP_TYPE_MULTIVIEW_COLOR_DEPTH_INPUT; } inline bool RenderPassTypeHasInput(RenderPassType type) { - return type == RP_TYPE_COLOR_INPUT || type == RP_TYPE_COLOR_DEPTH_INPUT; + return type == RP_TYPE_COLOR_INPUT || type == RP_TYPE_COLOR_DEPTH_INPUT || type == RP_TYPE_MULTIVIEW_COLOR_INPUT || type == RP_TYPE_MULTIVIEW_COLOR_DEPTH_INPUT; +} + +inline bool RenderPassTypeHasMultiView(RenderPassType type) { + return type == RP_TYPE_MULTIVIEW_COLOR || type == RP_TYPE_MULTIVIEW_COLOR_DEPTH || type == RP_TYPE_MULTIVIEW_COLOR_INPUT || type == RP_TYPE_MULTIVIEW_COLOR_DEPTH_INPUT; } struct VkRenderData { @@ -103,7 +115,7 @@ struct VkRenderData { VkDescriptorSet ds; int numUboOffsets; uint32_t uboOffsets[3]; - VkBuffer vbuffer; // might need to increase at some point + VkBuffer vbuffer; VkBuffer ibuffer; uint32_t voffset; uint32_t ioffset; diff --git a/Common/GPU/Vulkan/VulkanRenderManager.cpp b/Common/GPU/Vulkan/VulkanRenderManager.cpp index 8da78975d253..314e1920c3bd 100644 --- a/Common/GPU/Vulkan/VulkanRenderManager.cpp +++ b/Common/GPU/Vulkan/VulkanRenderManager.cpp @@ -155,15 +155,14 @@ bool VKRComputePipeline::Create(VulkanContext *vulkan) { return success; } -VKRFramebuffer::VKRFramebuffer(VulkanContext *vk, VkCommandBuffer initCmd, VKRRenderPass *compatibleRenderPass, int _width, int _height, bool createDepthStencilBuffer, const char *tag) : vulkan_(vk), tag_(tag) { - width = _width; - height = _height; +VKRFramebuffer::VKRFramebuffer(VulkanContext *vk, VkCommandBuffer initCmd, VKRRenderPass *compatibleRenderPass, int _width, int _height, int _numLayers, bool createDepthStencilBuffer, const char *tag) + : vulkan_(vk), tag_(tag), width(_width), 
height(_height), numLayers(_numLayers) { _dbg_assert_(tag); - CreateImage(vulkan_, initCmd, color, width, height, VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, true, tag); + CreateImage(vulkan_, initCmd, color, width, height, numLayers, VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, true, tag); if (createDepthStencilBuffer) { - CreateImage(vulkan_, initCmd, depth, width, height, vulkan_->GetDeviceInfo().preferredDepthStencilFormat, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, false, tag); + CreateImage(vulkan_, initCmd, depth, width, height, numLayers, vulkan_->GetDeviceInfo().preferredDepthStencilFormat, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, false, tag); } UpdateTag(tag); @@ -176,11 +175,11 @@ void VKRFramebuffer::UpdateTag(const char *newTag) { char name[128]; snprintf(name, sizeof(name), "fb_color_%s", tag_.c_str()); vulkan_->SetDebugName(color.image, VK_OBJECT_TYPE_IMAGE, name); - vulkan_->SetDebugName(color.imageView, VK_OBJECT_TYPE_IMAGE_VIEW, name); + vulkan_->SetDebugName(color.rtView, VK_OBJECT_TYPE_IMAGE_VIEW, name); if (depth.image) { snprintf(name, sizeof(name), "fb_depth_%s", tag_.c_str()); vulkan_->SetDebugName(depth.image, VK_OBJECT_TYPE_IMAGE, name); - vulkan_->SetDebugName(depth.imageView, VK_OBJECT_TYPE_IMAGE_VIEW, name); + vulkan_->SetDebugName(depth.rtView, VK_OBJECT_TYPE_IMAGE_VIEW, name); } for (int rpType = 0; rpType < RP_TYPE_COUNT; rpType++) { if (framebuf[rpType]) { @@ -191,6 +190,8 @@ void VKRFramebuffer::UpdateTag(const char *newTag) { } VkFramebuffer VKRFramebuffer::Get(VKRRenderPass *compatibleRenderPass, RenderPassType rpType) { + bool multiview = RenderPassTypeHasMultiView(rpType); + if (framebuf[(int)rpType]) { return framebuf[(int)rpType]; } @@ -198,19 +199,17 @@ VkFramebuffer VKRFramebuffer::Get(VKRRenderPass *compatibleRenderPass, RenderPas VkFramebufferCreateInfo fbci{ VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO }; VkImageView views[2]{}; - bool hasDepth = rpType == 
RP_TYPE_BACKBUFFER || rpType == RP_TYPE_COLOR_DEPTH || rpType == RP_TYPE_COLOR_DEPTH_INPUT; - - views[0] = color.imageView; + bool hasDepth = RenderPassTypeHasDepth(rpType); + views[0] = color.rtView; // 2D array texture if multilayered. if (hasDepth) { - _dbg_assert_(depth.imageView != VK_NULL_HANDLE); - views[1] = depth.imageView; + views[1] = depth.rtView; } fbci.renderPass = compatibleRenderPass->Get(vulkan_, rpType); fbci.attachmentCount = hasDepth ? 2 : 1; fbci.pAttachments = views; fbci.width = width; fbci.height = height; - fbci.layers = 1; + fbci.layers = 1; // With multiview, this should be set as 1. VkResult res = vkCreateFramebuffer(vulkan_->GetDevice(), &fbci, nullptr, &framebuf[(int)rpType]); _assert_(res == VK_SUCCESS); @@ -223,10 +222,24 @@ VkFramebuffer VKRFramebuffer::Get(VKRRenderPass *compatibleRenderPass, RenderPas } VKRFramebuffer::~VKRFramebuffer() { - if (color.imageView) - vulkan_->Delete().QueueDeleteImageView(color.imageView); - if (depth.imageView) - vulkan_->Delete().QueueDeleteImageView(depth.imageView); + // Get rid of the views first, feels cleaner (but in reality doesn't matter). 
+ if (color.rtView) + vulkan_->Delete().QueueDeleteImageView(color.rtView); + if (depth.rtView) + vulkan_->Delete().QueueDeleteImageView(depth.rtView); + if (color.texAllLayersView) + vulkan_->Delete().QueueDeleteImageView(color.texAllLayersView); + if (depth.texAllLayersView) + vulkan_->Delete().QueueDeleteImageView(depth.texAllLayersView); + for (int i = 0; i < 2; i++) { + if (color.texLayerViews[i]) { + vulkan_->Delete().QueueDeleteImageView(color.texLayerViews[i]); + } + if (depth.texLayerViews[i]) { + vulkan_->Delete().QueueDeleteImageView(depth.texLayerViews[i]); + } + } + if (color.image) { _dbg_assert_(color.alloc); vulkan_->Delete().QueueDeleteImageAllocation(color.image, color.alloc); @@ -235,17 +248,19 @@ VKRFramebuffer::~VKRFramebuffer() { _dbg_assert_(depth.alloc); vulkan_->Delete().QueueDeleteImageAllocation(depth.image, depth.alloc); } - if (depth.depthSampleView) - vulkan_->Delete().QueueDeleteImageView(depth.depthSampleView); for (auto &fb : framebuf) { - if (fb) + if (fb) { vulkan_->Delete().QueueDeleteFramebuffer(fb); + } } } -void CreateImage(VulkanContext *vulkan, VkCommandBuffer cmd, VKRImage &img, int width, int height, VkFormat format, VkImageLayout initialLayout, bool color, const char *tag) { +void CreateImage(VulkanContext *vulkan, VkCommandBuffer cmd, VKRImage &img, int width, int height, int numLayers, VkFormat format, VkImageLayout initialLayout, bool color, const char *tag) { + // We don't support more exotic layer setups for now. Mono or stereo. 
+ _dbg_assert_(numLayers == 1 || numLayers == 2); + VkImageCreateInfo ici{ VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO }; - ici.arrayLayers = 1; + ici.arrayLayers = numLayers; ici.mipLevels = 1; ici.extent.width = width; ici.extent.height = height; @@ -276,20 +291,37 @@ void CreateImage(VulkanContext *vulkan, VkCommandBuffer cmd, VKRImage &img, int ivci.components = { VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY }; ivci.format = ici.format; ivci.image = img.image; - ivci.viewType = VK_IMAGE_VIEW_TYPE_2D; + ivci.viewType = numLayers == 1 ? VK_IMAGE_VIEW_TYPE_2D : VK_IMAGE_VIEW_TYPE_2D_ARRAY; ivci.subresourceRange.aspectMask = aspects; - ivci.subresourceRange.layerCount = 1; + ivci.subresourceRange.layerCount = numLayers; ivci.subresourceRange.levelCount = 1; - res = vkCreateImageView(vulkan->GetDevice(), &ivci, nullptr, &img.imageView); + res = vkCreateImageView(vulkan->GetDevice(), &ivci, nullptr, &img.rtView); + vulkan->SetDebugName(img.rtView, VK_OBJECT_TYPE_IMAGE_VIEW, tag); + _dbg_assert_(res == VK_SUCCESS); - // Separate view for texture sampling that only exposes depth. + // Separate view for texture sampling all layers together. if (!color) { ivci.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; - res = vkCreateImageView(vulkan->GetDevice(), &ivci, nullptr, &img.depthSampleView); + } + + ivci.viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY; // layered for consistency, even if single image. + res = vkCreateImageView(vulkan->GetDevice(), &ivci, nullptr, &img.texAllLayersView); + vulkan->SetDebugName(img.texAllLayersView, VK_OBJECT_TYPE_IMAGE_VIEW, tag); + + // Create 2D views for both layers. + // Useful when multipassing shaders that don't yet exist in a single-pass-stereo version. 
+ for (int i = 0; i < numLayers; i++) { + ivci.viewType = VK_IMAGE_VIEW_TYPE_2D; + ivci.subresourceRange.layerCount = 1; + ivci.subresourceRange.baseArrayLayer = i; + res = vkCreateImageView(vulkan->GetDevice(), &ivci, nullptr, &img.texLayerViews[i]); + if (vulkan->DebugLayerEnabled()) { + char temp[128]; + snprintf(temp, sizeof(temp), "%s_layer%d", tag, i); + vulkan->SetDebugName(img.texLayerViews[i], VK_OBJECT_TYPE_IMAGE_VIEW, temp); + } _dbg_assert_(res == VK_SUCCESS); - } else { - img.depthSampleView = VK_NULL_HANDLE; } VkPipelineStageFlags dstStage; @@ -312,14 +344,14 @@ void CreateImage(VulkanContext *vulkan, VkCommandBuffer cmd, VKRImage &img, int return; } - TransitionImageLayout2(cmd, img.image, 0, 1, aspects, + TransitionImageLayout2(cmd, img.image, 0, 1, numLayers, aspects, VK_IMAGE_LAYOUT_UNDEFINED, initialLayout, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, dstStage, 0, dstAccessMask); img.layout = initialLayout; - img.format = format; img.tag = tag ? tag : "N/A"; + img.numLayers = numLayers; } VulkanRenderManager::VulkanRenderManager(VulkanContext *vulkan) @@ -711,10 +743,19 @@ void VulkanRenderManager::EndCurRenderStep() { RenderPassType rpType = depthStencil ? RP_TYPE_COLOR_DEPTH : RP_TYPE_COLOR; if (!curRenderStep_->render.framebuffer) { rpType = RP_TYPE_BACKBUFFER; - } else if (curPipelineFlags_ & PipelineFlags::USES_INPUT_ATTACHMENT) { - // Not allowed on backbuffers. - rpType = depthStencil ? RP_TYPE_COLOR_DEPTH_INPUT : RP_TYPE_COLOR_INPUT; + } else { + if (curPipelineFlags_ & PipelineFlags::USES_INPUT_ATTACHMENT) { + // Not allowed on backbuffers. + rpType = depthStencil ? RP_TYPE_COLOR_DEPTH_INPUT : RP_TYPE_COLOR_INPUT; + } + // Framebuffers can be stereo, and if so, will control the render pass type to match. + // Pipelines can be mono and render fine to stereo etc, so not checking them here. + // Note that we don't support rendering to just one layer of a multilayer framebuffer! 
+ if (curRenderStep_->render.framebuffer->numLayers > 1) { + rpType = (RenderPassType)(rpType | RP_TYPE_MULTIVIEW_COLOR); + } } + // TODO: Also add render pass types for depth/stencil-less. VKRRenderPass *renderPass = queueRunner_.GetRenderPass(key); @@ -1176,7 +1217,7 @@ void VulkanRenderManager::BlitFramebuffer(VKRFramebuffer *src, VkRect2D srcRect, steps_.push_back(step); } -VkImageView VulkanRenderManager::BindFramebufferAsTexture(VKRFramebuffer *fb, int binding, VkImageAspectFlags aspectBit) { +VkImageView VulkanRenderManager::BindFramebufferAsTexture(VKRFramebuffer *fb, int binding, VkImageAspectFlags aspectBit, int layer) { _dbg_assert_(curRenderStep_ != nullptr); // We don't support texturing from stencil, neither do we support texturing from depth|stencil together (nonsensical). @@ -1211,7 +1252,12 @@ VkImageView VulkanRenderManager::BindFramebufferAsTexture(VKRFramebuffer *fb, in // Add this pretransition unless we already have it. TransitionRequest rq{ fb, aspectBit, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }; curRenderStep_->preTransitions.insert(rq); // Note that insert avoids inserting duplicates. - return aspectBit == VK_IMAGE_ASPECT_COLOR_BIT ? fb->color.imageView : fb->depth.depthSampleView; + + if (layer == -1) { + return aspectBit == VK_IMAGE_ASPECT_COLOR_BIT ? fb->color.texAllLayersView : fb->depth.texAllLayersView; + } else { + return aspectBit == VK_IMAGE_ASPECT_COLOR_BIT ? fb->color.texLayerViews[layer] : fb->depth.texLayerViews[layer]; + } } // Called on main thread. diff --git a/Common/GPU/Vulkan/VulkanRenderManager.h b/Common/GPU/Vulkan/VulkanRenderManager.h index 074e0c1db46b..afe8ac15feaa 100644 --- a/Common/GPU/Vulkan/VulkanRenderManager.h +++ b/Common/GPU/Vulkan/VulkanRenderManager.h @@ -24,33 +24,47 @@ // Forward declaration VK_DEFINE_HANDLE(VmaAllocation); -// Simple independent framebuffer image. Gets its own allocation, we don't have that many framebuffers so it's fine -// to let them have individual non-pooled allocations. 
Until it's not fine. We'll see. +// Simple independent framebuffer image. struct VKRImage { // These four are "immutable". VkImage image; - VkImageView imageView; - VkImageView depthSampleView; + + VkImageView rtView; // Used for rendering to, and readbacks of stencil. 2D if single layer, 2D_ARRAY if multiple. Includes both depth and stencil if depth/stencil. + + // This is for texturing all layers at once. If aspect is depth/stencil, does not include stencil. + VkImageView texAllLayersView; + + // If it's a layered image (for stereo), this is two 2D views of it, to make it compatible with shaders that don't yet support stereo. + // If there's only one layer, layerViews[0] only is initialized. + VkImageView texLayerViews[2]{}; + VmaAllocation alloc; VkFormat format; // This one is used by QueueRunner's Perform functions to keep track. CANNOT be used anywhere else due to sync issues. VkImageLayout layout; + int numLayers; + // For debugging. std::string tag; }; -void CreateImage(VulkanContext *vulkan, VkCommandBuffer cmd, VKRImage &img, int width, int height, VkFormat format, VkImageLayout initialLayout, bool color, const char *tag); + +// NOTE: If numLayers > 1, it will create an array texture, rather than a normal 2D texture. +// This requires a different sampling path! 
+void CreateImage(VulkanContext *vulkan, VkCommandBuffer cmd, VKRImage &img, int width, int height, int numLayers, VkFormat format, VkImageLayout initialLayout, bool color, const char *tag); class VKRFramebuffer { public: - VKRFramebuffer(VulkanContext *vk, VkCommandBuffer initCmd, VKRRenderPass *compatibleRenderPass, int _width, int _height, bool createDepthStencilBuffer, const char *tag); + VKRFramebuffer(VulkanContext *vk, VkCommandBuffer initCmd, VKRRenderPass *compatibleRenderPass, int _width, int _height, int _numLayers, bool createDepthStencilBuffer, const char *tag); ~VKRFramebuffer(); VkFramebuffer Get(VKRRenderPass *compatibleRenderPass, RenderPassType rpType); int width = 0; int height = 0; + int numLayers = 0; + VKRImage color{}; // color.image is always there. VKRImage depth{}; // depth.image is allowed to be VK_NULL_HANDLE. @@ -211,7 +225,7 @@ class VulkanRenderManager { // Zaps queued up commands. Use if you know there's a risk you've queued up stuff that has already been deleted. Can happen during in-game shutdown. void Wipe(); - // This starts a new step containing a render pass. + // This starts a new step containing a render pass (unless it can be trivially merged into the previous one, which is pretty common). // // After a "CopyFramebuffer" or the other functions that start "steps", you need to call this beforce // making any new render state changes or draw calls. @@ -230,7 +244,9 @@ class VulkanRenderManager { // Returns an ImageView corresponding to a framebuffer. Is called BindFramebufferAsTexture to maintain a similar interface // as the other backends, even though there's no actual binding happening here. - VkImageView BindFramebufferAsTexture(VKRFramebuffer *fb, int binding, VkImageAspectFlags aspectBits); + // For layer, we use the same convention as thin3d, where layer = -1 means all layers together. For texturing, that means that you + // get an array texture view. 
+ VkImageView BindFramebufferAsTexture(VKRFramebuffer *fb, int binding, VkImageAspectFlags aspectBits, int layer); void BindCurrentFramebufferAsInputAttachment0(VkImageAspectFlags aspectBits); diff --git a/Common/GPU/Vulkan/thin3d_vulkan.cpp b/Common/GPU/Vulkan/thin3d_vulkan.cpp index cedb21b49ffd..c5dcc65f408c 100644 --- a/Common/GPU/Vulkan/thin3d_vulkan.cpp +++ b/Common/GPU/Vulkan/thin3d_vulkan.cpp @@ -270,6 +270,7 @@ class VKPipeline : public Pipeline { } void SetDynamicUniformData(const void *data, size_t size) { + _dbg_assert_(size <= uboSize_); memcpy(ubo_, data, size); } @@ -337,10 +338,16 @@ class VKTexture : public Texture { if (vkTex_) { vkTex_->Touch(); return vkTex_->GetImageView(); - } else { - // This would be bad. - return VK_NULL_HANDLE; } + return VK_NULL_HANDLE; // This would be bad. + } + + VkImageView GetImageArrayView() { + if (vkTex_) { + vkTex_->Touch(); + return vkTex_->GetImageArrayView(); + } + return VK_NULL_HANDLE; // This would be bad. } private: @@ -405,10 +412,7 @@ class VKContext : public DrawContext { // These functions should be self explanatory. 
void BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp, const char *tag) override; - Framebuffer *GetCurrentRenderTarget() override { - return (Framebuffer *)curFramebuffer_.ptr; - } - void BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit) override; + void BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int layer) override; void BindCurrentFramebufferForColorInput() override; void GetFramebufferDimensions(Framebuffer *fbo, int *w, int *h) override; @@ -419,7 +423,7 @@ class VKContext : public DrawContext { void SetStencilParams(uint8_t refValue, uint8_t writeMask, uint8_t compareMask) override; void BindSamplerStates(int start, int count, SamplerState **state) override; - void BindTextures(int start, int count, Texture **textures) override; + void BindTextures(int start, int count, Texture **textures, TextureBindFlags flags) override; void BindNativeTexture(int sampler, void *nativeTexture) override; void BindPipeline(Pipeline *pipeline) override { @@ -524,6 +528,7 @@ class VKContext : public DrawContext { AutoRef boundTextures_[MAX_BOUND_TEXTURES]; AutoRef boundSamplers_[MAX_BOUND_TEXTURES]; VkImageView boundImageView_[MAX_BOUND_TEXTURES]{}; + TextureBindFlags boundTextureFlags_[MAX_BOUND_TEXTURES]; struct FrameData { FrameData() : descriptorPool("VKContext", false) { @@ -780,7 +785,6 @@ VKContext::VKContext(VulkanContext *vulkan) caps_.anisoSupported = vulkan->GetDeviceFeatures().enabled.standard.samplerAnisotropy != 0; caps_.geometryShaderSupported = vulkan->GetDeviceFeatures().enabled.standard.geometryShader != 0; caps_.tesselationShaderSupported = vulkan->GetDeviceFeatures().enabled.standard.tessellationShader != 0; - caps_.multiViewport = vulkan->GetDeviceFeatures().enabled.standard.multiViewport != 0; caps_.dualSourceBlend = vulkan->GetDeviceFeatures().enabled.standard.dualSrcBlend != 0; caps_.depthClampSupported = vulkan->GetDeviceFeatures().enabled.standard.depthClamp != 0; 
caps_.clipDistanceSupported = vulkan->GetDeviceFeatures().enabled.standard.shaderClipDistance != 0; @@ -799,6 +803,7 @@ VKContext::VKContext(VulkanContext *vulkan) caps_.fragmentShaderDepthWriteSupported = true; caps_.blendMinMaxSupported = true; caps_.logicOpSupported = vulkan->GetDeviceFeatures().enabled.standard.logicOp != 0; + caps_.multiViewSupported = vulkan->GetDeviceFeatures().enabled.multiview.multiview != 0; auto deviceProps = vulkan->GetPhysicalDeviceProperties(vulkan_->GetCurrentPhysicalDeviceIndex()).properties; @@ -996,7 +1001,11 @@ VkDescriptorSet VKContext::GetOrCreateDescriptorSet(VkBuffer buf) { FrameData *frame = &frame_[vulkan_->GetCurFrame()]; for (int i = 0; i < MAX_BOUND_TEXTURES; ++i) { - key.imageViews_[i] = boundTextures_[i] ? boundTextures_[i]->GetImageView() : boundImageView_[i]; + if (boundTextures_[i]) { + key.imageViews_[i] = (boundTextureFlags_[i] & TextureBindFlags::VULKAN_BIND_ARRAY) ? boundTextures_[i]->GetImageArrayView() : boundTextures_[i]->GetImageView(); + } else { + key.imageViews_[i] = boundImageView_[i]; + } key.samplers_[i] = boundSamplers_[i]; } key.buffer_ = buf; @@ -1006,7 +1015,7 @@ VkDescriptorSet VKContext::GetOrCreateDescriptorSet(VkBuffer buf) { return iter->second; } - VkDescriptorSet descSet = frame->descriptorPool.Allocate(1, &descriptorSetLayout_); + VkDescriptorSet descSet = frame->descriptorPool.Allocate(1, &descriptorSetLayout_, "thin3d_descset"); if (descSet == VK_NULL_HANDLE) { ERROR_LOG(G3D, "GetOrCreateDescriptorSet failed"); return VK_NULL_HANDLE; @@ -1298,15 +1307,28 @@ void VKContext::UpdateBuffer(Buffer *buffer, const uint8_t *data, size_t offset, memcpy(buf->data_ + offset, data, size); } -void VKContext::BindTextures(int start, int count, Texture **textures) { +void VKContext::BindTextures(int start, int count, Texture **textures, TextureBindFlags flags) { _assert_(start + count <= MAX_BOUND_TEXTURES); for (int i = start; i < start + count; i++) { + _dbg_assert_(i >= 0 && i < MAX_BOUND_TEXTURES); 
boundTextures_[i] = static_cast(textures[i - start]); - boundImageView_[i] = boundTextures_[i] ? boundTextures_[i]->GetImageView() : GetNullTexture()->GetImageView(); + boundTextureFlags_[i] = flags; + if (boundTextures_[i]) { + // If a texture is bound, we set these up in GetOrCreateDescriptorSet too. + // But we might need to set the view here anyway so it can be queried using GetNativeObject. + if (flags & TextureBindFlags::VULKAN_BIND_ARRAY) { + boundImageView_[i] = boundTextures_[i]->GetImageArrayView(); + } else { + boundImageView_[i] = boundTextures_[i]->GetImageView(); + } + } else { + boundImageView_[i] = GetNullTexture()->GetImageView(); + } } } void VKContext::BindNativeTexture(int sampler, void *nativeTexture) { + _dbg_assert_(sampler >= 0 && sampler < MAX_BOUND_TEXTURES); boundTextures_[sampler] = nullptr; boundImageView_[sampler] = (VkImageView)nativeTexture; } @@ -1486,6 +1508,7 @@ class VKFramebuffer : public Framebuffer { _assert_msg_(fb, "Null fb in VKFramebuffer constructor"); width_ = fb->width; height_ = fb->height; + layers_ = fb->numLayers; } ~VKFramebuffer() { _assert_msg_(buf_, "Null buf_ in VKFramebuffer - double delete?"); @@ -1505,7 +1528,7 @@ class VKFramebuffer : public Framebuffer { Framebuffer *VKContext::CreateFramebuffer(const FramebufferDesc &desc) { VkCommandBuffer cmd = renderManager_.GetInitCmd(); - VKRFramebuffer *vkrfb = new VKRFramebuffer(vulkan_, cmd, renderManager_.GetQueueRunner()->GetCompatibleRenderPass(), desc.width, desc.height, desc.z_stencil, desc.tag); + VKRFramebuffer *vkrfb = new VKRFramebuffer(vulkan_, cmd, renderManager_.GetQueueRunner()->GetCompatibleRenderPass(), desc.width, desc.height, desc.numLayers, desc.z_stencil, desc.tag); return new VKFramebuffer(vkrfb); } @@ -1566,9 +1589,9 @@ void VKContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPass curFramebuffer_ = fb; } -void VKContext::BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit) { +void 
VKContext::BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int layer) { VKFramebuffer *fb = (VKFramebuffer *)fbo; - _assert_(binding < MAX_BOUND_TEXTURES); + _assert_(binding >= 0 && binding < MAX_BOUND_TEXTURES); // TODO: There are cases where this is okay, actually. But requires layout transitions and stuff - // we're not ready for this. @@ -1587,8 +1610,8 @@ void VKContext::BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChanne break; } - boundTextures_[binding] = nullptr; - boundImageView_[binding] = renderManager_.BindFramebufferAsTexture(fb->GetFB(), binding, aspect); + boundTextures_[binding].clear(); + boundImageView_[binding] = renderManager_.BindFramebufferAsTexture(fb->GetFB(), binding, aspect, layer); } void VKContext::BindCurrentFramebufferForColorInput() { @@ -1651,8 +1674,13 @@ uint64_t VKContext::GetNativeObject(NativeObject obj, void *srcObject) { return (uint64_t)GetNullTexture()->GetImageView(); case NativeObject::TEXTURE_VIEW: return (uint64_t)(((VKTexture *)srcObject)->GetImageView()); - case NativeObject::BOUND_FRAMEBUFFER_COLOR_IMAGEVIEW: - return (uint64_t)curFramebuffer_->GetFB()->color.imageView; + case NativeObject::BOUND_FRAMEBUFFER_COLOR_IMAGEVIEW_ALL_LAYERS: + return (uint64_t)curFramebuffer_->GetFB()->color.texAllLayersView; + case NativeObject::BOUND_FRAMEBUFFER_COLOR_IMAGEVIEW_LAYER: { + size_t layer = (size_t)srcObject; + _dbg_assert_(layer < curFramebuffer_->Layers()); + return (uint64_t)curFramebuffer_->GetFB()->color.texLayerViews[layer]; + } default: Crash(); return 0; diff --git a/Common/GPU/thin3d.h b/Common/GPU/thin3d.h index 2d9ce7f8c5a2..e61795401012 100644 --- a/Common/GPU/thin3d.h +++ b/Common/GPU/thin3d.h @@ -13,6 +13,7 @@ #include #include +#include "Common/Common.h" #include "Common/GPU/DataFormat.h" #include "Common/GPU/Shader.h" #include "Common/Data/Collections/Slice.h" @@ -241,9 +242,10 @@ enum class NativeObject { BACKBUFFER_DEPTH_TEX, FEATURE_LEVEL, INIT_COMMANDBUFFER, - 
BOUND_TEXTURE0_IMAGEVIEW, - BOUND_TEXTURE1_IMAGEVIEW, - BOUND_FRAMEBUFFER_COLOR_IMAGEVIEW, + BOUND_TEXTURE0_IMAGEVIEW, // Layer etc depends on how you bound it... + BOUND_TEXTURE1_IMAGEVIEW, // Layer etc depends on how you bound it... + BOUND_FRAMEBUFFER_COLOR_IMAGEVIEW_ALL_LAYERS, + BOUND_FRAMEBUFFER_COLOR_IMAGEVIEW_LAYER, // use an int cast to void *srcObject to specify layer. RENDER_MANAGER, TEXTURE_VIEW, NULL_IMAGEVIEW, @@ -293,7 +295,7 @@ struct FramebufferDesc { int width; int height; int depth; - int numColorAttachments; + int numLayers; bool z_stencil; const char *tag; // For graphics debuggers }; @@ -395,6 +397,16 @@ struct AutoRef { operator T *() { return ptr; } + operator bool() const { + return ptr != nullptr; + } + + void clear() { + if (ptr) { + ptr->Release(); + ptr = nullptr; + } + } T *ptr = nullptr; }; @@ -415,9 +427,11 @@ class Framebuffer : public RefCountedObject { public: int Width() { return width_; } int Height() { return height_; } + int Layers() { return layers_; } + virtual void UpdateTag(const char *tag) {} protected: - int width_ = -1, height_ = -1; + int width_ = -1, height_ = -1, layers_ = 1; }; class Buffer : public RefCountedObject { @@ -534,7 +548,6 @@ struct DeviceCaps { bool depthRangeMinusOneToOne; // OpenGL style depth bool geometryShaderSupported; bool tesselationShaderSupported; - bool multiViewport; bool dualSourceBlend; bool logicOpSupported; bool depthClampSupported; @@ -553,6 +566,7 @@ struct DeviceCaps { bool fragmentShaderDepthWriteSupported; bool textureDepthSupported; bool blendMinMaxSupported; + bool multiViewSupported; std::string deviceName; // The device name to use when creating the thin3d context, to get the same one. 
}; @@ -593,6 +607,14 @@ struct RenderPassInfo { const char *tag; }; +const int ALL_LAYERS = -1; + +enum class TextureBindFlags { + NONE = 0, + VULKAN_BIND_ARRAY = 1, +}; +ENUM_CLASS_BITOPS(TextureBindFlags); + class DrawContext { public: virtual ~DrawContext(); @@ -654,11 +676,11 @@ class DrawContext { // These functions should be self explanatory. // Binding a zero render target means binding the backbuffer. + // If an fbo has two layers, we bind for stereo rendering ALWAYS. There's no rendering to one layer anymore. virtual void BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp, const char *tag) = 0; - virtual Framebuffer *GetCurrentRenderTarget() = 0; // binding must be < MAX_TEXTURE_SLOTS (0, 1 are okay if it's 2). - virtual void BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit) = 0; + virtual void BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int layer) = 0; // Framebuffer fetch / input attachment support, needs to be explicit in Vulkan. 
virtual void BindCurrentFramebufferForColorInput() {} @@ -683,7 +705,7 @@ class DrawContext { virtual void SetStencilParams(uint8_t refValue, uint8_t writeMask, uint8_t compareMask) = 0; virtual void BindSamplerStates(int start, int count, SamplerState **state) = 0; - virtual void BindTextures(int start, int count, Texture **textures) = 0; + virtual void BindTextures(int start, int count, Texture **textures, TextureBindFlags flags = TextureBindFlags::NONE) = 0; virtual void BindVertexBuffers(int start, int count, Buffer **buffers, const int *offsets) = 0; virtual void BindIndexBuffer(Buffer *indexBuffer, int offset) = 0; diff --git a/Core/Config.cpp b/Core/Config.cpp index 1e44ed482b3b..91457df787e9 100644 --- a/Core/Config.cpp +++ b/Core/Config.cpp @@ -888,6 +888,8 @@ static ConfigSetting graphicsSettings[] = { ReportedConfigSetting("FrameSkip", &g_Config.iFrameSkip, 0, true, true), ReportedConfigSetting("FrameSkipType", &g_Config.iFrameSkipType, 0, true, true), ReportedConfigSetting("AutoFrameSkip", &g_Config.bAutoFrameSkip, false, true, true), + ConfigSetting("StereoRendering", &g_Config.bStereoRendering, false, true, true), + ConfigSetting("StereoToMonoShader", &g_Config.sStereoToMonoShader, "RedBlue", true, true), ConfigSetting("FrameRate", &g_Config.iFpsLimit1, 0, true, true), ConfigSetting("FrameRate2", &g_Config.iFpsLimit2, -1, true, true), ConfigSetting("AnalogFrameRate", &g_Config.iAnalogFpsLimit, 240, true, true), diff --git a/Core/Config.h b/Core/Config.h index 497e8266bd57..5a89c89732b0 100644 --- a/Core/Config.h +++ b/Core/Config.h @@ -241,6 +241,12 @@ struct Config { std::vector vPostShaderNames; // Off for chain end (only Off for no shader) std::map mPostShaderSetting; + + // Note that this is separate from VR stereo, though it'll share some code paths. + bool bStereoRendering; + // There can only be one, unlike regular post shaders. 
+ std::string sStereoToMonoShader; + bool bShaderChainRequires60FPS; std::string sTextureShaderName; bool bGfxDebugOutput; diff --git a/GPU/Common/DepalettizeShaderCommon.cpp b/GPU/Common/DepalettizeShaderCommon.cpp index a784d22377d8..52c67152eff4 100644 --- a/GPU/Common/DepalettizeShaderCommon.cpp +++ b/GPU/Common/DepalettizeShaderCommon.cpp @@ -35,7 +35,7 @@ static const InputDef vsInputs[2] = { // TODO: Deduplicate with TextureShaderCommon.cpp static const SamplerDef samplers[2] = { - { 0, "tex" }, + { 0, "tex", SamplerFlags::ARRAY_ON_VULKAN }, { 1, "pal" }, }; diff --git a/GPU/Common/Draw2D.cpp b/GPU/Common/Draw2D.cpp index 9b342e05bc02..2b811782331d 100644 --- a/GPU/Common/Draw2D.cpp +++ b/GPU/Common/Draw2D.cpp @@ -37,7 +37,7 @@ static const VaryingDef varyings[1] = { }; static const SamplerDef samplers[1] = { - { 0, "tex" }, + { 0, "tex", SamplerFlags::ARRAY_ON_VULKAN }, }; const UniformDef g_draw2Duniforms[2] = { @@ -88,8 +88,8 @@ Draw2DPipelineInfo GenerateDraw2DCopyColorRect2LinFs(ShaderWriter &writer) { } Draw2DPipelineInfo GenerateDraw2DCopyDepthFs(ShaderWriter &writer) { - writer.DeclareSamplers(samplers); writer.SetFlags(ShaderWriterFlags::FS_WRITE_DEPTH); + writer.DeclareSamplers(samplers); writer.BeginFSMain(Slice::empty(), varyings); writer.C(" vec4 outColor = vec4(0.0, 0.0, 0.0, 0.0);\n"); writer.C(" gl_FragDepth = ").SampleTexture2D("tex", "v_texcoord.xy").C(".x;\n"); @@ -103,8 +103,8 @@ Draw2DPipelineInfo GenerateDraw2DCopyDepthFs(ShaderWriter &writer) { } Draw2DPipelineInfo GenerateDraw2D565ToDepthFs(ShaderWriter &writer) { - writer.DeclareSamplers(samplers); writer.SetFlags(ShaderWriterFlags::FS_WRITE_DEPTH); + writer.DeclareSamplers(samplers); writer.BeginFSMain(Slice::empty(), varyings); writer.C(" vec4 outColor = vec4(0.0, 0.0, 0.0, 0.0);\n"); // Unlike when just copying a depth buffer, here we're generating new depth values so we'll @@ -123,8 +123,8 @@ Draw2DPipelineInfo GenerateDraw2D565ToDepthFs(ShaderWriter &writer) { } 
Draw2DPipelineInfo GenerateDraw2D565ToDepthDeswizzleFs(ShaderWriter &writer) { - writer.DeclareSamplers(samplers); writer.SetFlags(ShaderWriterFlags::FS_WRITE_DEPTH); + writer.DeclareSamplers(samplers); writer.BeginFSMain(g_draw2Duniforms, varyings); writer.C(" vec4 outColor = vec4(0.0, 0.0, 0.0, 0.0);\n"); // Unlike when just copying a depth buffer, here we're generating new depth values so we'll @@ -182,6 +182,11 @@ void Draw2D::Ensure2DResources() { if (!draw2DVs_) { char *vsCode = new char[8192]; + ShaderWriterFlags flags = ShaderWriterFlags::NONE; + if (gstate_c.Use(GPU_USE_SINGLE_PASS_STEREO)) { + // Hm, we're compiling the vertex shader here, probably don't need this... + flags = ShaderWriterFlags::FS_AUTO_STEREO; + } ShaderWriter writer(vsCode, shaderLanguageDesc, ShaderStage::Vertex); GenerateDraw2DVS(writer); _assert_msg_(strlen(vsCode) < 8192, "Draw2D VS length error: %d", (int)strlen(vsCode)); @@ -220,7 +225,11 @@ Draw2DPipeline *Draw2D::Create2DPipeline(std::functionGetShaderLanguageDesc(); char *fsCode = new char[8192]; - ShaderWriter writer(fsCode, shaderLanguageDesc, ShaderStage::Fragment); + ShaderWriterFlags flags = ShaderWriterFlags::NONE; + if (gstate_c.Use(GPU_USE_SINGLE_PASS_STEREO)) { + flags = ShaderWriterFlags::FS_AUTO_STEREO; + } + ShaderWriter writer(fsCode, shaderLanguageDesc, ShaderStage::Fragment, Slice::empty(), flags); Draw2DPipelineInfo info = generate(writer); _assert_msg_(strlen(fsCode) < 8192, "Draw2D FS length error: %d", (int)strlen(fsCode)); @@ -315,6 +324,7 @@ void Draw2D::DrawStrip2D(Draw::Texture *tex, Draw2DVertex *verts, int vertexCoun draw_->UpdateDynamicUniformBuffer(&ub, sizeof(ub)); if (tex) { + // This won't work since all the shaders above expect array textures on Vulkan. draw_->BindTextures(TEX_SLOT_PSP_TEXTURE, 1, &tex); } draw_->BindSamplerStates(TEX_SLOT_PSP_TEXTURE, 1, linearFilter ? 
&draw2DSamplerLinear_ : &draw2DSamplerNearest_); diff --git a/GPU/Common/FragmentShaderGenerator.cpp b/GPU/Common/FragmentShaderGenerator.cpp index 223cd7976f79..7f09f7c390e6 100644 --- a/GPU/Common/FragmentShaderGenerator.cpp +++ b/GPU/Common/FragmentShaderGenerator.cpp @@ -36,6 +36,18 @@ #define WRITE(p, ...) p.F(__VA_ARGS__) +static const SamplerDef samplersMono[3] = { + { 0, "tex" }, + { 1, "fbotex", SamplerFlags::ARRAY_ON_VULKAN }, + { 2, "pal" }, +}; + +static const SamplerDef samplersStereo[3] = { + { 0, "tex", SamplerFlags::ARRAY_ON_VULKAN }, + { 1, "fbotex", SamplerFlags::ARRAY_ON_VULKAN }, + { 2, "pal" }, +}; + bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLanguageDesc &compat, Draw::Bugs bugs, uint64_t *uniformMask, FragmentShaderFlags *fragmentShaderFlags, std::string *errorString) { *uniformMask = 0; if (fragmentShaderFlags) { @@ -43,6 +55,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu } errorString->clear(); + bool useStereo = id.Bit(FS_BIT_STEREO); bool highpFog = false; bool highpTexcoord = false; bool enableFragmentTestCache = gstate_c.Use(GPU_USE_FRAGMENT_TEST_CACHE); @@ -55,26 +68,33 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu } bool texture3D = id.Bit(FS_BIT_3D_TEXTURE); + bool arrayTexture = id.Bit(FS_BIT_SAMPLE_ARRAY_TEXTURE); ReplaceAlphaType stencilToAlpha = static_cast(id.Bits(FS_BIT_STENCIL_TO_ALPHA, 2)); - std::vector gl_exts; + std::vector extensions; if (ShaderLanguageIsOpenGL(compat.shaderLanguage)) { if (stencilToAlpha == REPLACE_ALPHA_DUALSOURCE && gl_extensions.EXT_blend_func_extended) { - gl_exts.push_back("#extension GL_EXT_blend_func_extended : require"); + extensions.push_back("#extension GL_EXT_blend_func_extended : require"); } if (gl_extensions.EXT_gpu_shader4) { - gl_exts.push_back("#extension GL_EXT_gpu_shader4 : enable"); + extensions.push_back("#extension GL_EXT_gpu_shader4 : enable"); } if 
(compat.framebufferFetchExtension) { - gl_exts.push_back(compat.framebufferFetchExtension); + extensions.push_back(compat.framebufferFetchExtension); } if (gl_extensions.OES_texture_3D && texture3D) { - gl_exts.push_back("#extension GL_OES_texture_3D: enable"); + extensions.push_back("#extension GL_OES_texture_3D: enable"); } + } + + ShaderWriterFlags flags = ShaderWriterFlags::NONE; + if (useStereo) { + flags |= ShaderWriterFlags::FS_AUTO_STEREO; } - ShaderWriter p(buffer, compat, ShaderStage::Fragment, gl_exts); + ShaderWriter p(buffer, compat, ShaderStage::Fragment, extensions, flags); + p.ApplySamplerMetadata(arrayTexture ? samplersStereo : samplersMono); bool lmode = id.Bit(FS_BIT_LMODE); bool doTexture = id.Bit(FS_BIT_DO_TEXTURE); @@ -89,6 +109,15 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu bool doTextureProjection = id.Bit(FS_BIT_DO_TEXTURE_PROJ); bool doTextureAlpha = id.Bit(FS_BIT_TEXALPHA); + if (texture3D && arrayTexture) { + *errorString = "Invalid combination of 3D texture and array texture, shouldn't happen"; + return false; + } + if (compat.shaderLanguage != ShaderLanguage::GLSL_VULKAN && arrayTexture) { + *errorString = "We only do array textures for framebuffers in Vulkan."; + return false; + } + bool flatBug = bugs.Has(Draw::Bugs::BROKEN_FLAT_IN_SHADER) && g_Config.bVendorBugChecksEnabled; bool doFlatShading = id.Bit(FS_BIT_FLATSHADE) && !flatBug; @@ -148,6 +177,9 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu return false; } + // Currently only used by Vulkan. 
+ std::vector samplers; + if (compat.shaderLanguage == ShaderLanguage::GLSL_VULKAN) { if (useDiscardStencilBugWorkaround && !gstate_c.Use(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT)) { WRITE(p, "layout (depth_unchanged) out float gl_FragDepth;\n"); @@ -155,13 +187,14 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu WRITE(p, "layout (std140, set = 0, binding = 3) uniform baseUBO {\n%s};\n", ub_baseStr); if (doTexture) { - WRITE(p, "layout (binding = 0) uniform %s tex;\n", texture3D ? "sampler3D" : "sampler2D"); + WRITE(p, "layout (binding = 0) uniform %s%s tex;\n", texture3D ? "sampler3D" : "sampler2D", arrayTexture ? "Array" : ""); } if (readFramebufferTex) { - WRITE(p, "layout (binding = 1) uniform sampler2D fbotex;\n"); + // The framebuffer texture is always bound as an array. + p.C("layout (binding = 1) uniform sampler2DArray fbotex;\n"); } else if (fetchFramebuffer) { - WRITE(p, "layout (input_attachment_index = 0, binding = 9) uniform subpassInput inputColor;\n"); + p.C("layout (input_attachment_index = 0, binding = 9) uniform subpassInput inputColor;\n"); if (fragmentShaderFlags) { *fragmentShaderFlags |= FragmentShaderFlags::INPUT_ATTACHMENT; } @@ -514,6 +547,8 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu WRITE(p, " vec4 destColor = fbotex.Load(int3((int)gl_FragCoord.x, (int)gl_FragCoord.y, 0));\n"); } else if (compat.shaderLanguage == HLSL_D3D9) { WRITE(p, " vec4 destColor = tex2D(fbotex, gl_FragCoord.xy * u_fbotexSize.xy);\n", compat.texture); + } else if (compat.shaderLanguage == GLSL_VULKAN) { + WRITE(p, " lowp vec4 destColor = %s(fbotex, ivec3(gl_FragCoord.x, gl_FragCoord.y, %s), 0);\n", compat.texelFetch, useStereo ? 
"float(gl_ViewIndex)" : "0"); } else if (!compat.texelFetch) { WRITE(p, " lowp vec4 destColor = %s(fbotex, gl_FragCoord.xy * u_fbotexSize.xy);\n", compat.texture); } else { @@ -628,6 +663,18 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu } else { WRITE(p, " vec4 t = %s(tex, vec3(%s.xy, u_mipBias));\n", compat.texture3D, texcoord); } + } else if (arrayTexture) { + _dbg_assert_(compat.shaderLanguage == GLSL_VULKAN); + // Used for stereo rendering. + const char *arrayIndex = useStereo ? "float(gl_ViewIndex)" : "0.0"; + if (doTextureProjection) { + // There's no textureProj for array textures, so we need to emulate it. + // Should be fine on any Vulkan-compatible hardware. + WRITE(p, " vec2 uv_proj = (%s.xy) / (%s.z);\n", texcoord, texcoord); + WRITE(p, " vec4 t = %s(tex, vec3(uv_proj, %s));\n", compat.texture, arrayIndex); + } else { + WRITE(p, " vec4 t = %s(tex, vec3(%s.xy, %s));\n", compat.texture, texcoord, arrayIndex); + } } else { if (doTextureProjection) { WRITE(p, " vec4 t = %sProj(tex, %s);\n", compat.texture, texcoord); diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index 07f933778f1d..f7a470bb7dba 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -1011,7 +1011,7 @@ void FramebufferManagerCommon::NotifyRenderFramebufferSwitched(VirtualFramebuffe if (useBufferedRendering_) { if (vfb->fbo) { shaderManager_->DirtyLastShader(); - draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, "FBSwitch"); + draw_->BindFramebufferAsRenderTarget(vfb->fbo, {Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP}, "FBSwitch"); } else { // This should only happen very briefly when toggling useBufferedRendering_. 
ResizeFramebufFBO(vfb, vfb->width, vfb->height, true); @@ -1140,7 +1140,7 @@ void FramebufferManagerCommon::DrawPixels(VirtualFramebuffer *vfb, int dstX, int Draw::Texture *pixelsTex = MakePixelTexture(srcPixels, srcPixelFormat, srcStride, width, height); if (pixelsTex) { - draw_->BindTextures(0, 1, &pixelsTex); + draw_->BindTextures(0, 1, &pixelsTex, Draw::TextureBindFlags::VULKAN_BIND_ARRAY); // TODO: Replace with draw2D_.Blit() directly. DrawActiveTexture(dstX, dstY, width, height, vfb->bufferWidth, vfb->bufferHeight, u0, v0, u1, v1, ROTATION_LOCKED_HORIZONTAL, flags); @@ -1153,7 +1153,7 @@ void FramebufferManagerCommon::DrawPixels(VirtualFramebuffer *vfb, int dstX, int } } -bool FramebufferManagerCommon::BindFramebufferAsColorTexture(int stage, VirtualFramebuffer *framebuffer, int flags) { +bool FramebufferManagerCommon::BindFramebufferAsColorTexture(int stage, VirtualFramebuffer *framebuffer, int flags, int layer) { if (!framebuffer->fbo || !useBufferedRendering_) { draw_->BindTexture(stage, nullptr); gstate_c.skipDrawReason |= SKIPDRAW_BAD_FB_TEXTURE; @@ -1173,17 +1173,17 @@ bool FramebufferManagerCommon::BindFramebufferAsColorTexture(int stage, VirtualF if (renderCopy) { VirtualFramebuffer copyInfo = *framebuffer; copyInfo.fbo = renderCopy; - CopyFramebufferForColorTexture(©Info, framebuffer, flags); + CopyFramebufferForColorTexture(©Info, framebuffer, flags, layer); RebindFramebuffer("After BindFramebufferAsColorTexture"); - draw_->BindFramebufferAsTexture(renderCopy, stage, Draw::FB_COLOR_BIT); + draw_->BindFramebufferAsTexture(renderCopy, stage, Draw::FB_COLOR_BIT, layer); gpuStats.numCopiesForSelfTex++; } else { // Failed to get temp FBO? Weird. 
- draw_->BindFramebufferAsTexture(framebuffer->fbo, stage, Draw::FB_COLOR_BIT); + draw_->BindFramebufferAsTexture(framebuffer->fbo, stage, Draw::FB_COLOR_BIT, layer); } return true; } else if (framebuffer != currentRenderVfb_ || (flags & BINDFBCOLOR_FORCE_SELF) != 0) { - draw_->BindFramebufferAsTexture(framebuffer->fbo, stage, Draw::FB_COLOR_BIT); + draw_->BindFramebufferAsTexture(framebuffer->fbo, stage, Draw::FB_COLOR_BIT, layer); return true; } else { ERROR_LOG_REPORT_ONCE(selfTextureFail, G3D, "Attempting to texture from target (src=%08x / target=%08x / flags=%d)", framebuffer->fb_address, currentRenderVfb_->fb_address, flags); @@ -1197,7 +1197,7 @@ bool FramebufferManagerCommon::BindFramebufferAsColorTexture(int stage, VirtualF } } -void FramebufferManagerCommon::CopyFramebufferForColorTexture(VirtualFramebuffer *dst, VirtualFramebuffer *src, int flags) { +void FramebufferManagerCommon::CopyFramebufferForColorTexture(VirtualFramebuffer *dst, VirtualFramebuffer *src, int flags, int layer) { int x = 0; int y = 0; int w = src->drawnWidth; @@ -1619,7 +1619,7 @@ void FramebufferManagerCommon::ResizeFramebufFBO(VirtualFramebuffer *vfb, int w, shaderManager_->DirtyLastShader(); char tag[128]; size_t len = FormatFramebufferName(vfb, tag, sizeof(tag)); - vfb->fbo = draw_->CreateFramebuffer({ vfb->renderWidth, vfb->renderHeight, 1, 1, true, tag }); + vfb->fbo = draw_->CreateFramebuffer({ vfb->renderWidth, vfb->renderHeight, 1, GetFramebufferLayers(), true, tag }); if (Memory::IsVRAMAddress(vfb->fb_address) && vfb->fb_stride != 0) { NotifyMemInfo(MemBlockFlags::ALLOC, vfb->fb_address, ColorBufferByteSize(vfb), tag, len); } @@ -1986,7 +1986,7 @@ VirtualFramebuffer *FramebufferManagerCommon::CreateRAMFramebuffer(uint32_t fbAd char name[64]; snprintf(name, sizeof(name), "%08x_color_RAM", vfb->fb_address); textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_CREATED); - vfb->fbo = draw_->CreateFramebuffer({ vfb->renderWidth, vfb->renderHeight, 1, 1, true, name }); + vfb->fbo = 
draw_->CreateFramebuffer({ vfb->renderWidth, vfb->renderHeight, 1, GetFramebufferLayers(), true, name }); vfbs_.push_back(vfb); u32 byteSize = ColorBufferByteSize(vfb); @@ -2396,6 +2396,17 @@ void FramebufferManagerCommon::DestroyAllFBOs() { fbosToDelete_.clear(); } +static const char *TempFBOReasonToString(TempFBO reason) { + switch (reason) { + case TempFBO::DEPAL: return "depal"; + case TempFBO::BLIT: return "blit"; + case TempFBO::COPY: return "copy"; + case TempFBO::STENCIL: return "stencil"; + default: break; + } + return ""; +} + Draw::Framebuffer *FramebufferManagerCommon::GetTempFBO(TempFBO reason, u16 w, u16 h) { u64 key = ((u64)reason << 48) | ((u32)w << 16) | h; auto it = tempFBOs_.find(key); @@ -2406,8 +2417,9 @@ Draw::Framebuffer *FramebufferManagerCommon::GetTempFBO(TempFBO reason, u16 w, u bool z_stencil = reason == TempFBO::STENCIL; char name[128]; - snprintf(name, sizeof(name), "temp_fbo_%dx%d%s", w / renderScaleFactor_, h / renderScaleFactor_, z_stencil ? "_depth" : ""); - Draw::Framebuffer *fbo = draw_->CreateFramebuffer({ w, h, 1, 1, z_stencil, name }); + snprintf(name, sizeof(name), "tempfbo_%s_%dx%d", TempFBOReasonToString(reason), w / renderScaleFactor_, h / renderScaleFactor_); + + Draw::Framebuffer *fbo = draw_->CreateFramebuffer({ w, h, 1, GetFramebufferLayers(), z_stencil, name }); if (!fbo) { return nullptr; } @@ -3030,7 +3042,7 @@ void FramebufferManagerCommon::BlitUsingRaster( draw_->BindTexture(0, nullptr); // This will get optimized away in case it's already bound (in VK and GL at least..) draw_->BindFramebufferAsRenderTarget(dest, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, tag ? tag : "BlitUsingRaster"); - draw_->BindFramebufferAsTexture(src, 0, pipeline->info.readChannel == RASTER_COLOR ? Draw::FB_COLOR_BIT : Draw::FB_DEPTH_BIT); + draw_->BindFramebufferAsTexture(src, 0, pipeline->info.readChannel == RASTER_COLOR ? 
Draw::FB_COLOR_BIT : Draw::FB_DEPTH_BIT, Draw::ALL_LAYERS); if (destX1 == 0.0f && destY1 == 0.0f && destX2 >= destW && destY2 >= destH) { // We overwrite the whole channel of the framebuffer, so we can invalidate the current contents. @@ -3046,6 +3058,14 @@ void FramebufferManagerCommon::BlitUsingRaster( gstate_c.Dirty(DIRTY_ALL_RENDER_STATE); } +int FramebufferManagerCommon::GetFramebufferLayers() const { + int layers = 1; + if (gstate_c.Use(GPU_USE_SINGLE_PASS_STEREO)) { + layers = 2; + } + return layers; +} + VirtualFramebuffer *FramebufferManagerCommon::ResolveFramebufferColorToFormat(VirtualFramebuffer *src, GEBufferFormat newFormat) { // Look for an identical framebuffer with the new format _dbg_assert_(src->fb_format != newFormat); @@ -3089,7 +3109,7 @@ VirtualFramebuffer *FramebufferManagerCommon::ResolveFramebufferColorToFormat(Vi char tag[128]; FormatFramebufferName(vfb, tag, sizeof(tag)); - vfb->fbo = draw_->CreateFramebuffer({ vfb->renderWidth, vfb->renderHeight, 1, 1, true, tag }); + vfb->fbo = draw_->CreateFramebuffer({ vfb->renderWidth, vfb->renderHeight, 1, GetFramebufferLayers(), true, tag }); vfbs_.push_back(vfb); } diff --git a/GPU/Common/FramebufferManagerCommon.h b/GPU/Common/FramebufferManagerCommon.h index 5a85d93541ed..76ad0bd8f681 100644 --- a/GPU/Common/FramebufferManagerCommon.h +++ b/GPU/Common/FramebufferManagerCommon.h @@ -204,8 +204,6 @@ enum class TempFBO { BLIT, // For copies of framebuffers (e.g. shader blending.) COPY, - // For another type of framebuffers that can happen together with COPY (see Outrun) - REINTERPRET, // Used to copy stencil data, means we need a stencil backing. STENCIL, }; @@ -322,7 +320,7 @@ class FramebufferManagerCommon { // Otherwise it doesn't get called. 
void NotifyBlockTransferAfter(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int w, int h, int bpp, u32 skipDrawReason); - bool BindFramebufferAsColorTexture(int stage, VirtualFramebuffer *framebuffer, int flags); + bool BindFramebufferAsColorTexture(int stage, VirtualFramebuffer *framebuffer, int flags, int layer); void ReadFramebufferToMemory(VirtualFramebuffer *vfb, int x, int y, int w, int h, RasterChannel channel); void DownloadFramebufferForClut(u32 fb_address, u32 loadBytes); @@ -460,7 +458,7 @@ class FramebufferManagerCommon { // Used by ReadFramebufferToMemory and later framebuffer block copies void BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp, RasterChannel channel, const char *tag); - void CopyFramebufferForColorTexture(VirtualFramebuffer *dst, VirtualFramebuffer *src, int flags); + void CopyFramebufferForColorTexture(VirtualFramebuffer *dst, VirtualFramebuffer *src, int flags, int layer); void EstimateDrawingSize(u32 fb_address, int fb_stride, GEBufferFormat fb_format, int viewport_width, int viewport_height, int region_width, int region_height, int scissor_width, int scissor_height, int &drawing_width, int &drawing_height); u32 ColorBufferByteSize(const VirtualFramebuffer *vfb) const; @@ -486,6 +484,8 @@ class FramebufferManagerCommon { void UpdateFramebufUsage(VirtualFramebuffer *vfb); + int GetFramebufferLayers() const; + static void SetColorUpdated(VirtualFramebuffer *dstBuffer, int skipDrawReason) { dstBuffer->memoryUpdated = false; dstBuffer->clutUpdatedBytes = 0; diff --git a/GPU/Common/PostShader.cpp b/GPU/Common/PostShader.cpp index 2d8529f356c6..6b0b94387a22 100644 --- a/GPU/Common/PostShader.cpp +++ b/GPU/Common/PostShader.cpp @@ -154,9 +154,11 @@ void LoadPostShaderInfo(Draw::DrawContext *draw, const std::vector &direct continue; } - if (section.Exists("Fragment") && section.Exists("Vertex") && 
strncasecmp(shaderType.c_str(), "render", shaderType.size()) == 0) { + if (section.Exists("Fragment") && section.Exists("Vertex") && + (strncasecmp(shaderType.c_str(), "render", shaderType.size()) == 0 || + strncasecmp(shaderType.c_str(), "StereoToMono", shaderType.size()) == 0)) { // Valid shader! - ShaderInfo info; + ShaderInfo info{}; std::string temp; info.section = section.name(); @@ -176,6 +178,12 @@ void LoadPostShaderInfo(Draw::DrawContext *draw, const std::vector &direct if (info.parent == "Off") info.parent.clear(); + if (strncasecmp(shaderType.c_str(), "stereotomono", shaderType.size()) == 0) { + info.isStereo = true; + info.isUpscalingFilter = false; + info.parent.clear(); + } + for (size_t i = 0; i < ARRAY_SIZE(info.settings); ++i) { auto &setting = info.settings[i]; section.Get(StringFromFormat("SettingName%d", i + 1).c_str(), &setting.name, ""); @@ -206,7 +214,7 @@ void LoadPostShaderInfo(Draw::DrawContext *draw, const std::vector &direct } } else if (section.Exists("Compute") && strncasecmp(shaderType.c_str(), "texture", shaderType.size()) == 0) { // This is a texture shader. - TextureShaderInfo info; + TextureShaderInfo info{}; std::string temp; info.section = section.name(); section.Get("Name", &info.name, section.name().c_str()); @@ -216,6 +224,8 @@ void LoadPostShaderInfo(Draw::DrawContext *draw, const std::vector &direct if (info.scaleFactor >= 2 && info.scaleFactor < 8) { appendTextureShader(info); } + } else if (!section.name().empty()) { + WARN_LOG(G3D, "Unrecognized shader type '%s' or invalid shader in section '%s'", shaderType.c_str(), section.name().c_str()); } } } diff --git a/GPU/Common/PostShader.h b/GPU/Common/PostShader.h index 5226638078d0..849ac884f34d 100644 --- a/GPU/Common/PostShader.h +++ b/GPU/Common/PostShader.h @@ -39,6 +39,8 @@ struct ShaderInfo { bool outputResolution; // Use x1 rendering res + nearest screen scaling filter bool isUpscalingFilter; + // Is used to post-process stereo-rendering to mono, like red/blue. 
+ bool isStereo; // Use 2x display resolution for supersampling with blurry shaders. int SSAAFilterLevel; // Force constant/max refresh for animated filters diff --git a/GPU/Common/PresentationCommon.cpp b/GPU/Common/PresentationCommon.cpp index ce3e155c930d..025888896cd1 100644 --- a/GPU/Common/PresentationCommon.cpp +++ b/GPU/Common/PresentationCommon.cpp @@ -34,6 +34,7 @@ #include "Core/HW/Display.h" #include "GPU/Common/PostShader.h" #include "GPU/Common/PresentationCommon.h" +#include "GPU/GPUState.h" #include "Common/GPU/ShaderTranslation.h" struct Vertex { @@ -207,7 +208,7 @@ void PresentationCommon::CalculatePostShaderUniforms(int bufferWidth, int buffer uniforms->gl_HalfPixel[0] = u_pixel_delta * 0.5f; uniforms->gl_HalfPixel[1] = v_pixel_delta * 0.5f; - uniforms->setting[0] = g_Config.mPostShaderSetting[shaderInfo->section + "SettingValue1"];; + uniforms->setting[0] = g_Config.mPostShaderSetting[shaderInfo->section + "SettingValue1"]; uniforms->setting[1] = g_Config.mPostShaderSetting[shaderInfo->section + "SettingValue2"]; uniforms->setting[2] = g_Config.mPostShaderSetting[shaderInfo->section + "SettingValue3"]; uniforms->setting[3] = g_Config.mPostShaderSetting[shaderInfo->section + "SettingValue4"]; @@ -226,7 +227,23 @@ static std::string ReadShaderSrc(const Path &filename) { } // Note: called on resize and settings changes. +// Also takes care of making sure the appropriate stereo shader is compiled. bool PresentationCommon::UpdatePostShader() { + DestroyStereoShader(); + + if (gstate_c.Use(GPU_USE_SIMPLE_STEREO_PERSPECTIVE)) { + const ShaderInfo *stereoShaderInfo = GetPostShaderInfo(g_Config.sStereoToMonoShader); + bool result = CompilePostShader(stereoShaderInfo, &stereoPipeline_); + if (!result) { + // We won't have a stereo shader. We have to check for this later. 
+ delete stereoShaderInfo_; + stereoShaderInfo_ = nullptr; + stereoPipeline_ = nullptr; + } else { + stereoShaderInfo_ = new ShaderInfo(*stereoShaderInfo); + } + } + std::vector shaderInfo; if (!g_Config.vPostShaderNames.empty()) { ReloadAllPostShaderInfo(draw_); @@ -517,6 +534,7 @@ void PresentationCommon::DestroyDeviceObjects() { restorePostShader_ = usePostShader_; DestroyPostShader(); + DestroyStereoShader(); } void PresentationCommon::DestroyPostShader() { @@ -529,6 +547,12 @@ void PresentationCommon::DestroyPostShader() { postShaderFBOUsage_.clear(); } +void PresentationCommon::DestroyStereoShader() { + DoRelease(stereoPipeline_); + delete stereoShaderInfo_; + stereoShaderInfo_ = nullptr; +} + Draw::ShaderModule *PresentationCommon::CompileShaderModule(ShaderStage stage, ShaderLanguage lang, const std::string &src, std::string *errorString) const { std::string translated = src; if (lang != lang_) { @@ -562,13 +586,28 @@ void PresentationCommon::SourceFramebuffer(Draw::Framebuffer *fb, int bufferWidt srcHeight_ = bufferHeight; } -void PresentationCommon::BindSource(int binding) { +// Return value is if stereo binding succeeded. +bool PresentationCommon::BindSource(int binding, bool bindStereo) { if (srcTexture_) { draw_->BindTexture(binding, srcTexture_); + return false; } else if (srcFramebuffer_) { - draw_->BindFramebufferAsTexture(srcFramebuffer_, binding, Draw::FB_COLOR_BIT); + if (bindStereo) { + if (srcFramebuffer_->Layers() > 1) { + draw_->BindFramebufferAsTexture(srcFramebuffer_, binding, Draw::FB_COLOR_BIT, Draw::ALL_LAYERS); + return true; + } else { + // Single layer. This might be from a post shader and those don't yet support stereo. 
+ draw_->BindFramebufferAsTexture(srcFramebuffer_, binding, Draw::FB_COLOR_BIT, 0); + return false; + } + } else { + draw_->BindFramebufferAsTexture(srcFramebuffer_, binding, Draw::FB_COLOR_BIT, 0); + return false; + } } else { _assert_(false); + return false; } } @@ -584,7 +623,9 @@ void PresentationCommon::CopyToOutput(OutputFlags flags, int uvRotation, float u // This should auto-disable usePostShader_ and call ShowPostShaderError(). bool useNearest = flags & OutputFlags::NEAREST; - const bool usePostShader = usePostShader_ && !(flags & OutputFlags::RB_SWIZZLE); + bool useStereo = gstate_c.Use(GPU_USE_SIMPLE_STEREO_PERSPECTIVE) && stereoPipeline_ != nullptr; // TODO: Also check that the backend has support for it. + + const bool usePostShader = usePostShader_ && !useStereo && !(flags & OutputFlags::RB_SWIZZLE); const bool isFinalAtOutputResolution = usePostShader && postShaderFramebuffers_.size() < postShaderPipelines_.size(); Draw::Framebuffer *postShaderOutput = nullptr; int lastWidth = srcWidth_; @@ -659,7 +700,7 @@ void PresentationCommon::CopyToOutput(OutputFlags flags, int uvRotation, float u } } - if (isFinalAtOutputResolution) { + if (isFinalAtOutputResolution || useStereo) { // In this mode, we ignore the g_display_rot_matrix. Apply manually. 
if (g_display_rotation != DisplayRotation::ROTATE_0) { for (int i = 0; i < 4; i++) { @@ -685,13 +726,13 @@ void PresentationCommon::CopyToOutput(OutputFlags flags, int uvRotation, float u PostShaderUniforms uniforms; const auto performShaderPass = [&](const ShaderInfo *shaderInfo, Draw::Framebuffer *postShaderFramebuffer, Draw::Pipeline *postShaderPipeline) { if (postShaderOutput) { - draw_->BindFramebufferAsTexture(postShaderOutput, 0, Draw::FB_COLOR_BIT); + draw_->BindFramebufferAsTexture(postShaderOutput, 0, Draw::FB_COLOR_BIT, 0); } else { - BindSource(0); + BindSource(0, false); } - BindSource(1); + BindSource(1, false); if (shaderInfo->usePreviousFrame) - draw_->BindFramebufferAsTexture(previousFramebuffer, 2, Draw::FB_COLOR_BIT); + draw_->BindFramebufferAsTexture(previousFramebuffer, 2, Draw::FB_COLOR_BIT, 0); int nextWidth, nextHeight; draw_->GetFramebufferDimensions(postShaderFramebuffer, &nextWidth, &nextHeight); @@ -768,26 +809,38 @@ void PresentationCommon::CopyToOutput(OutputFlags flags, int uvRotation, float u performShaderPass(shaderInfo, postShaderFramebuffer, postShaderPipeline); } - Draw::Pipeline *pipeline = (flags & OutputFlags::RB_SWIZZLE) ? texColorRBSwizzle_ : texColor_; - if (isFinalAtOutputResolution && previousFramebuffers_.empty()) { - pipeline = postShaderPipelines_.back(); - } - draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::CLEAR, Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE }, "FinalBlit"); draw_->SetScissorRect(0, 0, pixelWidth_, pixelHeight_); - draw_->BindPipeline(pipeline); + Draw::Pipeline *pipeline = (flags & OutputFlags::RB_SWIZZLE) ? 
texColorRBSwizzle_ : texColor_; - if (postShaderOutput) { - draw_->BindFramebufferAsTexture(postShaderOutput, 0, Draw::FB_COLOR_BIT); + if (useStereo) { + draw_->BindPipeline(stereoPipeline_); + if (!BindSource(0, true)) { + // Fall back + draw_->BindPipeline(texColor_); + useStereo = false; // Otherwise we end up uploading the wrong uniforms + } } else { - BindSource(0); + if (isFinalAtOutputResolution && previousFramebuffers_.empty()) { + pipeline = postShaderPipelines_.back(); + } + + draw_->BindPipeline(pipeline); + if (postShaderOutput) { + draw_->BindFramebufferAsTexture(postShaderOutput, 0, Draw::FB_COLOR_BIT, 0); + } else { + BindSource(0, false); + } } - BindSource(1); + BindSource(1, false); if (isFinalAtOutputResolution && previousFramebuffers_.empty()) { CalculatePostShaderUniforms(lastWidth, lastHeight, (int)rc.w, (int)rc.h, &postShaderInfo_.back(), &uniforms); draw_->UpdateDynamicUniformBuffer(&uniforms, sizeof(uniforms)); + } else if (useStereo) { + CalculatePostShaderUniforms(lastWidth, lastHeight, (int)rc.w, (int)rc.h, stereoShaderInfo_, &uniforms); + draw_->UpdateDynamicUniformBuffer(&uniforms, sizeof(uniforms)); } else { Draw::VsTexColUB ub{}; memcpy(ub.WorldViewProj, g_display_rot_matrix.m, sizeof(float) * 16); @@ -809,6 +862,8 @@ void PresentationCommon::CopyToOutput(OutputFlags flags, int uvRotation, float u CardboardSettings cardboardSettings; GetCardboardSettings(&cardboardSettings); if (cardboardSettings.enabled) { + // TODO: This could actually support stereo now, with an appropriate shader. + // This is what the left eye sees. 
setViewport(cardboardSettings.leftEyeXPosition, cardboardSettings.screenYPosition, cardboardSettings.screenWidth, cardboardSettings.screenHeight); draw_->DrawIndexed(6, 0); diff --git a/GPU/Common/PresentationCommon.h b/GPU/Common/PresentationCommon.h index c3faff6568bd..d100230a466e 100644 --- a/GPU/Common/PresentationCommon.h +++ b/GPU/Common/PresentationCommon.h @@ -107,7 +107,9 @@ class PresentationCommon { protected: void CreateDeviceObjects(); void DestroyDeviceObjects(); + void DestroyPostShader(); + void DestroyStereoShader(); static void ShowPostShaderError(const std::string &errorString); @@ -117,7 +119,7 @@ class PresentationCommon { bool BuildPostShader(const ShaderInfo *shaderInfo, const ShaderInfo *next, Draw::Pipeline **outPipeline); bool AllocateFramebuffer(int w, int h); - void BindSource(int binding); + bool BindSource(int binding, bool bindStereo); void GetCardboardSettings(CardboardSettings *cardboardSettings) const; void CalculatePostShaderUniforms(int bufferWidth, int bufferHeight, int targetWidth, int targetHeight, const ShaderInfo *shaderInfo, PostShaderUniforms *uniforms) const; @@ -134,6 +136,10 @@ class PresentationCommon { std::vector postShaderFramebuffers_; std::vector postShaderInfo_; std::vector previousFramebuffers_; + + Draw::Pipeline *stereoPipeline_ = nullptr; + ShaderInfo *stereoShaderInfo_ = nullptr; + int previousIndex_ = 0; PostShaderUniforms previousUniforms_{}; diff --git a/GPU/Common/ReinterpretFramebuffer.cpp b/GPU/Common/ReinterpretFramebuffer.cpp index b14e324d0d01..ccb58c1c9b5b 100644 --- a/GPU/Common/ReinterpretFramebuffer.cpp +++ b/GPU/Common/ReinterpretFramebuffer.cpp @@ -14,14 +14,13 @@ static const VaryingDef varyings[1] = { }; static const SamplerDef samplers[1] = { - { 0, "tex" } + { 0, "tex", SamplerFlags::ARRAY_ON_VULKAN } }; // Requires full size integer math. It would be possible to make a floating point-only version with lots of // modulo and stuff, might do it one day. 
Draw2DPipelineInfo GenerateReinterpretFragmentShader(ShaderWriter &writer, GEBufferFormat from, GEBufferFormat to) { writer.HighPrecisionFloat(); - writer.DeclareSamplers(samplers); if (writer.Lang().bitwiseOps) { diff --git a/GPU/Common/ShaderId.cpp b/GPU/Common/ShaderId.cpp index 78483cf8a5c4..aee52476cf76 100644 --- a/GPU/Common/ShaderId.cpp +++ b/GPU/Common/ShaderId.cpp @@ -62,6 +62,8 @@ std::string VertexShaderDesc(const VShaderID &id) { if (id.Bit(VS_BIT_NORM_REVERSE_TESS)) desc << "TessRevN "; if (id.Bit(VS_BIT_VERTEX_RANGE_CULLING)) desc << "Cull "; + if (id.Bit(VS_BIT_SIMPLE_STEREO)) desc << "SimpleStereo "; + return desc.str(); } @@ -92,6 +94,10 @@ void ComputeVertexShaderID(VShaderID *id_out, u32 vertType, bool useHWTransform, id.SetBit(VS_BIT_HAS_COLOR, hasColor); id.SetBit(VS_BIT_VERTEX_RANGE_CULLING, vertexRangeCulling); + if (!isModeThrough && gstate_c.Use(GPU_USE_SINGLE_PASS_STEREO)) { + id.SetBit(VS_BIT_SIMPLE_STEREO); + } + if (doTexture) { id.SetBit(VS_BIT_DO_TEXTURE); @@ -251,7 +257,8 @@ std::string FragmentShaderDesc(const FShaderID &id) { if (id.Bit(FS_BIT_TEST_DISCARD_TO_ZERO)) desc << "TestDiscardToZero "; if (id.Bit(FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL)) desc << "StencilDiscardWorkaround "; if (id.Bits(FS_BIT_REPLACE_LOGIC_OP, 4) != GE_LOGIC_COPY) desc << "ReplaceLogic "; - + if (id.Bit(FS_BIT_SAMPLE_ARRAY_TEXTURE)) desc << "TexArray "; + if (id.Bit(FS_BIT_STEREO)) desc << "Stereo "; return desc.str(); } @@ -359,9 +366,18 @@ void ComputeFragmentShaderID(FShaderID *id_out, const ComputedPipelineState &pip id.SetBits(FS_BIT_BLENDFUNC_B, 4, gstate.getBlendFuncB()); } id.SetBit(FS_BIT_FLATSHADE, doFlatShading); - id.SetBit(FS_BIT_COLOR_WRITEMASK, colorWriteMask); + // All framebuffers are array textures in Vulkan now. 
+ if (gstate_c.arrayTexture && g_Config.iGPUBackend == (int)GPUBackend::VULKAN) { + id.SetBit(FS_BIT_SAMPLE_ARRAY_TEXTURE); + } + + // Stereo support + if (gstate_c.Use(GPU_USE_SINGLE_PASS_STEREO)) { + id.SetBit(FS_BIT_STEREO); + } + if (g_Config.bVendorBugChecksEnabled && bugs.Has(Draw::Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL)) { bool stencilWithoutDepth = !IsStencilTestOutputDisabled() && (!gstate.isDepthTestEnabled() || !gstate.isDepthWriteEnabled()); if (stencilWithoutDepth) { diff --git a/GPU/Common/ShaderId.h b/GPU/Common/ShaderId.h index 2edf271b6d6d..effe3307634e 100644 --- a/GPU/Common/ShaderId.h +++ b/GPU/Common/ShaderId.h @@ -15,7 +15,7 @@ enum VShaderBit : uint8_t { VS_BIT_HAS_COLOR = 3, VS_BIT_DO_TEXTURE = 4, VS_BIT_VERTEX_RANGE_CULLING = 5, - // 6 is free, + VS_BIT_SIMPLE_STEREO = 6, // 7 is free. VS_BIT_USE_HW_TRANSFORM = 8, VS_BIT_HAS_NORMAL = 9, // conditioned on hw transform @@ -98,6 +98,8 @@ enum FShaderBit : uint8_t { FS_BIT_COLOR_WRITEMASK = 50, FS_BIT_REPLACE_LOGIC_OP = 51, // 4 bits. GE_LOGIC_COPY means no-op/off. FS_BIT_SHADER_DEPAL_MODE = 55, // 2 bits (ShaderDepalMode) + FS_BIT_SAMPLE_ARRAY_TEXTURE = 57, // For multiview, framebuffers are array textures and we need to sample the two layers correctly. + FS_BIT_STEREO = 58, }; static inline FShaderBit operator +(FShaderBit bit, int i) { diff --git a/GPU/Common/ShaderUniforms.h b/GPU/Common/ShaderUniforms.h index c74617931681..96cfe98411ea 100644 --- a/GPU/Common/ShaderUniforms.h +++ b/GPU/Common/ShaderUniforms.h @@ -131,6 +131,16 @@ static const char * const ub_vs_bonesStr = R"( mat3x4 u_bone0; mat3x4 u_bone1; mat3x4 u_bone2; mat3x4 u_bone3; mat3x4 u_bone4; mat3x4 u_bone5; mat3x4 u_bone6; mat3x4 u_bone7; mat3x4 u_bone8; )"; + +static const char * const ub_frame_globalstr = +R"( vec4 unused; +)"; + +// VR stuff will go here. 
+struct UB_FrameGlobal { + float unused[4]; +}; + void CalcCullRange(float minValues[4], float maxValues[4], bool flipViewport, bool hasNegZ); void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipViewport, bool useBufferedRendering); diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index 3877b2df226e..e660bb20f8a0 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -371,6 +371,8 @@ TexCacheEntry *TextureCacheCommon::SetTexture() { if (!Memory::IsValidAddress(texaddr)) { // Bind a null texture and return. Unbind(); + gstate_c.SetTextureIs3D(false); + gstate_c.SetTextureIsArray(false); return nullptr; } @@ -528,6 +530,7 @@ TexCacheEntry *TextureCacheCommon::SetTexture() { gstate_c.curTextureWidth = w; gstate_c.curTextureHeight = h; gstate_c.SetTextureIs3D((entry->status & TexCacheEntry::STATUS_3D) != 0); + gstate_c.SetTextureIsArray(false); if (rehash) { // Update in case any of these changed. entry->sizeInRAM = (textureBitsPerPixel[texFormat] * bufw * h / 2) / 8; @@ -636,6 +639,7 @@ TexCacheEntry *TextureCacheCommon::SetTexture() { gstate_c.curTextureWidth = w; gstate_c.curTextureHeight = h; gstate_c.SetTextureIs3D((entry->status & TexCacheEntry::STATUS_3D) != 0); + gstate_c.SetTextureIsArray(false); // Ordinary 2D textures still aren't used by array view in VK. We probably might as well, though, at this point.. 
failedTexture_ = false; nextTexture_ = entry; @@ -1126,6 +1130,7 @@ void TextureCacheCommon::SetTextureFramebuffer(const AttachCandidate &candidate) } gstate_c.SetTextureIs3D(false); + gstate_c.SetTextureIsArray(true); nextNeedsRehash_ = false; nextNeedsChange_ = false; @@ -1295,7 +1300,7 @@ void TextureCacheCommon::LoadClut(u32 clutAddr, u32 loadBytes) { desc.height = 1; desc.depth = 1; desc.z_stencil = false; - desc.numColorAttachments = 1; + desc.numLayers = 1; desc.tag = "dynamic_clut"; dynamicClutFbo_ = draw_->CreateFramebuffer(desc); desc.tag = "dynamic_clut_temp"; @@ -2020,11 +2025,13 @@ void TextureCacheCommon::ApplyTexture() { entry->lastFrame = gpuStats.numFlips; gstate_c.SetTextureFullAlpha(false); gstate_c.SetTextureIs3D(false); + gstate_c.SetTextureIsArray(false); } else { entry->lastFrame = gpuStats.numFlips; BindTexture(entry); gstate_c.SetTextureFullAlpha(entry->GetAlphaStatus() == TexCacheEntry::STATUS_ALPHA_FULL); gstate_c.SetTextureIs3D((entry->status & TexCacheEntry::STATUS_3D) != 0); + gstate_c.SetTextureIsArray(false); } } @@ -2121,7 +2128,7 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer // Very icky conflation here of native and thin3d rendering. This will need careful work per backend in BindAsClutTexture. BindAsClutTexture(clutTexture.texture, smoothedDepal); - framebufferManager_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET); + framebufferManager_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET, Draw::ALL_LAYERS); // Vulkan needs to do some extra work here to pick out the native handle from Draw. 
BoundFramebufferTexture(); @@ -2197,7 +2204,7 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer Draw::Viewport vp{ 0.0f, 0.0f, (float)depalWidth, (float)framebuffer->renderHeight, 0.0f, 1.0f }; draw_->SetViewports(1, &vp); - draw_->BindFramebufferAsTexture(framebuffer->fbo, 0, depth ? Draw::FB_DEPTH_BIT : Draw::FB_COLOR_BIT); + draw_->BindFramebufferAsTexture(framebuffer->fbo, 0, depth ? Draw::FB_DEPTH_BIT : Draw::FB_COLOR_BIT, Draw::ALL_LAYERS); draw_->BindTexture(1, clutTexture.texture); Draw::SamplerState *nearest = textureShaderCache_->GetSampler(false); Draw::SamplerState *clutSampler = textureShaderCache_->GetSampler(smoothedDepal); @@ -2213,7 +2220,7 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer draw_->BindTexture(0, nullptr); framebufferManager_->RebindFramebuffer("ApplyTextureFramebuffer"); - draw_->BindFramebufferAsTexture(depalFBO, 0, Draw::FB_COLOR_BIT); + draw_->BindFramebufferAsTexture(depalFBO, 0, Draw::FB_COLOR_BIT, Draw::ALL_LAYERS); BoundFramebufferTexture(); const u32 bytesPerColor = clutFormat == GE_CMODE_32BIT_ABGR8888 ? 
sizeof(u32) : sizeof(u16); @@ -2226,7 +2233,7 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer shaderManager_->DirtyLastShader(); } else { framebufferManager_->RebindFramebuffer("ApplyTextureFramebuffer"); - framebufferManager_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET); + framebufferManager_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET, Draw::ALL_LAYERS); BoundFramebufferTexture(); gstate_c.SetUseShaderDepal(ShaderDepalMode::OFF); @@ -2299,7 +2306,7 @@ void TextureCacheCommon::ApplyTextureDepal(TexCacheEntry *entry) { draw_->SetViewports(1, &vp); draw_->BindNativeTexture(0, GetNativeTextureView(entry)); - draw_->BindFramebufferAsTexture(dynamicClutFbo_, 1, Draw::FB_COLOR_BIT); + draw_->BindFramebufferAsTexture(dynamicClutFbo_, 1, Draw::FB_COLOR_BIT, 0); Draw::SamplerState *nearest = textureShaderCache_->GetSampler(false); Draw::SamplerState *clutSampler = textureShaderCache_->GetSampler(false); draw_->BindSamplerStates(0, 1, &nearest); @@ -2314,7 +2321,7 @@ void TextureCacheCommon::ApplyTextureDepal(TexCacheEntry *entry) { draw_->BindTexture(0, nullptr); framebufferManager_->RebindFramebuffer("ApplyTextureFramebuffer"); - draw_->BindFramebufferAsTexture(depalFBO, 0, Draw::FB_COLOR_BIT); + draw_->BindFramebufferAsTexture(depalFBO, 0, Draw::FB_COLOR_BIT, 0); BoundFramebufferTexture(); const u32 bytesPerColor = clutFormat == GE_CMODE_32BIT_ABGR8888 ? 
sizeof(u32) : sizeof(u16); diff --git a/GPU/Common/TextureShaderCommon.cpp b/GPU/Common/TextureShaderCommon.cpp index 5919d5442ea0..a1ba51de53b9 100644 --- a/GPU/Common/TextureShaderCommon.cpp +++ b/GPU/Common/TextureShaderCommon.cpp @@ -34,7 +34,7 @@ static const VaryingDef varyings[1] = { }; static const SamplerDef samplers[2] = { - { 0, "tex" }, + { 0, "tex", SamplerFlags::ARRAY_ON_VULKAN }, { 1, "pal" }, }; diff --git a/GPU/Common/VertexShaderGenerator.cpp b/GPU/Common/VertexShaderGenerator.cpp index 5b7e32127724..cbf51f1b24be 100644 --- a/GPU/Common/VertexShaderGenerator.cpp +++ b/GPU/Common/VertexShaderGenerator.cpp @@ -126,34 +126,47 @@ static const char * const boneWeightDecl[9] = { "layout(location = 3) in vec4 w1;\nlayout(location = 4) in vec4 w2;\n", }; -bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguageDesc &compat, Draw::Bugs bugs, uint32_t *attrMask, uint64_t *uniformMask, std::string *errorString) { +bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguageDesc &compat, Draw::Bugs bugs, uint32_t *attrMask, uint64_t *uniformMask, VertexShaderFlags *vertexShaderFlags, std::string *errorString) { *attrMask = 0; *uniformMask = 0; + if (vertexShaderFlags) { + *vertexShaderFlags = (VertexShaderFlags)0; + } bool highpFog = false; bool highpTexcoord = false; - std::vector gl_exts; + std::vector extensions; if (ShaderLanguageIsOpenGL(compat.shaderLanguage)) { if (gl_extensions.EXT_gpu_shader4) { - gl_exts.push_back("#extension GL_EXT_gpu_shader4 : enable"); + extensions.push_back("#extension GL_EXT_gpu_shader4 : enable"); } bool useClamp = gstate_c.Use(GPU_USE_DEPTH_CLAMP) && !id.Bit(VS_BIT_IS_THROUGH); if (gl_extensions.EXT_clip_cull_distance && (id.Bit(VS_BIT_VERTEX_RANGE_CULLING) || useClamp)) { - gl_exts.push_back("#extension GL_EXT_clip_cull_distance : enable"); + extensions.push_back("#extension GL_EXT_clip_cull_distance : enable"); } if (gl_extensions.APPLE_clip_distance && 
(id.Bit(VS_BIT_VERTEX_RANGE_CULLING) || useClamp)) { - gl_exts.push_back("#extension GL_APPLE_clip_distance : enable"); + extensions.push_back("#extension GL_APPLE_clip_distance : enable"); } if (gl_extensions.ARB_cull_distance && id.Bit(VS_BIT_VERTEX_RANGE_CULLING)) { - gl_exts.push_back("#extension GL_ARB_cull_distance : enable"); + extensions.push_back("#extension GL_ARB_cull_distance : enable"); } if (gstate_c.Use(GPU_USE_VIRTUAL_REALITY) && gstate_c.Use(GPU_USE_SINGLE_PASS_STEREO)) { - gl_exts.push_back("#extension GL_OVR_multiview2 : enable\nlayout(num_views=2) in;"); + extensions.push_back("#extension GL_OVR_multiview2 : enable\nlayout(num_views=2) in;"); + } + } + + bool useSimpleStereo = id.Bit(VS_BIT_SIMPLE_STEREO); + + if (useSimpleStereo) { + if (compat.shaderLanguage != ShaderLanguage::GLSL_VULKAN) { + *errorString = "Multiview only supported with Vulkan for now"; + return false; } + extensions.push_back("#extension GL_EXT_multiview : enable"); } - ShaderWriter p(buffer, compat, ShaderStage::Vertex, gl_exts); + ShaderWriter p(buffer, compat, ShaderStage::Vertex, extensions); bool isModeThrough = id.Bit(VS_BIT_IS_THROUGH); bool lmode = id.Bit(VS_BIT_LMODE); @@ -1338,6 +1351,12 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag WRITE(p, " }\n"); } + if (useSimpleStereo && useHWTransform) { + p.C(" float zFactor = 0.2 * float(gl_ViewIndex * 2 - 1);\n"); + p.C(" float zFocus = 0.0;\n"); + p.C(" gl_Position.x += (-gl_Position.z - zFocus) * zFactor;\n"); + } + if (needsZWHack) { // See comment in thin3d_vulkan.cpp. 
WRITE(p, " if (%sgl_Position.z == %sgl_Position.w) %sgl_Position.z *= 0.999999;\n", diff --git a/GPU/Common/VertexShaderGenerator.h b/GPU/Common/VertexShaderGenerator.h index c1899001e6c1..9d3707411cdb 100644 --- a/GPU/Common/VertexShaderGenerator.h +++ b/GPU/Common/VertexShaderGenerator.h @@ -25,7 +25,13 @@ struct VShaderID; -bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguageDesc &compat, const Draw::Bugs bugs, uint32_t *attrMask, uint64_t *uniformMask, std::string *errorString); +// Can technically be deduced from the vertex shader ID, but this is safer. +enum class VertexShaderFlags : u32 { + MULTI_VIEW = 1, +}; +ENUM_CLASS_BITOPS(VertexShaderFlags); + +bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguageDesc &compat, const Draw::Bugs bugs, uint32_t *attrMask, uint64_t *uniformMask, VertexShaderFlags *vertexShaderFlags, std::string *errorString); // D3D9 constants. enum { diff --git a/GPU/D3D11/ShaderManagerD3D11.cpp b/GPU/D3D11/ShaderManagerD3D11.cpp index 0560b529411f..0c9fec1817ea 100644 --- a/GPU/D3D11/ShaderManagerD3D11.cpp +++ b/GPU/D3D11/ShaderManagerD3D11.cpp @@ -212,7 +212,7 @@ void ShaderManagerD3D11::GetShaders(int prim, u32 vertType, D3D11VertexShader ** std::string genErrorString; uint32_t attrMask; uint64_t uniformMask; - GenerateVertexShader(VSID, codeBuffer_, draw_->GetShaderLanguageDesc(), draw_->GetBugs(), &attrMask, &uniformMask, &genErrorString); + GenerateVertexShader(VSID, codeBuffer_, draw_->GetShaderLanguageDesc(), draw_->GetBugs(), &attrMask, &uniformMask, nullptr, &genErrorString); _assert_msg_(strlen(codeBuffer_) < CODE_BUFFER_SIZE, "VS length error: %d", (int)strlen(codeBuffer_)); vs = new D3D11VertexShader(device_, featureLevel_, VSID, codeBuffer_, vertType, useHWTransform); vsCache_[VSID] = vs; diff --git a/GPU/D3D11/StateMappingD3D11.cpp b/GPU/D3D11/StateMappingD3D11.cpp index a902401122d4..efbbcad31bae 100644 --- a/GPU/D3D11/StateMappingD3D11.cpp +++ 
b/GPU/D3D11/StateMappingD3D11.cpp @@ -159,7 +159,7 @@ void DrawEngineD3D11::ApplyDrawState(int prim) { ApplyStencilReplaceAndLogicOpIgnoreBlend(blendState.replaceAlphaWithStencil, blendState); if (fboTexBindState == FBO_TEX_COPY_BIND_TEX) { - framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY); + framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY, 0); // No sampler required, we do a plain Load in the pixel shader. fboTexBound_ = true; fboTexBindState = FBO_TEX_NONE; diff --git a/GPU/Directx9/ShaderManagerDX9.cpp b/GPU/Directx9/ShaderManagerDX9.cpp index 6e20ec45e07e..1e56e5ab37e4 100644 --- a/GPU/Directx9/ShaderManagerDX9.cpp +++ b/GPU/Directx9/ShaderManagerDX9.cpp @@ -581,7 +581,7 @@ VSShader *ShaderManagerDX9::ApplyShader(bool useHWTransform, bool useHWTessellat std::string genErrorString; uint32_t attrMask; uint64_t uniformMask; - if (GenerateVertexShader(VSID, codeBuffer_, draw_->GetShaderLanguageDesc(), draw_->GetBugs(), &attrMask, &uniformMask, &genErrorString)) { + if (GenerateVertexShader(VSID, codeBuffer_, draw_->GetShaderLanguageDesc(), draw_->GetBugs(), &attrMask, &uniformMask, nullptr, &genErrorString)) { vs = new VSShader(device_, VSID, codeBuffer_, useHWTransform); } if (!vs || vs->Failed()) { @@ -606,7 +606,7 @@ VSShader *ShaderManagerDX9::ApplyShader(bool useHWTransform, bool useHWTessellat // Can still work with software transform. 
uint32_t attrMask; uint64_t uniformMask; - bool success = GenerateVertexShader(VSID, codeBuffer_, draw_->GetShaderLanguageDesc(), draw_->GetBugs(), &attrMask, &uniformMask, &genErrorString); + bool success = GenerateVertexShader(VSID, codeBuffer_, draw_->GetShaderLanguageDesc(), draw_->GetBugs(), &attrMask, &uniformMask, nullptr, &genErrorString); _assert_(success); vs = new VSShader(device_, VSID, codeBuffer_, false); } diff --git a/GPU/Directx9/StateMappingDX9.cpp b/GPU/Directx9/StateMappingDX9.cpp index bc1af41143a9..951a401189f3 100644 --- a/GPU/Directx9/StateMappingDX9.cpp +++ b/GPU/Directx9/StateMappingDX9.cpp @@ -101,7 +101,7 @@ void DrawEngineDX9::ApplyDrawState(int prim) { if (fboTexBindState_ == FBO_TEX_COPY_BIND_TEX) { // Note that this is positions, not UVs, that we need the copy from. - framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY); + framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY, 0); // If we are rendering at a higher resolution, linear is probably best for the dest color. device_->SetSamplerState(1, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR); device_->SetSamplerState(1, D3DSAMP_MINFILTER, D3DTEXF_LINEAR); @@ -139,7 +139,7 @@ void DrawEngineDX9::ApplyDrawState(int prim) { if (fboTexBindState_ == FBO_TEX_COPY_BIND_TEX) { // Note that this is positions, not UVs, that we need the copy from. - framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY); + framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY, 0); // If we are rendering at a higher resolution, linear is probably best for the dest color. 
device_->SetSamplerState(1, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR); device_->SetSamplerState(1, D3DSAMP_MINFILTER, D3DTEXF_LINEAR); diff --git a/GPU/GLES/DepthBufferGLES.cpp b/GPU/GLES/DepthBufferGLES.cpp index 5f783e48c779..7be1adf9d348 100644 --- a/GPU/GLES/DepthBufferGLES.cpp +++ b/GPU/GLES/DepthBufferGLES.cpp @@ -208,7 +208,7 @@ bool FramebufferManagerGLES::ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int Draw::Viewport viewport = { 0.0f, 0.0f, (float)fbo->Width(), (float)fbo->Height(), 0.0f, 1.0f }; draw_->SetViewports(1, &viewport); - draw_->BindFramebufferAsTexture(fbo, TEX_SLOT_PSP_TEXTURE, FB_DEPTH_BIT); + draw_->BindFramebufferAsTexture(fbo, TEX_SLOT_PSP_TEXTURE, FB_DEPTH_BIT, 0); draw_->BindSamplerStates(TEX_SLOT_PSP_TEXTURE, 1, &depthReadbackSampler_); // We must bind the program after starting the render pass. @@ -326,7 +326,7 @@ bool FramebufferManagerGLES::ReadbackStencilbufferSync(Draw::Framebuffer *fbo, i Draw::Viewport viewport = { 0.0f, 0.0f, (float)fbo->Width(), (float)fbo->Height(), 0.0f, 1.0f }; draw_->SetViewports(1, &viewport); - draw_->BindFramebufferAsTexture(fbo, TEX_SLOT_PSP_TEXTURE, FB_STENCIL_BIT); + draw_->BindFramebufferAsTexture(fbo, TEX_SLOT_PSP_TEXTURE, FB_STENCIL_BIT, 0); draw_->BindSamplerStates(TEX_SLOT_PSP_TEXTURE, 1, &stencilReadbackSampler_); // We must bind the program after starting the render pass. 
diff --git a/GPU/GLES/ShaderManagerGLES.cpp b/GPU/GLES/ShaderManagerGLES.cpp index 3bdd9c4f86a9..e4a6350de1b0 100644 --- a/GPU/GLES/ShaderManagerGLES.cpp +++ b/GPU/GLES/ShaderManagerGLES.cpp @@ -743,7 +743,7 @@ Shader *ShaderManagerGLES::CompileVertexShader(VShaderID VSID) { uint32_t attrMask; uint64_t uniformMask; std::string errorString; - if (!GenerateVertexShader(VSID, codeBuffer_, draw_->GetShaderLanguageDesc(), draw_->GetBugs(), &attrMask, &uniformMask, &errorString)) { + if (!GenerateVertexShader(VSID, codeBuffer_, draw_->GetShaderLanguageDesc(), draw_->GetBugs(), &attrMask, &uniformMask, nullptr, &errorString)) { ERROR_LOG(G3D, "Shader gen error: %s", errorString.c_str()); return nullptr; } diff --git a/GPU/GLES/StateMappingGLES.cpp b/GPU/GLES/StateMappingGLES.cpp index 0b8f45c3c67b..f80a3f54905e 100644 --- a/GPU/GLES/StateMappingGLES.cpp +++ b/GPU/GLES/StateMappingGLES.cpp @@ -158,7 +158,7 @@ void DrawEngineGLES::ApplyDrawState(int prim) { // fboTexNeedsBind_ won't be set if we can read directly from the target. if (fboTexBindState == FBO_TEX_COPY_BIND_TEX) { // Note that this is positions, not UVs, that we need the copy from. - framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY); + framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY, 0); // If we are rendering at a higher resolution, linear is probably best for the dest color. 
renderManager->SetTextureSampler(1, GL_CLAMP_TO_EDGE, GL_CLAMP_TO_EDGE, GL_LINEAR, GL_LINEAR, 0.0f); fboTexBound_ = true; diff --git a/GPU/GPU.vcxproj b/GPU/GPU.vcxproj index 808ce53635ed..860dbc38f306 100644 --- a/GPU/GPU.vcxproj +++ b/GPU/GPU.vcxproj @@ -652,6 +652,26 @@ true true + + true + true + true + true + true + true + true + true + + + true + true + true + true + true + true + true + true + diff --git a/GPU/GPU.vcxproj.filters b/GPU/GPU.vcxproj.filters index e110c20b8bf7..b8a990374ae4 100644 --- a/GPU/GPU.vcxproj.filters +++ b/GPU/GPU.vcxproj.filters @@ -534,5 +534,11 @@ Shaders + + Shaders + + + Shaders + \ No newline at end of file diff --git a/GPU/GPUState.h b/GPU/GPUState.h index e7a15e953619..03473712128d 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -562,6 +562,12 @@ struct GPUStateCache { Dirty(DIRTY_FRAGMENTSHADER_STATE | (is3D ? DIRTY_MIPBIAS : 0)); } } + void SetTextureIsArray(bool isArrayTexture) { // VK only + if (arrayTexture != isArrayTexture) { + arrayTexture = isArrayTexture; + Dirty(DIRTY_FRAGMENTSHADER_STATE); + } + } u32 useFlags; @@ -583,6 +589,7 @@ struct GPUStateCache { bool bgraTexture; bool needShaderTexClamp; + bool arrayTexture; float morphWeights[8]; u32 deferredVertTypeDirty; diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp index b24e4b00214e..8af2727736d7 100644 --- a/GPU/Vulkan/DrawEngineVulkan.cpp +++ b/GPU/Vulkan/DrawEngineVulkan.cpp @@ -42,6 +42,7 @@ #include "GPU/Common/VertexDecoderCommon.h" #include "GPU/Common/SoftwareTransformCommon.h" #include "GPU/Common/DrawEngineCommon.h" +#include "GPU/Common/ShaderUniforms.h" #include "GPU/Debugger/Debugger.h" #include "GPU/Vulkan/DrawEngineVulkan.h" #include "GPU/Vulkan/TextureCacheVulkan.h" @@ -184,8 +185,9 @@ void DrawEngineVulkan::InitDeviceObjects() { VkPipelineLayoutCreateInfo pl{ VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO }; pl.pPushConstantRanges = nullptr; pl.pushConstantRangeCount = 0; - pl.setLayoutCount = 1; - pl.pSetLayouts = 
&descriptorSetLayout_; + VkDescriptorSetLayout layouts[1] = { descriptorSetLayout_ }; + pl.setLayoutCount = ARRAY_SIZE(layouts); + pl.pSetLayouts = layouts; pl.flags = 0; res = vkCreatePipelineLayout(device, &pl, nullptr, &pipelineLayout_); _dbg_assert_(VK_SUCCESS == res); @@ -303,6 +305,7 @@ void DrawEngineVulkan::BeginFrame() { frame->pushIndex->Reset(); VulkanContext *vulkan = (VulkanContext *)draw_->GetNativeObject(Draw::NativeObject::CONTEXT); + frame->pushUBO->Begin(vulkan); frame->pushVertex->Begin(vulkan); frame->pushIndex->Begin(vulkan); @@ -401,7 +404,7 @@ VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView // Didn't find one in the frame descriptor set cache, let's make a new one. // We wipe the cache on every frame. - VkDescriptorSet desc = frame.descPool.Allocate(1, &descriptorSetLayout_); + VkDescriptorSet desc = frame.descPool.Allocate(1, &descriptorSetLayout_, "game_descset"); // Even in release mode, this is bad. _assert_msg_(desc != VK_NULL_HANDLE, "Ran out of descriptor space in pool. sz=%d", (int)frame.descSets.size()); @@ -579,10 +582,6 @@ void DrawEngineVulkan::DoFlush() { // Always use software for flat shading to fix the provoking index. bool useHWTransform = CanUseHardwareTransform(prim) && (tess || gstate.getShadeMode() != GE_SHADE_FLAT); - VulkanVertexShader *vshader = nullptr; - VulkanFragmentShader *fshader = nullptr; - VulkanGeometryShader *gshader = nullptr; - uint32_t ibOffset; uint32_t vbOffset; @@ -776,6 +775,10 @@ void DrawEngineVulkan::DoFlush() { ConvertStateToVulkanKey(*framebufferManager_, shaderManager_, prim, pipelineKey_, dynState_); } + VulkanVertexShader *vshader = nullptr; + VulkanFragmentShader *fshader = nullptr; + VulkanGeometryShader *gshader = nullptr; + shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader, &gshader, pipelineState_, true, useHWTessellation_, decOptions_.expandAllWeightsToFloat); // usehwtransform if (!vshader) { // We're screwed. 
@@ -906,6 +909,11 @@ void DrawEngineVulkan::DoFlush() { if (prim != lastPrim_ || gstate_c.IsDirty(DIRTY_BLEND_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE)) { ConvertStateToVulkanKey(*framebufferManager_, shaderManager_, prim, pipelineKey_, dynState_); } + + VulkanVertexShader *vshader = nullptr; + VulkanFragmentShader *fshader = nullptr; + VulkanGeometryShader *gshader = nullptr; + shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader, &gshader, pipelineState_, false, false, decOptions_.expandAllWeightsToFloat); // usehwtransform _dbg_assert_msg_(!vshader->UseHWTransform(), "Bad vshader"); VulkanPipeline *pipeline = pipelineManager_->GetOrCreatePipeline(renderManager, pipelineLayout_, pipelineKey_, &dec_->decFmt, vshader, fshader, gshader, false, 0); diff --git a/GPU/Vulkan/DrawEngineVulkan.h b/GPU/Vulkan/DrawEngineVulkan.h index ebf50e33b51d..258f95ae6d75 100644 --- a/GPU/Vulkan/DrawEngineVulkan.h +++ b/GPU/Vulkan/DrawEngineVulkan.h @@ -211,8 +211,9 @@ class DrawEngineVulkan : public DrawEngineCommon { Draw::DrawContext *draw_; - // We use a single descriptor set layout for all PSP draws. + // We use a shared descriptor set layout for all PSP draws. 
VkDescriptorSetLayout descriptorSetLayout_; + VkPipelineLayout pipelineLayout_; VulkanPipeline *lastPipeline_; VkDescriptorSet lastDs_ = VK_NULL_HANDLE; diff --git a/GPU/Vulkan/GPU_Vulkan.cpp b/GPU/Vulkan/GPU_Vulkan.cpp index c46b8b28128a..12d0328f989b 100644 --- a/GPU/Vulkan/GPU_Vulkan.cpp +++ b/GPU/Vulkan/GPU_Vulkan.cpp @@ -268,6 +268,18 @@ u32 GPU_Vulkan::CheckGPUFeatures() const { features |= GPU_ROUND_DEPTH_TO_16BIT; } + if (g_Config.bStereoRendering && draw_->GetDeviceCaps().multiViewSupported) { + features |= GPU_USE_SINGLE_PASS_STEREO; + features |= GPU_USE_SIMPLE_STEREO_PERSPECTIVE; + + features &= ~GPU_USE_FRAMEBUFFER_FETCH; // Need to figure out if this can be supported with multiview rendering + if (features & GPU_USE_GS_CULLING) { + // Many devices that support stereo and GS don't support GS during stereo. + features &= ~GPU_USE_GS_CULLING; + features |= GPU_USE_VS_RANGE_CULLING; + } + } + return features; } @@ -328,14 +340,11 @@ void GPU_Vulkan::BuildReportingInfo() { const auto &available = vulkan->GetDeviceFeatures().available; #define CHECK_BOOL_FEATURE(n) do { if (available.standard.n) { featureNames += ", " #n; } } while (false) +#define CHECK_BOOL_FEATURE_MULTIVIEW(n) do { if (available.multiview.n) { featureNames += ", " #n; } } while (false) std::string featureNames = ""; - CHECK_BOOL_FEATURE(robustBufferAccess); CHECK_BOOL_FEATURE(fullDrawIndexUint32); - CHECK_BOOL_FEATURE(imageCubeArray); - CHECK_BOOL_FEATURE(independentBlend); CHECK_BOOL_FEATURE(geometryShader); - CHECK_BOOL_FEATURE(tessellationShader); CHECK_BOOL_FEATURE(sampleRateShading); CHECK_BOOL_FEATURE(dualSrcBlend); CHECK_BOOL_FEATURE(logicOp); @@ -343,46 +352,23 @@ void GPU_Vulkan::BuildReportingInfo() { CHECK_BOOL_FEATURE(drawIndirectFirstInstance); CHECK_BOOL_FEATURE(depthClamp); CHECK_BOOL_FEATURE(depthBiasClamp); - CHECK_BOOL_FEATURE(fillModeNonSolid); CHECK_BOOL_FEATURE(depthBounds); - CHECK_BOOL_FEATURE(alphaToOne); - CHECK_BOOL_FEATURE(multiViewport); 
CHECK_BOOL_FEATURE(samplerAnisotropy); CHECK_BOOL_FEATURE(textureCompressionETC2); CHECK_BOOL_FEATURE(textureCompressionASTC_LDR); CHECK_BOOL_FEATURE(textureCompressionBC); CHECK_BOOL_FEATURE(occlusionQueryPrecise); CHECK_BOOL_FEATURE(pipelineStatisticsQuery); - CHECK_BOOL_FEATURE(vertexPipelineStoresAndAtomics); CHECK_BOOL_FEATURE(fragmentStoresAndAtomics); CHECK_BOOL_FEATURE(shaderTessellationAndGeometryPointSize); - CHECK_BOOL_FEATURE(shaderImageGatherExtended); - CHECK_BOOL_FEATURE(shaderStorageImageExtendedFormats); CHECK_BOOL_FEATURE(shaderStorageImageMultisample); - CHECK_BOOL_FEATURE(shaderStorageImageReadWithoutFormat); - CHECK_BOOL_FEATURE(shaderStorageImageWriteWithoutFormat); - CHECK_BOOL_FEATURE(shaderUniformBufferArrayDynamicIndexing); CHECK_BOOL_FEATURE(shaderSampledImageArrayDynamicIndexing); - CHECK_BOOL_FEATURE(shaderStorageBufferArrayDynamicIndexing); - CHECK_BOOL_FEATURE(shaderStorageImageArrayDynamicIndexing); CHECK_BOOL_FEATURE(shaderClipDistance); CHECK_BOOL_FEATURE(shaderCullDistance); - CHECK_BOOL_FEATURE(shaderFloat64); CHECK_BOOL_FEATURE(shaderInt64); CHECK_BOOL_FEATURE(shaderInt16); - CHECK_BOOL_FEATURE(shaderResourceResidency); - CHECK_BOOL_FEATURE(shaderResourceMinLod); - CHECK_BOOL_FEATURE(sparseBinding); - CHECK_BOOL_FEATURE(sparseResidencyBuffer); - CHECK_BOOL_FEATURE(sparseResidencyImage2D); - CHECK_BOOL_FEATURE(sparseResidencyImage3D); - CHECK_BOOL_FEATURE(sparseResidency2Samples); - CHECK_BOOL_FEATURE(sparseResidency4Samples); - CHECK_BOOL_FEATURE(sparseResidency8Samples); - CHECK_BOOL_FEATURE(sparseResidency16Samples); - CHECK_BOOL_FEATURE(sparseResidencyAliased); - CHECK_BOOL_FEATURE(variableMultisampleRate); - CHECK_BOOL_FEATURE(inheritedQueries); + CHECK_BOOL_FEATURE_MULTIVIEW(multiview); + CHECK_BOOL_FEATURE_MULTIVIEW(multiviewGeometryShader); #undef CHECK_BOOL_FEATURE diff --git a/GPU/Vulkan/PipelineManagerVulkan.cpp b/GPU/Vulkan/PipelineManagerVulkan.cpp index 3d7745b45254..d9ae5ddd7064 100644 --- 
a/GPU/Vulkan/PipelineManagerVulkan.cpp +++ b/GPU/Vulkan/PipelineManagerVulkan.cpp @@ -12,6 +12,7 @@ #include "GPU/Vulkan/PipelineManagerVulkan.h" #include "GPU/Vulkan/ShaderManagerVulkan.h" #include "GPU/Common/DrawEngineCommon.h" +#include "GPU/Common/ShaderId.h" #include "Common/GPU/thin3d.h" #include "Common/GPU/Vulkan/VulkanRenderManager.h" #include "Common/GPU/Vulkan/VulkanQueueRunner.h" @@ -296,7 +297,12 @@ static VulkanPipeline *CreateVulkanPipeline(VulkanRenderManager *renderManager, desc->pipelineLayout = layout; - VKRGraphicsPipeline *pipeline = renderManager->CreateGraphicsPipeline(desc, pipelineFlags, variantBitmask, "game"); + std::string tag = "game"; +#ifdef _DEBUG + tag = FragmentShaderDesc(fs->GetID()) + " VS " + VertexShaderDesc(vs->GetID()); +#endif + + VKRGraphicsPipeline *pipeline = renderManager->CreateGraphicsPipeline(desc, pipelineFlags, variantBitmask, tag.c_str()); vulkanPipeline->pipeline = pipeline; if (useBlendConstant) { @@ -336,6 +342,9 @@ VulkanPipeline *PipelineManagerVulkan::GetOrCreatePipeline(VulkanRenderManager * if (fs->Flags() & FragmentShaderFlags::INPUT_ATTACHMENT) { pipelineFlags |= PipelineFlags::USES_INPUT_ATTACHMENT; } + if (vs->Flags() & VertexShaderFlags::MULTI_VIEW) { + pipelineFlags |= PipelineFlags::USES_MULTIVIEW; + } VulkanPipeline *pipeline = CreateVulkanPipeline( renderManager, pipelineCache_, layout, pipelineFlags, diff --git a/GPU/Vulkan/ShaderManagerVulkan.cpp b/GPU/Vulkan/ShaderManagerVulkan.cpp index 249078cf434c..6356f43e0a03 100644 --- a/GPU/Vulkan/ShaderManagerVulkan.cpp +++ b/GPU/Vulkan/ShaderManagerVulkan.cpp @@ -135,8 +135,8 @@ std::string VulkanFragmentShader::GetShaderString(DebugShaderStringType type) co } } -VulkanVertexShader::VulkanVertexShader(VulkanContext *vulkan, VShaderID id, const char *code, bool useHWTransform) - : vulkan_(vulkan), useHWTransform_(useHWTransform), id_(id) { +VulkanVertexShader::VulkanVertexShader(VulkanContext *vulkan, VShaderID id, VertexShaderFlags flags, const char 
*code, bool useHWTransform) + : vulkan_(vulkan), useHWTransform_(useHWTransform), flags_(flags), id_(id) { source_ = code; module_ = CompileShaderModuleAsync(vulkan, VK_SHADER_STAGE_VERTEX_BIT, source_.c_str(), new std::string(VertexShaderDesc(id))); if (!module_) { @@ -331,10 +331,11 @@ void ShaderManagerVulkan::GetShaders(int prim, u32 vertType, VulkanVertexShader std::string genErrorString; uint64_t uniformMask = 0; // Not used uint32_t attributeMask = 0; // Not used - bool success = GenerateVertexShader(VSID, codeBuffer_, compat_, draw_->GetBugs(), &attributeMask, &uniformMask, &genErrorString); + VertexShaderFlags flags{}; + bool success = GenerateVertexShader(VSID, codeBuffer_, compat_, draw_->GetBugs(), &attributeMask, &uniformMask, &flags, &genErrorString); _assert_msg_(success, "VS gen error: %s", genErrorString.c_str()); _assert_msg_(strlen(codeBuffer_) < CODE_BUFFER_SIZE, "VS length error: %d", (int)strlen(codeBuffer_)); - vs = new VulkanVertexShader(vulkan, VSID, codeBuffer_, useHWTransform); + vs = new VulkanVertexShader(vulkan, VSID, flags, codeBuffer_, useHWTransform); vsCache_.Insert(VSID, vs); } @@ -343,7 +344,7 @@ void ShaderManagerVulkan::GetShaders(int prim, u32 vertType, VulkanVertexShader // Fragment shader not in cache. Let's compile it. 
std::string genErrorString; uint64_t uniformMask = 0; // Not used - FragmentShaderFlags flags; + FragmentShaderFlags flags{}; bool success = GenerateFragmentShader(FSID, codeBuffer_, compat_, draw_->GetBugs(), &uniformMask, &flags, &genErrorString); _assert_msg_(success, "FS gen error: %s", genErrorString.c_str()); _assert_msg_(strlen(codeBuffer_) < CODE_BUFFER_SIZE, "FS length error: %d", (int)strlen(codeBuffer_)); @@ -515,11 +516,12 @@ bool ShaderManagerVulkan::LoadCache(FILE *f) { std::string genErrorString; uint32_t attributeMask = 0; uint64_t uniformMask = 0; - if (!GenerateVertexShader(id, codeBuffer_, compat_, draw_->GetBugs(), &attributeMask, &uniformMask, &genErrorString)) { + VertexShaderFlags flags; + if (!GenerateVertexShader(id, codeBuffer_, compat_, draw_->GetBugs(), &attributeMask, &uniformMask, &flags, &genErrorString)) { return false; } _assert_msg_(strlen(codeBuffer_) < CODE_BUFFER_SIZE, "VS length error: %d", (int)strlen(codeBuffer_)); - VulkanVertexShader *vs = new VulkanVertexShader(vulkan, id, codeBuffer_, useHWTransform); + VulkanVertexShader *vs = new VulkanVertexShader(vulkan, id, flags, codeBuffer_, useHWTransform); vsCache_.Insert(id, vs); } uint32_t vendorID = vulkan->GetPhysicalDeviceProperties().properties.vendorID; diff --git a/GPU/Vulkan/ShaderManagerVulkan.h b/GPU/Vulkan/ShaderManagerVulkan.h index 4c89f91ef5f6..88f138d923f8 100644 --- a/GPU/Vulkan/ShaderManagerVulkan.h +++ b/GPU/Vulkan/ShaderManagerVulkan.h @@ -61,13 +61,14 @@ class VulkanFragmentShader { class VulkanVertexShader { public: - VulkanVertexShader(VulkanContext *vulkan, VShaderID id, const char *code, bool useHWTransform); + VulkanVertexShader(VulkanContext *vulkan, VShaderID id, VertexShaderFlags flags, const char *code, bool useHWTransform); ~VulkanVertexShader(); const std::string &source() const { return source_; } bool Failed() const { return failed_; } - bool UseHWTransform() const { return useHWTransform_; } + bool UseHWTransform() const { return 
useHWTransform_; } // TODO: Roll into flags + VertexShaderFlags Flags() const { return flags_; } std::string GetShaderString(DebugShaderStringType type) const; Promise *GetModule() { return module_; } @@ -81,6 +82,7 @@ class VulkanVertexShader { bool failed_ = false; bool useHWTransform_; VShaderID id_; + VertexShaderFlags flags_; }; class VulkanGeometryShader { diff --git a/GPU/Vulkan/StateMappingVulkan.cpp b/GPU/Vulkan/StateMappingVulkan.cpp index c29d2317d928..4e877d335a66 100644 --- a/GPU/Vulkan/StateMappingVulkan.cpp +++ b/GPU/Vulkan/StateMappingVulkan.cpp @@ -365,7 +365,8 @@ void DrawEngineVulkan::BindShaderBlendTex() { // Set the nearest/linear here (since we correctly know if alpha/color tests are needed)? if (!gstate.isModeClear()) { if (fboTexBindState_ == FBO_TEX_COPY_BIND_TEX) { - bool bindResult = framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY); + VirtualFramebuffer *curRenderVfb = framebufferManager_->GetCurrentRenderVFB(); + bool bindResult = framebufferManager_->BindFramebufferAsColorTexture(1, curRenderVfb, BINDFBCOLOR_MAY_COPY, Draw::ALL_LAYERS); _dbg_assert_(bindResult); boundSecondary_ = (VkImageView)draw_->GetNativeObject(Draw::NativeObject::BOUND_TEXTURE1_IMAGEVIEW); boundSecondaryIsInputAttachment_ = false; @@ -376,7 +377,7 @@ void DrawEngineVulkan::BindShaderBlendTex() { dirtyRequiresRecheck_ |= DIRTY_BLEND_STATE; } else if (fboTexBindState_ == FBO_TEX_READ_FRAMEBUFFER) { draw_->BindCurrentFramebufferForColorInput(); - boundSecondary_ = (VkImageView)draw_->GetNativeObject(Draw::NativeObject::BOUND_FRAMEBUFFER_COLOR_IMAGEVIEW); + boundSecondary_ = (VkImageView)draw_->GetNativeObject(Draw::NativeObject::BOUND_FRAMEBUFFER_COLOR_IMAGEVIEW_LAYER, (void *)0); boundSecondaryIsInputAttachment_ = true; fboTexBindState_ = FBO_TEX_NONE; } else { diff --git a/GPU/Vulkan/TextureCacheVulkan.cpp b/GPU/Vulkan/TextureCacheVulkan.cpp index c465aa31c242..8f80aa8474db 100644 --- 
a/GPU/Vulkan/TextureCacheVulkan.cpp +++ b/GPU/Vulkan/TextureCacheVulkan.cpp @@ -846,6 +846,6 @@ std::string TextureCacheVulkan::DebugGetSamplerString(std::string id, DebugShade } void *TextureCacheVulkan::GetNativeTextureView(const TexCacheEntry *entry) { - VkImageView view = entry->vkTex->GetImageView(); + VkImageView view = entry->vkTex->GetImageArrayView(); return (void *)view; } diff --git a/GPU/Vulkan/VulkanUtil.cpp b/GPU/Vulkan/VulkanUtil.cpp index fb82f9a47414..d23f2ad9b24e 100644 --- a/GPU/Vulkan/VulkanUtil.cpp +++ b/GPU/Vulkan/VulkanUtil.cpp @@ -136,7 +136,7 @@ void VulkanComputeShaderManager::DestroyDeviceObjects() { VkDescriptorSet VulkanComputeShaderManager::GetDescriptorSet(VkImageView image, VkBuffer buffer, VkDeviceSize offset, VkDeviceSize range, VkBuffer buffer2, VkDeviceSize offset2, VkDeviceSize range2) { int curFrame = vulkan_->GetCurFrame(); FrameData &frameData = frameData_[curFrame]; - VkDescriptorSet desc = frameData.descPool.Allocate(1, &descriptorSetLayout_); + VkDescriptorSet desc = frameData.descPool.Allocate(1, &descriptorSetLayout_, "compute_descset"); _assert_(desc != VK_NULL_HANDLE); VkWriteDescriptorSet writes[2]{}; diff --git a/UI/GameSettingsScreen.cpp b/UI/GameSettingsScreen.cpp index 158fc547d040..95bd12d276d3 100644 --- a/UI/GameSettingsScreen.cpp +++ b/UI/GameSettingsScreen.cpp @@ -350,6 +350,41 @@ void GameSettingsScreen::CreateViews() { graphicsSettings->Add(new ItemHeader(gr->T("Postprocessing effect"))); + bool multiViewSupported = draw->GetDeviceCaps().multiViewSupported; + + auto enableStereo = [=]() -> bool { + return g_Config.bStereoRendering && multiViewSupported; + }; + + if (draw->GetDeviceCaps().multiViewSupported) { + graphicsSettings->Add(new CheckBox(&g_Config.bStereoRendering, gr->T("Stereo rendering"))); + std::vector stereoShaderNames; + + ChoiceWithValueDisplay *stereoShaderChoice = graphicsSettings->Add(new ChoiceWithValueDisplay(&g_Config.sStereoToMonoShader, "Stereo display shader", 
&PostShaderTranslateName)); + stereoShaderChoice->SetEnabledFunc(enableStereo); + stereoShaderChoice->OnClick.Add([=](EventParams &e) { + auto gr = GetI18NCategory("Graphics"); + auto procScreen = new PostProcScreen(gr->T("Stereo display shader"), 0, true); + if (e.v) + procScreen->SetPopupOrigin(e.v); + screenManager()->push(procScreen); + return UI::EVENT_DONE; + }); + const ShaderInfo *shaderInfo = GetPostShaderInfo(g_Config.sStereoToMonoShader); + if (shaderInfo) { + for (size_t i = 0; i < ARRAY_SIZE(shaderInfo->settings); ++i) { + auto &setting = shaderInfo->settings[i]; + if (!setting.name.empty()) { + auto &value = g_Config.mPostShaderSetting[StringFromFormat("%sSettingValue%d", shaderInfo->section.c_str(), (int)i + 1)]; + PopupSliderChoiceFloat *settingValue = graphicsSettings->Add(new PopupSliderChoiceFloat(&value, setting.minValue, setting.maxValue, ps->T(setting.name), setting.step, screenManager())); + settingValue->SetEnabledFunc([=] { + return g_Config.iRenderingMode != FB_NON_BUFFERED_MODE && enableStereo(); + }); + } + } + } + } + std::set alreadyAddedShader; for (int i = 0; i < (int)g_Config.vPostShaderNames.size() + 1 && i < ARRAY_SIZE(shaderNames_); ++i) { // Vector element pointer get invalidated on resize, cache name to have always a valid reference in the rendering thread @@ -357,15 +392,15 @@ void GameSettingsScreen::CreateViews() { postProcChoice_ = graphicsSettings->Add(new ChoiceWithValueDisplay(&shaderNames_[i], StringFromFormat("%s #%d", gr->T("Postprocessing Shader"), i + 1), &PostShaderTranslateName)); postProcChoice_->OnClick.Add([=](EventParams &e) { auto gr = GetI18NCategory("Graphics"); - auto procScreen = new PostProcScreen(gr->T("Postprocessing Shader"), i); + auto procScreen = new PostProcScreen(gr->T("Postprocessing Shader"), i, false); procScreen->OnChoice.Handle(this, &GameSettingsScreen::OnPostProcShaderChange); if (e.v) procScreen->SetPopupOrigin(e.v); screenManager()->push(procScreen); return UI::EVENT_DONE; }); -
postProcChoice_->SetEnabledFunc([] { - return g_Config.iRenderingMode != FB_NON_BUFFERED_MODE; + postProcChoice_->SetEnabledFunc([=] { + return g_Config.iRenderingMode != FB_NON_BUFFERED_MODE && !enableStereo(); }); // No need for settings on the last one. @@ -387,8 +422,8 @@ void GameSettingsScreen::CreateViews() { settingValue->SetEnabled(false); } else { PopupSliderChoiceFloat *settingValue = graphicsSettings->Add(new PopupSliderChoiceFloat(&value, setting.minValue, setting.maxValue, ps->T(setting.name), setting.step, screenManager())); - settingValue->SetEnabledFunc([] { - return g_Config.iRenderingMode != FB_NON_BUFFERED_MODE; + settingValue->SetEnabledFunc([=] { + return g_Config.iRenderingMode != FB_NON_BUFFERED_MODE && !enableStereo(); }); } } diff --git a/UI/GameSettingsScreen.h b/UI/GameSettingsScreen.h index 82b4cd859e3c..2fd7d6e59d41 100644 --- a/UI/GameSettingsScreen.h +++ b/UI/GameSettingsScreen.h @@ -142,6 +142,7 @@ class GameSettingsScreen : public UIDialogScreenWithGameBackground { bool enableReports_ = false; bool enableReportsSet_ = false; bool analogSpeedMapped_ = false; + std::string shaderNames_[256]; std::string searchFilter_; diff --git a/UI/MiscScreens.cpp b/UI/MiscScreens.cpp index d90d92f76d76..7d5421d0ec30 100644 --- a/UI/MiscScreens.cpp +++ b/UI/MiscScreens.cpp @@ -521,7 +521,7 @@ void PromptScreen::TriggerFinish(DialogResult result) { UIDialogScreenWithBackground::TriggerFinish(result); } -PostProcScreen::PostProcScreen(const std::string &title, int id) : ListPopupScreen(title), id_(id) { } +PostProcScreen::PostProcScreen(const std::string &title, int id, bool showStereoShaders) : ListPopupScreen(title), id_(id), showStereoShaders_(showStereoShaders) { } void PostProcScreen::CreateViews() { auto ps = GetI18NCategory("PostShaders"); @@ -530,12 +530,16 @@ void PostProcScreen::CreateViews() { std::vector items; int selected = -1; const std::string selectedName = id_ >= (int)g_Config.vPostShaderNames.size() ?
"Off" : g_Config.vPostShaderNames[id_]; + for (int i = 0; i < (int)shaders_.size(); i++) { if (!shaders_[i].visible) continue; + if (shaders_[i].isStereo != showStereoShaders_) + continue; if (shaders_[i].section == selectedName) - selected = i; + selected = (int)indexTranslation_.size(); items.push_back(ps->T(shaders_[i].section.c_str(), shaders_[i].name.c_str())); + indexTranslation_.push_back(i); } adaptor_ = UI::StringVectorListAdaptor(items, selected); ListPopupScreen::CreateViews(); @@ -544,11 +548,16 @@ void PostProcScreen::CreateViews() { void PostProcScreen::OnCompleted(DialogResult result) { if (result != DR_OK) return; - const std::string &value = shaders_[listView_->GetSelected()].section; - if (id_ < (int)g_Config.vPostShaderNames.size()) - g_Config.vPostShaderNames[id_] = value; - else - g_Config.vPostShaderNames.push_back(value); + const std::string &value = shaders_[indexTranslation_[listView_->GetSelected()]].section; + // I feel this logic belongs more in the caller, but eh... 
+ if (showStereoShaders_) { + g_Config.sStereoToMonoShader = value; + } else { + if (id_ < (int)g_Config.vPostShaderNames.size()) + g_Config.vPostShaderNames[id_] = value; + else + g_Config.vPostShaderNames.push_back(value); + } } TextureShaderScreen::TextureShaderScreen(const std::string &title) : ListPopupScreen(title) {} diff --git a/UI/MiscScreens.h b/UI/MiscScreens.h index 164a06cfec67..e799c20828f2 100644 --- a/UI/MiscScreens.h +++ b/UI/MiscScreens.h @@ -110,7 +110,7 @@ class NewLanguageScreen : public ListPopupScreen { class PostProcScreen : public ListPopupScreen { public: - PostProcScreen(const std::string &title, int id); + PostProcScreen(const std::string &title, int id, bool showStereoShaders); void CreateViews() override; @@ -121,6 +121,8 @@ class PostProcScreen : public ListPopupScreen { bool ShowButtons() const override { return true; } std::vector shaders_; int id_; + bool showStereoShaders_; + std::vector indexTranslation_; }; class TextureShaderScreen : public ListPopupScreen { diff --git a/Windows/PPSSPP.vcxproj b/Windows/PPSSPP.vcxproj index 63ecbc169191..67d3f25f66d7 100644 --- a/Windows/PPSSPP.vcxproj +++ b/Windows/PPSSPP.vcxproj @@ -1498,6 +1498,7 @@ + diff --git a/Windows/PPSSPP.vcxproj.filters b/Windows/PPSSPP.vcxproj.filters index 0706f8fb9c4a..9575308dd8cc 100644 --- a/Windows/PPSSPP.vcxproj.filters +++ b/Windows/PPSSPP.vcxproj.filters @@ -711,6 +711,9 @@ Other Platforms\SDL + + Resource Files + diff --git a/assets/shaders/defaultshaders.ini b/assets/shaders/defaultshaders.ini index a125a09206fc..79bb4bb5e6e1 100644 --- a/assets/shaders/defaultshaders.ini +++ b/assets/shaders/defaultshaders.ini @@ -186,3 +186,25 @@ Name=MMPX (2x) Author=Morgan McGuire and Mara Gagiu Compute=tex_mmpx.csh Scale=2 +[RedBlue] +Type=StereoToMono +Name=Red/Blue glasses (anaglyph) +Author=Henrik Rydgård +SettingName1=ColorPreservation +SettingDefaultValue1=0.5 +SettingMaxValue1=1.0 +SettingMinValue1=0.0 +SettingStep1=0.05 +SettingName2=GreenLevel 
+SettingDefaultValue2=0.5 +SettingMaxValue2=1.0 +SettingMinValue2=0.0 +SettingStep2=0.05 +Fragment=stereo_red_blue.fsh +Vertex=fxaa.vsh +[SideBySize] +Type=StereoToMono +Name=SideBySide Stereo +Author=Henrik Rydgård +Fragment=stereo_sbs.fsh +Vertex=fxaa.vsh diff --git a/assets/shaders/stereo_red_blue.fsh b/assets/shaders/stereo_red_blue.fsh new file mode 100644 index 000000000000..1ad875b6b3c5 --- /dev/null +++ b/assets/shaders/stereo_red_blue.fsh @@ -0,0 +1,31 @@ +// Red/Blue glasses stereo, also known as anaglyph. +// +// NOTE: Will only be compiled for Vulkan so doesn't follow all the usual conventions. + +uniform sampler2DArray sampler0; +varying vec2 v_texcoord0; + +uniform vec4 u_setting; + +void main() { + float saturation = u_setting.x; + float greenMix = u_setting.y; + + // To be adjusted. Used to desaturate colors. + vec3 grayDot = vec3(0.35, 0.5, 0.15); + // And these are the output color channels. + vec3 red = vec3(1.0, 0.0, 0.0); + vec3 blue = vec3(0.0, greenMix, 1.0); + + vec3 left = texture(sampler0, vec3(v_texcoord0, 0.0)).xyz; + vec3 right = texture(sampler0, vec3(v_texcoord0, 1.0)).xyz; + + float leftGray = dot(left, grayDot); + float rightGray = dot(right, grayDot); + + vec3 leftColor = mix(vec3(leftGray), left, saturation) * red; + vec3 rightColor = mix(vec3(rightGray), right, saturation) * blue; + + gl_FragColor.rgb = leftColor + rightColor; + gl_FragColor.a = 1.0; +} diff --git a/assets/shaders/stereo_sbs.fsh b/assets/shaders/stereo_sbs.fsh new file mode 100644 index 000000000000..c308f3aa0d4c --- /dev/null +++ b/assets/shaders/stereo_sbs.fsh @@ -0,0 +1,18 @@ +// Side by side stereo, useful for old 3D TVs. +// +// NOTE: Will only be compiled for Vulkan so doesn't follow all the usual conventions. 
+ +uniform sampler2DArray sampler0; +varying vec2 v_texcoord0; + +uniform vec4 u_setting; + +void main() { + if (v_texcoord0.x < 0.5) { + gl_FragColor.rgb = texture(sampler0, vec3(v_texcoord0.x * 2.0, v_texcoord0.y, 0.0)).xyz; + } else { + gl_FragColor.rgb = texture(sampler0, vec3((v_texcoord0.x - 0.5) * 2.0, v_texcoord0.y, 1.0)).xyz; + } + + gl_FragColor.a = 1.0; +} diff --git a/unittest/TestShaderGenerators.cpp b/unittest/TestShaderGenerators.cpp index a9546408a525..6701e162fd54 100644 --- a/unittest/TestShaderGenerators.cpp +++ b/unittest/TestShaderGenerators.cpp @@ -28,6 +28,8 @@ static constexpr size_t CODE_BUFFER_SIZE = 32768; bool GenerateFShader(FShaderID id, char *buffer, ShaderLanguage lang, Draw::Bugs bugs, std::string *errorString) { + buffer[0] = '\0'; + uint64_t uniformMask; switch (lang) { case ShaderLanguage::GLSL_VULKAN: @@ -61,33 +63,35 @@ bool GenerateFShader(FShaderID id, char *buffer, ShaderLanguage lang, Draw::Bugs } bool GenerateVShader(VShaderID id, char *buffer, ShaderLanguage lang, Draw::Bugs bugs, std::string *errorString) { + buffer[0] = '\0'; + uint32_t attrMask; uint64_t uniformMask; switch (lang) { case ShaderLanguage::GLSL_VULKAN: { ShaderLanguageDesc compat(ShaderLanguage::GLSL_VULKAN); - return GenerateVertexShader(id, buffer, compat, bugs, &attrMask, &uniformMask, errorString); + return GenerateVertexShader(id, buffer, compat, bugs, &attrMask, &uniformMask, nullptr, errorString); } case ShaderLanguage::GLSL_1xx: { ShaderLanguageDesc compat(ShaderLanguage::GLSL_1xx); - return GenerateVertexShader(id, buffer, compat, bugs, &attrMask, &uniformMask, errorString); + return GenerateVertexShader(id, buffer, compat, bugs, &attrMask, &uniformMask, nullptr, errorString); } case ShaderLanguage::GLSL_3xx: { ShaderLanguageDesc compat(ShaderLanguage::GLSL_3xx); - return GenerateVertexShader(id, buffer, compat, bugs, &attrMask, &uniformMask, errorString); + return GenerateVertexShader(id, buffer, compat, bugs, &attrMask, &uniformMask, nullptr, 
errorString); } case ShaderLanguage::HLSL_D3D9: { ShaderLanguageDesc compat(ShaderLanguage::HLSL_D3D9); - return GenerateVertexShader(id, buffer, compat, bugs, &attrMask, &uniformMask, errorString); + return GenerateVertexShader(id, buffer, compat, bugs, &attrMask, &uniformMask, nullptr, errorString); } case ShaderLanguage::HLSL_D3D11: { ShaderLanguageDesc compat(ShaderLanguage::HLSL_D3D11); - return GenerateVertexShader(id, buffer, compat, bugs, &attrMask, &uniformMask, errorString); + return GenerateVertexShader(id, buffer, compat, bugs, &attrMask, &uniformMask, nullptr, errorString); } default: return false; @@ -95,6 +99,8 @@ bool GenerateVShader(VShaderID id, char *buffer, ShaderLanguage lang, Draw::Bugs } bool GenerateGShader(GShaderID id, char *buffer, ShaderLanguage lang, Draw::Bugs bugs, std::string *errorString) { + buffer[0] = '\0'; + errorString->clear(); switch (lang) {