Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

VideoCommon: Add shader logic ops support for Apple Silicon GPUs #9990

Closed
wants to merge 12 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -572,6 +572,10 @@ add_subdirectory(Externals/glslang)

# Vulkan backend: expose HAS_VULKAN as a compile definition.
if(ENABLE_VULKAN)
add_definitions(-DHAS_VULKAN)

# On Apple platforms, also pull in the MoltenVK external project under Externals/MoltenVK.
if(APPLE)
add_subdirectory(Externals/MoltenVK)
endif()
endif()

if(NOT WIN32 OR (NOT (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")))
Expand Down
20 changes: 20 additions & 0 deletions Externals/MoltenVK/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
include(ExternalProject)

# Fetches MoltenVK v1.1.4, applies the local framebuffer-fetch patch and builds the macOS
# dylib in-tree. Nothing is installed; consumers pick libMoltenVK.dylib up from the
# BUILD_BYPRODUCTS path below.
ExternalProject_Add(MoltenVK
  GIT_REPOSITORY https://github.com/KhronosGroup/MoltenVK.git
  GIT_TAG v1.1.4

  CONFIGURE_COMMAND <SOURCE_DIR>/fetchDependencies --macos

  # Two separate commands rather than a shell "&&": ExternalProject does not guarantee that
  # commands run through a shell, so shell operators have undefined behaviour here.
  # The reset presumably exists so the patch applies cleanly when the step is re-run.
  PATCH_COMMAND git reset --hard v1.1.4
  COMMAND git apply "${CMAKE_SOURCE_DIR}/Externals/MoltenVK/patches/0001-SPIRVToMSLConverter-Enable-use_framebuffer_fetch_sub.patch"

  BUILD_COMMAND make -C <SOURCE_DIR> macos
  BUILD_IN_SOURCE ON
  BUILD_BYPRODUCTS <SOURCE_DIR>/Package/Release/MoltenVK/dylib/macOS/libMoltenVK.dylib

  # The built dylib is consumed directly from the source tree, so there is no install step.
  INSTALL_COMMAND ""

  LOG_CONFIGURE ON
  LOG_BUILD ON
  LOG_OUTPUT_ON_FAILURE ON
)
Binary file removed Externals/MoltenVK/libvulkan.dylib
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
From 4ca33b7a9b149c6fbcc1c88ce08fc49f21294f6d Mon Sep 17 00:00:00 2001
From: OatmealDome <julian@oatmealdome.me>
Date: Sat, 31 Jul 2021 19:18:35 -0400
Subject: [PATCH] SPIRVToMSLConverter: Enable use_framebuffer_fetch_subpasses

---
.../MoltenVKShaderConverter/SPIRVToMSLConverter.cpp | 1 +
1 file changed, 1 insertion(+)

diff --git a/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.cpp b/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.cpp
index 17c79394..97e98004 100644
--- a/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.cpp
+++ b/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.cpp
@@ -92,6 +92,7 @@ MVK_PUBLIC_SYMBOL SPIRVToMSLConversionOptions::SPIRVToMSLConversionOptions() {
#endif

mslOptions.pad_fragment_output_components = true;
+ mslOptions.use_framebuffer_fetch_subpasses = true;
}

MVK_PUBLIC_SYMBOL bool mvk::MSLShaderInput::matches(const mvk::MSLShaderInput& other) const {
--
2.30.1 (Apple Git-130)

1 change: 0 additions & 1 deletion Externals/MoltenVK/version.txt

This file was deleted.

8 changes: 6 additions & 2 deletions Source/Core/DolphinQt/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -511,8 +511,12 @@ if(APPLE)
endforeach()

# Copy MoltenVK into the bundle
target_sources(dolphin-emu PRIVATE "${CMAKE_SOURCE_DIR}/Externals/MoltenVK/libvulkan.dylib")
set_source_files_properties("${CMAKE_SOURCE_DIR}/Externals/MoltenVK/libvulkan.dylib" PROPERTIES MACOSX_PACKAGE_LOCATION Frameworks)
if(ENABLE_VULKAN)
add_dependencies(dolphin-emu MoltenVK)
ExternalProject_Get_Property(MoltenVK SOURCE_DIR)
target_sources(dolphin-emu PRIVATE "${SOURCE_DIR}/Package/Release/MoltenVK/dylib/macOS/libMoltenVK.dylib")
set_source_files_properties("${SOURCE_DIR}/Package/Release/MoltenVK/dylib/macOS/libMoltenVK.dylib" PROPERTIES MACOSX_PACKAGE_LOCATION Frameworks GENERATED ON)
endif()

# Update library references to make the bundle portable
include(DolphinPostprocessBundle)
Expand Down
24 changes: 11 additions & 13 deletions Source/Core/VideoBackends/OGL/OGLRender.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,7 @@ Renderer::Renderer(std::unique_ptr<GLContext> main_gl_context, float backbuffer_
g_Config.backend_info.AdapterName = g_ogl_config.gl_renderer;

g_Config.backend_info.bSupportsDualSourceBlend =
!DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DUAL_SOURCE_BLENDING) &&
(GLExtensions::Supports("GL_ARB_blend_func_extended") ||
GLExtensions::Supports("GL_EXT_blend_func_extended"));
g_Config.backend_info.bSupportsPrimitiveRestart =
Expand Down Expand Up @@ -1169,14 +1170,11 @@ void Renderer::ApplyBlendingState(const BlendingState state)
if (m_current_blend_state == state)
return;

bool useDualSource =
state.usedualsrc && g_ActiveConfig.backend_info.bSupportsDualSourceBlend &&
(!DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DUAL_SOURCE_BLENDING) || state.dstalpha);
// Only use shader blend if we need to and we don't support dual-source blending directly
bool useShaderBlend = !useDualSource && state.usedualsrc && state.dstalpha &&
g_ActiveConfig.backend_info.bSupportsFramebufferFetch;
bool use_dual_source = state.usedualsrc && g_ActiveConfig.backend_info.bSupportsDualSourceBlend;
bool use_shader_blend =
!use_dual_source && state.usedualsrc && g_ActiveConfig.backend_info.bSupportsFramebufferFetch;

if (useShaderBlend)
if (use_shader_blend)
{
glDisable(GL_BLEND);
}
Expand All @@ -1186,18 +1184,18 @@ void Renderer::ApplyBlendingState(const BlendingState state)
GL_ONE,
GL_DST_COLOR,
GL_ONE_MINUS_DST_COLOR,
useDualSource ? GL_SRC1_ALPHA : (GLenum)GL_SRC_ALPHA,
useDualSource ? GL_ONE_MINUS_SRC1_ALPHA :
(GLenum)GL_ONE_MINUS_SRC_ALPHA,
use_dual_source ? GL_SRC1_ALPHA : (GLenum)GL_SRC_ALPHA,
use_dual_source ? GL_ONE_MINUS_SRC1_ALPHA :
(GLenum)GL_ONE_MINUS_SRC_ALPHA,
GL_DST_ALPHA,
GL_ONE_MINUS_DST_ALPHA};
const GLenum dst_factors[8] = {GL_ZERO,
GL_ONE,
GL_SRC_COLOR,
GL_ONE_MINUS_SRC_COLOR,
useDualSource ? GL_SRC1_ALPHA : (GLenum)GL_SRC_ALPHA,
useDualSource ? GL_ONE_MINUS_SRC1_ALPHA :
(GLenum)GL_ONE_MINUS_SRC_ALPHA,
use_dual_source ? GL_SRC1_ALPHA : (GLenum)GL_SRC_ALPHA,
use_dual_source ? GL_ONE_MINUS_SRC1_ALPHA :
(GLenum)GL_ONE_MINUS_SRC_ALPHA,
GL_DST_ALPHA,
GL_ONE_MINUS_DST_ALPHA};

Expand Down
4 changes: 4 additions & 0 deletions Source/Core/VideoBackends/Vulkan/ShaderCompiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,13 @@ static const char SHADER_HEADER[] = R"(
#define SAMPLER_BINDING(x) layout(set = 1, binding = x)
#define TEXEL_BUFFER_BINDING(x) layout(set = 1, binding = (x + 8))
#define SSBO_BINDING(x) layout(set = 2, binding = x)
#define INPUT_ATTACHMENT_BINDING(x, y, z) layout(set = x, binding = y, input_attachment_index = z)
#define VARYING_LOCATION(x) layout(location = x)
#define FORCE_EARLY_Z layout(early_fragment_tests) in

// Metal framebuffer fetch helpers.
#define FB_FETCH_VALUE subpassLoad(in_ocol0)

// hlsl to glsl function translation
#define API_VULKAN 1
#define float2 vec2
Expand Down
11 changes: 10 additions & 1 deletion Source/Core/VideoBackends/Vulkan/VulkanContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ void VulkanContext::PopulateBackendInfo(VideoConfig* config)
config->backend_info.bSupportsBPTCTextures = false; // Dependent on features.
config->backend_info.bSupportsLogicOp = false; // Dependent on features.
config->backend_info.bSupportsLargePoints = false; // Dependent on features.
config->backend_info.bSupportsFramebufferFetch = false; // No support.
config->backend_info.bSupportsFramebufferFetch = false; // Dependent on OS and features.
}

void VulkanContext::PopulateBackendInfoAdapters(VideoConfig* config, const GPUList& gpu_list)
Expand Down Expand Up @@ -336,6 +336,15 @@ void VulkanContext::PopulateBackendInfoFeatures(VideoConfig* config, VkPhysicalD
properties.limits.pointSizeRange[0] <= 1.0f &&
properties.limits.pointSizeRange[1] >= 16;

std::string device_name = properties.deviceName;
u32 vendor_id = properties.vendorID;

// Only Apple family GPUs support framebuffer fetch.
if (vendor_id == 0x106B || device_name.find("Apple") != std::string::npos)
{
config->backend_info.bSupportsFramebufferFetch = true;
}

// Our usage of primitive restart appears to be broken on AMD's binary drivers.
// Seems to be fine on GCN Gen 1-2, unconfirmed on GCN Gen 3, causes driver resets on GCN Gen 4.
if (DriverDetails::HasBug(DriverDetails::BUG_PRIMITIVE_RESTART))
Expand Down
4 changes: 2 additions & 2 deletions Source/Core/VideoBackends/Vulkan/VulkanLoader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ static bool OpenVulkanLibrary()
if (libvulkan_env && s_vulkan_module.Open(libvulkan_env))
return true;

// Use the libvulkan.dylib from the application bundle.
std::string filename = File::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib";
// Use the libMoltenVK.dylib from the application bundle.
std::string filename = File::GetBundleDirectory() + "/Contents/Frameworks/libMoltenVK.dylib";
return s_vulkan_module.Open(filename.c_str());
#else
std::string filename = Common::DynamicLibrary::GetVersionedFilename("vulkan", 1);
Expand Down
4 changes: 4 additions & 0 deletions Source/Core/VideoCommon/ConstantManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ using int4 = std::array<s32, 4>;

enum class SrcBlendFactor : u32;
enum class DstBlendFactor : u32;
enum class LogicOp : u32;

struct PixelShaderConstants
{
Expand Down Expand Up @@ -53,6 +54,9 @@ struct PixelShaderConstants
DstBlendFactor blend_dst_factor_alpha;
u32 blend_subtract;
u32 blend_subtract_alpha;
// For shader_framebuffer_fetch logic ops:
u32 logic_op_enable; // bool
LogicOp logic_op_mode;
};

struct VertexShaderConstants
Expand Down
2 changes: 1 addition & 1 deletion Source/Core/VideoCommon/GXPipelineTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ namespace VideoCommon
// As pipelines encompass both shader UIDs and render states, changes to either of these should
// also increment the pipeline UID version. Incrementing the UID version will cause all UID
// caches to be invalidated.
constexpr u32 GX_PIPELINE_UID_VERSION = 3; // Last changed in PR 9532
constexpr u32 GX_PIPELINE_UID_VERSION = 4; // Last changed in PR 9990

struct GXPipelineUid
{
Expand Down
84 changes: 76 additions & 8 deletions Source/Core/VideoCommon/PixelShaderGen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ PixelShaderUid GetPixelShaderUid()
BlendingState state = {};
state.Generate(bpmem);

if (state.usedualsrc && state.dstalpha && g_ActiveConfig.backend_info.bSupportsFramebufferFetch &&
if (state.usedualsrc && g_ActiveConfig.backend_info.bSupportsFramebufferFetch &&
!g_ActiveConfig.backend_info.bSupportsDualSourceBlend)
{
uid_data->blend_enable = state.blendenable;
Expand All @@ -330,6 +330,9 @@ PixelShaderUid GetPixelShaderUid()
uid_data->blend_subtract_alpha = state.subtractAlpha;
}

uid_data->logic_op_enable = state.logicopenable;
uid_data->logic_op_mode = u32(state.logicmode.Value());

return out;
}

Expand Down Expand Up @@ -422,6 +425,8 @@ void WritePixelShaderCommonHeader(ShaderCode& out, APIType api_type,
"\tuint blend_dst_factor_alpha;\n"
"\tbool blend_subtract;\n"
"\tbool blend_subtract_alpha;\n"
"\tbool logic_op_enable;\n"
"\tuint logic_op_mode;\n"
"}};\n\n");
out.Write("#define bpmem_combiners(i) (bpmem_pack1[(i)].xy)\n"
"#define bpmem_tevind(i) (bpmem_pack1[(i)].z)\n"
Expand Down Expand Up @@ -544,6 +549,7 @@ static void SampleTexture(ShaderCode& out, std::string_view texcoords, std::stri
static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_data, APIType api_type,
bool per_pixel_depth, bool use_dual_source);
static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data);
static void WriteLogicOp(ShaderCode& out, const pixel_shader_uid_data* uid_data);
static void WriteColor(ShaderCode& out, APIType api_type, const pixel_shader_uid_data* uid_data,
bool use_dual_source);
static void WriteBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data);
Expand Down Expand Up @@ -613,17 +619,48 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
}
}

// Only use dual-source blending when required on drivers that don't support it very well.
const bool use_dual_source =
host_config.backend_dual_source_blend &&
(!DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DUAL_SOURCE_BLENDING) ||
uid_data->useDstAlpha);
const bool use_shader_blend =
!use_dual_source && (uid_data->useDstAlpha && host_config.backend_shader_framebuffer_fetch);
const bool use_dual_source = host_config.backend_dual_source_blend;
const bool use_shader_blend = !use_dual_source && host_config.backend_shader_framebuffer_fetch;
const bool use_shader_logic_op =
#ifdef __APPLE__
!host_config.backend_logic_op && host_config.backend_shader_framebuffer_fetch;
#else
false;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A separate PR to add shader logic ops support on OpenGL ES (Android) will be opened after this is merged.

#endif

if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
{
#ifdef __APPLE__
// Framebuffer fetch is only supported by Metal, so ensure that we're running Vulkan (MoltenVK)
// if we want to use it.
if (api_type == APIType::Vulkan)
{
if (use_dual_source)
{
out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out vec4 ocol0;\n"
"FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n");
}
else if (use_shader_blend)
{
// Metal doesn't support a single unified variable for both input and output, so we declare
// the output separately. The input will be defined later below.
out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 real_ocol0;\n");
}
else
{
out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n");
}

if (use_shader_blend || use_shader_logic_op)
{
// Subpass inputs will be converted to framebuffer fetch by SPIRV-Cross.
out.Write("INPUT_ATTACHMENT_BINDING(0, 0, 0) uniform subpassInput in_ocol0;\n");
}
}
else if (use_dual_source)
#else
if (use_dual_source)
#endif
{
if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_FRAGMENT_SHADER_INDEX_DECORATION))
{
Expand Down Expand Up @@ -948,6 +985,9 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos

WriteFog(out, uid_data);

if (use_shader_logic_op)
WriteLogicOp(out, uid_data);

// Write the color and alpha values to the framebuffer
// If using shader blend, we still use the separate alpha
WriteColor(out, api_type, uid_data, use_dual_source || use_shader_blend);
Expand Down Expand Up @@ -1585,6 +1625,34 @@ static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data)
out.Write("\tprev.rgb = (prev.rgb * (256 - ifog) + " I_FOGCOLOR ".rgb * ifog) >> 8;\n");
}

// Emits shader code that applies the logic op recorded in the shader UID to `prev`, using the
// current framebuffer colour obtained through FB_FETCH_VALUE. Writes nothing when the UID has
// logic ops disabled.
//
// out:      shader source being generated; two statements are appended to it.
// uid_data: supplies logic_op_enable and the 4-bit logic_op_mode used to index the table below.
static void WriteLogicOp(ShaderCode& out, const pixel_shader_uid_data* uid_data)
{
  if (!uid_data->logic_op_enable)
    return;

  // One expression per logic op mode, indexed by uid_data->logic_op_mode (a 4-bit field, so
  // always within the 16 entries).
  static constexpr std::array<const char*, 16> logic_op_mode{
      "int4(0, 0, 0, 0)",          // CLEAR
      "prev & fb_value",           // AND
      "prev & ~fb_value",          // AND_REVERSE
      "prev",                      // COPY
      "~prev & fb_value",          // AND_INVERTED
      "fb_value",                  // NOOP
      "prev ^ fb_value",           // XOR
      "prev | fb_value",           // OR
      "~(prev | fb_value)",        // NOR
      "~(prev ^ fb_value)",        // EQUIV
      "~fb_value",                 // INVERT
      "prev | ~fb_value",          // OR_REVERSE
      "~prev",                     // COPY_INVERTED
      "~prev | fb_value",          // OR_INVERTED
      "~(prev & fb_value)",        // NAND
      "int4(255, 255, 255, 255)",  // SET
  };

  // Round instead of truncating: a normalized channel scaled by 255.0 can land just below the
  // intended integer, and truncation would then read it back one step too low.
  out.Write("\tint4 fb_value = int4(round(FB_FETCH_VALUE * 255.0));\n");

  // The operands are 32-bit ints holding 8-bit channel values (see CLEAR/SET above), so any
  // expression using '~' sets the upper bits and yields a negative number. Mask back to 8 bits
  // so the result is the 8-bit complement rather than a value outside 0..255.
  out.Write("\tprev = ({}) & 255;\n", logic_op_mode[uid_data->logic_op_mode]);
}

static void WriteColor(ShaderCode& out, APIType api_type, const pixel_shader_uid_data* uid_data,
bool use_dual_source)
{
Expand Down
2 changes: 2 additions & 0 deletions Source/Core/VideoCommon/PixelShaderGen.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ struct pixel_shader_uid_data
DstBlendFactor blend_dst_factor_alpha : 3; // Only used with shader_framebuffer_fetch blend
u32 blend_subtract : 1; // Only used with shader_framebuffer_fetch blend
u32 blend_subtract_alpha : 1; // Only used with shader_framebuffer_fetch blend
u32 logic_op_enable : 1; // Only used with shader_framebuffer_fetch logic ops
u32 logic_op_mode : 4; // Only used with shader_framebuffer_fetch logic ops

u32 texMtxInfo_n_projection : 8; // 8x1 bit
u32 tevindref_bi0 : 3;
Expand Down
10 changes: 10 additions & 0 deletions Source/Core/VideoCommon/PixelShaderManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -504,6 +504,16 @@ void PixelShaderManager::SetBlendModeChanged()
constants.blend_subtract_alpha = state.subtractAlpha;
dirty = true;
}
if (constants.logic_op_enable != state.logicopenable)
{
constants.logic_op_enable = state.logicopenable;
dirty = true;
}
if (constants.logic_op_mode != state.logicmode)
{
constants.logic_op_mode = state.logicmode;
dirty = true;
}
s_bDestAlphaDirty = true;
}

Expand Down
7 changes: 7 additions & 0 deletions Source/Core/VideoCommon/ShaderCache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -583,7 +583,14 @@ AbstractPipelineConfig ShaderCache::GetGXPipelineConfig(
config.blending_state = blending_state;
config.framebuffer_state = g_framebuffer_manager->GetEFBFramebufferState();

#ifdef __APPLE__
// We can use framebuffer fetch with Metal (Vulkan over MoltenVK) to emulate logic ops in the
// fragment shader.
if (config.blending_state.logicopenable && !g_ActiveConfig.backend_info.bSupportsLogicOp &&
!g_ActiveConfig.backend_info.bSupportsFramebufferFetch)
#else
if (config.blending_state.logicopenable && !g_ActiveConfig.backend_info.bSupportsLogicOp)
#endif
{
WARN_LOG_FMT(VIDEO,
"Approximating logic op with blending, this will produce incorrect rendering.");
Expand Down
Loading