Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize out some stencil emulation, try to avoid depth write #11620

Merged
merged 4 commits into from
Dec 2, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions GPU/Common/DrawEngineCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ namespace Spline { struct Weight2D; }

class TessellationDataTransfer {
public:
virtual ~TessellationDataTransfer() {}
void CopyControlPoints(float *pos, float *tex, float *col, int posStride, int texStride, int colStride, const SimpleVertex *const *points, int size, u32 vertType);
virtual void SendDataToShader(const SimpleVertex *const *points, int size_u, int size_v, u32 vertType, const Spline::Weight2D &weights) = 0;
};
Expand Down
17 changes: 14 additions & 3 deletions GPU/Common/GPUStateUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,17 @@ bool CanUseHardwareTransform(int prim) {
return !gstate.isModeThrough() && prim != GE_PRIM_RECTANGLES;
}

bool IsStencilTestOutputDisabled() {
// The mask applies on all stencil ops.
if (gstate.isStencilTestEnabled() && (gstate.pmska & 0xFF) != 0xFF) {
if (gstate.FrameBufFormat() == GE_FORMAT_565) {
return true;
}
return gstate.getStencilOpZPass() == GE_STENCILOP_KEEP && gstate.getStencilOpZFail() == GE_STENCILOP_KEEP && gstate.getStencilOpSFail() == GE_STENCILOP_KEEP;
}
return true;
}

bool NeedsTestDiscard() {
// We assume this is called only when enabled and not trivially true (may also be for color testing.)
if (gstate.isStencilTestEnabled() && (gstate.pmska & 0xFF) != 0xFF)
Expand Down Expand Up @@ -169,7 +180,7 @@ const bool nonAlphaDestFactors[16] = {
};

ReplaceAlphaType ReplaceAlphaWithStencil(ReplaceBlendType replaceBlend) {
if (!gstate.isStencilTestEnabled() || gstate.isModeClear()) {
if (IsStencilTestOutputDisabled() || gstate.isModeClear()) {
return REPLACE_ALPHA_NO;
}

Expand Down Expand Up @@ -995,7 +1006,7 @@ void ConvertBlendState(GenericBlendState &blendState, bool allowShaderBlend) {

int constantAlpha = 255;
BlendFactor constantAlphaGL = BlendFactor::ONE;
if (gstate.isStencilTestEnabled() && replaceAlphaWithStencil == REPLACE_ALPHA_NO) {
if (!IsStencilTestOutputDisabled() && replaceAlphaWithStencil == REPLACE_ALPHA_NO) {
switch (ReplaceAlphaWithStencilType()) {
case STENCIL_VALUE_UNIFORM:
constantAlpha = gstate.getStencilTestRef();
Expand Down Expand Up @@ -1163,7 +1174,7 @@ void ConvertBlendState(GenericBlendState &blendState, bool allowShaderBlend) {
blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ONE, BlendFactor::ZERO);
break;
}
} else if (gstate.isStencilTestEnabled()) {
} else if (!IsStencilTestOutputDisabled()) {
switch (ReplaceAlphaWithStencilType()) {
case STENCIL_VALUE_KEEP:
blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ZERO, BlendFactor::ONE);
Expand Down
1 change: 1 addition & 0 deletions GPU/Common/GPUStateUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ bool IsColorTestAgainstZero();
bool IsColorTestTriviallyTrue();
bool IsAlphaTestAgainstZero();
bool NeedsTestDiscard();
bool IsStencilTestOutputDisabled();

StencilValueType ReplaceAlphaWithStencilType();
ReplaceAlphaType ReplaceAlphaWithStencil(ReplaceBlendType replaceBlend);
Expand Down
7 changes: 6 additions & 1 deletion GPU/Common/ShaderId.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -201,8 +201,10 @@ std::string FragmentShaderDesc(const ShaderID &id) {
case STENCIL_VALUE_INCR_8: desc << "StenIncr8 "; break;
case STENCIL_VALUE_DECR_4: desc << "StenDecr4 "; break;
case STENCIL_VALUE_DECR_8: desc << "StenDecr4 "; break;
default: desc << "StenUnknown"; break;
default: desc << "StenUnknown "; break;
}
} else if (id.Bit(FS_BIT_REPLACE_ALPHA_WITH_STENCIL_TYPE)) {
desc << "StenOff ";
}
if (id.Bit(FS_BIT_DO_TEXTURE)) {
switch (id.Bits(FS_BIT_TEXFUNC, 3)) {
Expand Down Expand Up @@ -292,6 +294,9 @@ void ComputeFragmentShaderID(ShaderID *id_out) {
if (stencilToAlpha != REPLACE_ALPHA_NO) {
// 4 bits
id.SetBits(FS_BIT_REPLACE_ALPHA_WITH_STENCIL_TYPE, 4, ReplaceAlphaWithStencilType());
} else {
// Use those bits instead for whether stencil output is disabled.
id.SetBit(FS_BIT_REPLACE_ALPHA_WITH_STENCIL_TYPE, IsStencilTestOutputDisabled());
}

// 2 bits.
Expand Down
2 changes: 2 additions & 0 deletions GPU/D3D11/DrawEngineD3D11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,8 @@ void DrawEngineD3D11::DestroyDeviceObjects() {
ClearTrackedVertexArrays();
ClearInputLayoutMap();
delete tessDataTransferD3D11;
tessDataTransferD3D11 = nullptr;
tessDataTransfer = nullptr;
delete pushVerts_;
delete pushInds_;
depthStencilCache_.Iterate([&](const uint64_t &key, ID3D11DepthStencilState *ds) {
Expand Down
2 changes: 1 addition & 1 deletion GPU/D3D11/StateMappingD3D11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ void DrawEngineD3D11::ApplyDrawState(int prim) {
#endif

// Let's not write to alpha if stencil isn't enabled.
if (!gstate.isStencilTestEnabled()) {
if (IsStencilTestOutputDisabled()) {
amask = false;
} else {
// If the stencil type is set to KEEP, we shouldn't write to the stencil/alpha channel.
Expand Down
2 changes: 1 addition & 1 deletion GPU/Directx9/StateMappingDX9.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ void DrawEngineDX9::ApplyDrawState(int prim) {
#endif

// Let's not write to alpha if stencil isn't enabled.
if (!gstate.isStencilTestEnabled()) {
if (IsStencilTestOutputDisabled()) {
amask = false;
} else {
// If the stencil type is set to KEEP, we shouldn't write to the stencil/alpha channel.
Expand Down
2 changes: 1 addition & 1 deletion GPU/GLES/StateMappingGLES.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ void DrawEngineGLES::ApplyDrawState(int prim) {
// amask is needed for both stencil and blend state so we keep it outside for now
bool amask = (gstate.pmska & 0xFF) < 128;
// Let's not write to alpha if stencil isn't enabled.
if (!gstate.isStencilTestEnabled()) {
if (IsStencilTestOutputDisabled()) {
amask = false;
} else {
// If the stencil type is set to KEEP, we shouldn't write to the stencil/alpha channel.
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ const CommonCommandTableEntry commonCommandTable[] = {
{ GE_CMD_BLENDFIXEDA, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE },
{ GE_CMD_BLENDFIXEDB, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE },
{ GE_CMD_MASKRGB, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE },
{ GE_CMD_MASKALPHA, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE },
{ GE_CMD_MASKALPHA, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_DEPTHSTENCIL_STATE },
{ GE_CMD_ZTEST, FLAG_FLUSHBEFOREONCHANGE, DIRTY_DEPTHSTENCIL_STATE },
{ GE_CMD_ZTESTENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_DEPTHSTENCIL_STATE | DIRTY_FRAGMENTSHADER_STATE },
{ GE_CMD_ZWRITEDISABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_DEPTHSTENCIL_STATE | DIRTY_FRAGMENTSHADER_STATE },
Expand Down
3 changes: 2 additions & 1 deletion GPU/Vulkan/DrawEngineVulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,8 @@ void DrawEngineVulkan::FrameData::Destroy(VulkanContext *vulkan) {

void DrawEngineVulkan::DestroyDeviceObjects() {
delete tessDataTransferVulkan;
tessDataTransfer = tessDataTransferVulkan = nullptr;
tessDataTransfer = nullptr;
tessDataTransferVulkan = nullptr;

for (int i = 0; i < VulkanContext::MAX_INFLIGHT_FRAMES; i++) {
frame_[i].Destroy(vulkan_);
Expand Down
5 changes: 3 additions & 2 deletions GPU/Vulkan/FragmentShaderGeneratorVulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,13 @@ bool GenerateVulkanGLSLFragmentShader(const FShaderID &id, char *buffer, uint32_

const char *shading = doFlatShading ? "flat" : "";
bool earlyFragmentTests = ((!enableAlphaTest && !enableColorTest) || testForceToZero) && !gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT);
bool hasStencilOutput = stencilToAlpha != REPLACE_ALPHA_NO || id.Bit(FS_BIT_REPLACE_ALPHA_WITH_STENCIL_TYPE) == 0;

bool isAdreno = vulkanVendorId == VULKAN_VENDOR_QUALCOMM;

if (earlyFragmentTests) {
WRITE(p, "layout (early_fragment_tests) in;\n");
} else if (isAdreno) {
} else if (isAdreno && hasStencilOutput && !gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT)) {
WRITE(p, "layout (depth_unchanged) out float gl_FragDepth;\n");
}

Expand Down Expand Up @@ -585,7 +586,7 @@ bool GenerateVulkanGLSLFragmentShader(const FShaderID &id, char *buffer, uint32_
WRITE(p, " z = (1.0/65535.0) * floor(z * 65535.0);\n");
}
WRITE(p, " gl_FragDepth = z;\n");
} else if (!earlyFragmentTests && isAdreno) {
} else if (!earlyFragmentTests && isAdreno && hasStencilOutput) {
// Adreno (and possibly MESA/others) apply early frag tests even with discard in the shader.
// Writing depth prevents the bug, even with depth_unchanged specified.
WRITE(p, " gl_FragDepth = gl_FragCoord.z;\n");
Expand Down
2 changes: 1 addition & 1 deletion GPU/Vulkan/StateMappingVulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ void DrawEngineVulkan::ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManag
#endif

// Let's not write to alpha if stencil isn't enabled.
if (!gstate.isStencilTestEnabled()) {
if (IsStencilTestOutputDisabled()) {
amask = false;
} else {
// If the stencil type is set to KEEP, we shouldn't write to the stencil/alpha channel.
Expand Down