Skip to content

Commit

Permalink
Provoking vertex (software transform): Simpler solution
Browse files Browse the repository at this point in the history
Simply rotate each primitive in the index buffer to simulate a different provoking vertex.

Since at this point we have already generated a plain primitive index
buffer, it's easy to manipulate like this.

An even better solution would be to generate rotated index buffers
directly during decode, although that code is super critical and does
not need more complexity.

We could now also enable this for hardware transform but I'm leaving
that for later.
  • Loading branch information
hrydgard committed Jul 17, 2024
1 parent a69d92c commit c8dccd4
Show file tree
Hide file tree
Showing 7 changed files with 63 additions and 74 deletions.
99 changes: 35 additions & 64 deletions GPU/Common/SoftwareTransformCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,33 +125,6 @@ static bool IsReallyAClear(const TransformedVertex *transformed, int numVerts, f
return true;
}

// Index offset (in vertices) to the vertex whose color is used for a flat-shaded
// primitive of type prim. Yields 0 (no offset) unless shadeMode is GE_SHADE_FLAT
// and clearMode is false, and for any primitive type not listed below.
static int ColorIndexOffset(int prim, GEShadeMode shadeMode, bool clearMode) {
	const bool flatShaded = shadeMode == GE_SHADE_FLAT && !clearMode;
	if (!flatShaded) {
		return 0;
	}

	int offset = 0;
	switch (prim) {
	case GE_PRIM_LINES:
	case GE_PRIM_LINE_STRIP:
	case GE_PRIM_TRIANGLE_FAN:
		offset = 1;
		break;

	case GE_PRIM_TRIANGLES:
	case GE_PRIM_TRIANGLE_STRIP:
		offset = 2;
		break;

	case GE_PRIM_RECTANGLES:
		// The BR color is already used when expanding, so no offset is needed.
	default:
		break;
	}
	return offset;
}

void SoftwareTransform::SetProjMatrix(const float mtx[14], bool invertedX, bool invertedY, const Lin::Vec3 &trans, const Lin::Vec3 &scale) {
memcpy(&projMatrix_.m, mtx, 16 * sizeof(float));

Expand Down Expand Up @@ -202,11 +175,6 @@ void SoftwareTransform::Transform(int prim, u32 vertType, const DecVtxFormat &de
fog_slope = std::signbit(fog_slope) ? -65535.0f : 65535.0f;
}

int provokeIndOffset = 0;
if (!params_.provokingVertexLast) {
provokeIndOffset = ColorIndexOffset(prim, gstate.getShadeMode(), gstate.isModeClear());
}

VertexReader reader(decoded, decVtxFormat, vertType);
if (throughmode) {
const u32 materialAmbientRGBA = gstate.getMaterialAmbientRGBA();
Expand All @@ -221,13 +189,7 @@ void SoftwareTransform::Transform(int prim, u32 vertType, const DecVtxFormat &de
vert.pos_w = 1.0f;

if (hasColor) {
if (provokeIndOffset != 0 && index + provokeIndOffset < numDecodedVerts) {
reader.Goto(index + provokeIndOffset);
vert.color0_32 = reader.ReadColor0_8888();
reader.Goto(index);
} else {
vert.color0_32 = reader.ReadColor0_8888();
}
vert.color0_32 = reader.ReadColor0_8888();
} else {
vert.color0_32 = materialAmbientRGBA;
}
Expand Down Expand Up @@ -268,10 +230,7 @@ void SoftwareTransform::Transform(int prim, u32 vertType, const DecVtxFormat &de
if (reader.hasUV())
reader.ReadUV(ruv);

// Read all the provoking vertex values here.
Vec4f unlitColor;
if (provokeIndOffset != 0 && index + provokeIndOffset < numDecodedVerts)
reader.Goto(index + provokeIndOffset);
if (reader.hasColor0())
reader.ReadColor0(unlitColor.AsArray());
else
Expand Down Expand Up @@ -342,34 +301,14 @@ void SoftwareTransform::Transform(int prim, u32 vertType, const DecVtxFormat &de
break;

case GE_PROJMAP_NORMALIZED_NORMAL: // Use normalized normal as source
// Flat uses the vertex normal, not provoking.
if (provokeIndOffset == 0) {
source = normal.Normalized(cpu_info.bSSE4_1);
} else {
reader.Goto(index);
if (reader.hasNormal())
reader.ReadNrm(source.AsArray());
if (gstate.areNormalsReversed())
source = -source;
source.Normalize();
}
source = normal.Normalized(cpu_info.bSSE4_1);
if (!reader.hasNormal()) {
ERROR_LOG_REPORT(Log::G3D, "Normal projection mapping without normal?");
}
break;

case GE_PROJMAP_NORMAL: // Use non-normalized normal as source!
// Flat uses the vertex normal, not provoking.
if (provokeIndOffset == 0) {
source = normal;
} else {
// Need to read the normal for this vertex and weight it again..
reader.Goto(index);
if (reader.hasNormal())
reader.ReadNrm(source.AsArray());
if (gstate.areNormalsReversed())
source = -source;
}
source = normal;
if (!reader.hasNormal()) {
ERROR_LOG_REPORT(Log::G3D, "Normal projection mapping without normal?");
}
Expand Down Expand Up @@ -751,6 +690,38 @@ bool SoftwareTransform::ExpandRectangles(int vertexCount, int &numDecodedVerts,
return true;
}

// In-place. So, better not be doing this on GPU memory!
void IndexBufferProvokingLastToFirst(int prim, u16 *inds, int indsSize) {
switch (prim) {
case GE_PRIM_LINES:
// Swap every two indices.
for (int i = 0; i < indsSize - 1; i += 2) {
u16 temp = inds[i];
inds[i] = inds[i + 1];
inds[i + 1] = temp;
}
break;
case GE_PRIM_TRIANGLES:
// Rotate the triangle so the last becomes the first, without changing the winding order.
// This could be done with a series of pshufb.
for (int i = 0; i < indsSize - 2; i += 3) {
u16 temp = inds[i + 2];
inds[i + 2] = inds[i + 1];
inds[i + 1] = inds[i];
inds[i] = temp;
}
break;
case GE_PRIM_POINTS:
// Nothing to do,
break;
case GE_PRIM_RECTANGLES:
// Nothing to do, already using the 2nd vertex.
break;
default:
_dbg_assert_msg_(false, "IndexBufferProvokingFirstToLast: Only works with plain indexed primitives, no strips or fans")
}
}

bool SoftwareTransform::ExpandLines(int vertexCount, int &numDecodedVerts, int vertsSize, u16 *&inds, int indsSize, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode) {
// Before we start, do a sanity check - does the output fit?
if ((vertexCount / 2) * 6 > indsSize) {
Expand Down
6 changes: 5 additions & 1 deletion GPU/Common/SoftwareTransformCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,13 @@ struct SoftwareTransformParams {
bool allowSeparateAlphaClear;
bool flippedY;
bool usesHalfZ;
bool provokingVertexLast;
};

// Rotates each primitive in an index buffer so that its last index becomes the first, simulating a different provoking vertex.
// In-place. So, better not be doing this on GPU memory!
// TODO: We could do this already during index decode.
void IndexBufferProvokingLastToFirst(int prim, u16 *inds, int indsSize);

class SoftwareTransform {
public:
SoftwareTransform(SoftwareTransformParams &params) : params_(params) {}
Expand Down
4 changes: 3 additions & 1 deletion GPU/D3D11/D3D11Util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@ std::vector<uint8_t> CompileShaderToBytecodeD3D11(const char *code, size_t codeS
if (trimmed.find("pow(f, e) will not work for negative f") != std::string::npos) {
continue;
}
WARN_LOG(Log::G3D, "%.*s", (int)trimmed.length(), trimmed.data());
if (trimmed.size() > 1) { // ignore single nulls, not sure how they appear.
WARN_LOG(Log::G3D, "%.*s", (int)trimmed.length(), trimmed.data());
}
}
} else {
ERROR_LOG(Log::G3D, "%s: %s\n\n%s", "errors", errors.c_str(), numberedCode.c_str());
Expand Down
7 changes: 6 additions & 1 deletion GPU/D3D11/DrawEngineD3D11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -382,10 +382,15 @@ void DrawEngineD3D11::DoFlush() {
params.texCache = textureCache_;
params.allowClear = true;
params.allowSeparateAlphaClear = false; // D3D11 doesn't support separate alpha clears
params.provokingVertexLast = false;
params.flippedY = false;
params.usesHalfZ = true;

if (gstate.getShadeMode() == GE_SHADE_FLAT) {
// We need to rotate the index buffer to simulate a different provoking vertex.
// We do this before line expansion etc.
IndexBufferProvokingLastToFirst(prim, inds, vertexCount);
}

// We need correct viewport values in gstate_c already.
if (gstate_c.IsDirty(DIRTY_VIEWPORTSCISSOR_STATE)) {
ViewportAndScissor vpAndScissor;
Expand Down
8 changes: 7 additions & 1 deletion GPU/Directx9/DrawEngineDX9.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -334,10 +334,16 @@ void DrawEngineDX9::DoFlush() {
params.texCache = textureCache_;
params.allowClear = true;
params.allowSeparateAlphaClear = false;
params.provokingVertexLast = false;
params.flippedY = false;
params.usesHalfZ = true;

if (gstate.getShadeMode() == GE_SHADE_FLAT) {
// We need to rotate the index buffer to simulate a different provoking vertex.
// We do this before line expansion etc.
int indexCount = RemainingIndices(inds);
IndexBufferProvokingLastToFirst(prim, inds, vertexCount);
}

// We need correct viewport values in gstate_c already.
if (gstate_c.IsDirty(DIRTY_VIEWPORTSCISSOR_STATE)) {
ViewportAndScissor vpAndScissor;
Expand Down
1 change: 0 additions & 1 deletion GPU/GLES/DrawEngineGLES.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,6 @@ void DrawEngineGLES::DoFlush() {
params.texCache = textureCache_;
params.allowClear = true; // Clear in OpenGL respects scissor rects, so we'll use it.
params.allowSeparateAlphaClear = true;
params.provokingVertexLast = true;
params.flippedY = framebufferManager_->UseBufferedRendering();
params.usesHalfZ = false;

Expand Down
12 changes: 7 additions & 5 deletions GPU/Vulkan/DrawEngineVulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -405,11 +405,13 @@ void DrawEngineVulkan::DoFlush() {
// do not respect scissor rects.
params.allowClear = framebufferManager_->UseBufferedRendering();
params.allowSeparateAlphaClear = false;
if (renderManager->GetVulkanContext()->GetDeviceFeatures().enabled.provokingVertex.provokingVertexLast) {
// We can get the OpenGL behavior, no need for workarounds.
params.provokingVertexLast = true;
} else {
params.provokingVertexLast = false;

if (gstate.getShadeMode() == GE_SHADE_FLAT) {
if (!renderManager->GetVulkanContext()->GetDeviceFeatures().enabled.provokingVertex.provokingVertexLast) {
// If we can't have the hardware do it, we need to rotate the index buffer to simulate a different provoking vertex.
// We do this before line expansion etc.
IndexBufferProvokingLastToFirst(prim, inds, vertexCount);
}
}
params.flippedY = true;
params.usesHalfZ = true;
Expand Down

0 comments on commit c8dccd4

Please sign in to comment.