diff --git a/GPU/Common/SoftwareTransformCommon.cpp b/GPU/Common/SoftwareTransformCommon.cpp index b918a0d45820..2d9f2719bced 100644 --- a/GPU/Common/SoftwareTransformCommon.cpp +++ b/GPU/Common/SoftwareTransformCommon.cpp @@ -547,7 +547,7 @@ void SoftwareTransform::DetectOffsetTexture(int maxIndex) { } // NOTE: The viewport must be up to date! -void SoftwareTransform::BuildDrawingParams(int prim, int vertexCount, u32 vertType, u16 *&inds, int &maxIndex, SoftwareTransformResult *result) { +void SoftwareTransform::BuildDrawingParams(int prim, int vertexCount, u32 vertType, u16 *inds, int &indsOffset, int indexBufferSize, int &maxIndex, SoftwareTransformResult *result) { TransformedVertex *transformed = params_.transformed; TransformedVertex *transformedExpanded = params_.transformedExpanded; bool throughmode = (vertType & GE_VTYPE_THROUGH_MASK) != 0; @@ -560,7 +560,11 @@ void SoftwareTransform::BuildDrawingParams(int prim, int vertexCount, u32 vertTy bool useBufferedRendering = fbman->UseBufferedRendering(); if (prim == GE_PRIM_RECTANGLES) { - ExpandRectangles(vertexCount, maxIndex, inds, transformed, transformedExpanded, numTrans, throughmode); + if (!ExpandRectangles(vertexCount, maxIndex, inds, indsOffset, indexBufferSize, transformed, transformedExpanded, numTrans, throughmode)) { + result->drawIndexed = false; + result->drawNumTrans = 0; + return; + } result->drawBuffer = transformedExpanded; result->drawIndexed = true; @@ -578,11 +582,19 @@ void SoftwareTransform::BuildDrawingParams(int prim, int vertexCount, u32 vertTy } } } else if (prim == GE_PRIM_POINTS) { - ExpandPoints(vertexCount, maxIndex, inds, transformed, transformedExpanded, numTrans, throughmode); + if (!ExpandPoints(vertexCount, maxIndex, inds, indsOffset, indexBufferSize, transformed, transformedExpanded, numTrans, throughmode)) { + result->drawIndexed = false; + result->drawNumTrans = 0; + return; + } result->drawBuffer = transformedExpanded; result->drawIndexed = true; } else if (prim == 
GE_PRIM_LINES) { - ExpandLines(vertexCount, maxIndex, inds, transformed, transformedExpanded, numTrans, throughmode); + if (!ExpandLines(vertexCount, maxIndex, inds, indsOffset, indexBufferSize, transformed, transformedExpanded, numTrans, throughmode)) { + result->drawIndexed = false; + result->drawNumTrans = 0; + return; + } result->drawBuffer = transformedExpanded; result->drawIndexed = true; } else { @@ -674,15 +686,21 @@ void SoftwareTransform::CalcCullParams(float &minZValue, float &maxZValue) { std::swap(minZValue, maxZValue); } -void SoftwareTransform::ExpandRectangles(int vertexCount, int &maxIndex, u16 *&inds, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode) { +bool SoftwareTransform::ExpandRectangles(int vertexCount, int &maxIndex, u16 *inds, int &indsOffset, int indexBufferSize, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode) { + // Before we start, do a sanity check - does the output fit? + if ((vertexCount / 2) * 6 > indexBufferSize - indsOffset) { + // Won't fit, kill the draw. + return false; + } + // Rectangles always need 2 vertices, disregard the last one if there's an odd number. 
vertexCount = vertexCount & ~1; numTrans = 0; TransformedVertex *trans = &transformedExpanded[0]; - const u16 *indsIn = (const u16 *)inds; - u16 *newInds = inds + vertexCount; - u16 *indsOut = newInds; + const u16 *indsIn = (const u16 *)(inds + indsOffset); + int newIndsOffset = indsOffset + vertexCount; + u16 *indsOut = inds + newIndsOffset; maxIndex = 4 * (vertexCount / 2); for (int i = 0; i < vertexCount; i += 2) { @@ -727,23 +745,33 @@ void SoftwareTransform::ExpandRectangles(int vertexCount, int &maxIndex, u16 *&i indsOut[3] = i * 2 + 3; indsOut[4] = i * 2 + 0; indsOut[5] = i * 2 + 2; + trans += 4; indsOut += 6; numTrans += 6; } - inds = newInds; + + indsOffset = newIndsOffset; + return true; } -void SoftwareTransform::ExpandLines(int vertexCount, int &maxIndex, u16 *&inds, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode) { +bool SoftwareTransform::ExpandLines(int vertexCount, int &maxIndex, u16 *inds, int &indsOffset, int indexBufferSize, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode) { + // Before we start, do a sanity check - does the output fit? + if ((vertexCount / 2) * 6 > indexBufferSize - indsOffset) { + // Won't fit, kill the draw. + return false; + } + // Lines always need 2 vertices, disregard the last one if there's an odd number. 
vertexCount = vertexCount & ~1; numTrans = 0; TransformedVertex *trans = &transformedExpanded[0]; - const u16 *indsIn = (const u16 *)inds; - u16 *newInds = inds + vertexCount; - u16 *indsOut = newInds; + + const u16 *indsIn = (const u16 *)(inds + indsOffset); + int newIndsOffset = indsOffset + vertexCount; + u16 *indsOut = inds + newIndsOffset; float dx = 1.0f * gstate_c.vpWidthScale * (1.0f / fabsf(gstate.getViewportXScale())); float dy = 1.0f * gstate_c.vpHeightScale * (1.0f / fabsf(gstate.getViewportYScale())); @@ -856,16 +884,23 @@ void SoftwareTransform::ExpandLines(int vertexCount, int &maxIndex, u16 *&inds, } } - inds = newInds; + indsOffset = newIndsOffset; + return true; } -void SoftwareTransform::ExpandPoints(int vertexCount, int &maxIndex, u16 *&inds, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode) { +bool SoftwareTransform::ExpandPoints(int vertexCount, int &maxIndex, u16 *inds, int &indsOffset, int indexBufferSize, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode) { + // Before we start, do a sanity check - does the output fit? + if (vertexCount * 6 > indexBufferSize - indsOffset) { + // Won't fit, kill the draw. 
+ return false; + } + numTrans = 0; TransformedVertex *trans = &transformedExpanded[0]; - const u16 *indsIn = (const u16 *)inds; - u16 *newInds = inds + vertexCount; - u16 *indsOut = newInds; + const u16 *indsIn = (const u16 *)(inds + indsOffset); + int newIndsOffset = indsOffset + vertexCount; + u16 *indsOut = inds + newIndsOffset; float dx = 1.0f * gstate_c.vpWidthScale * (1.0f / gstate.getViewportXScale()); float dy = 1.0f * gstate_c.vpHeightScale * (1.0f / gstate.getViewportYScale()); @@ -924,5 +959,7 @@ void SoftwareTransform::ExpandPoints(int vertexCount, int &maxIndex, u16 *&inds, numTrans += 6; } - inds = newInds; + + indsOffset = newIndsOffset; + return true; } diff --git a/GPU/Common/SoftwareTransformCommon.h b/GPU/Common/SoftwareTransformCommon.h index 480bd18e519e..da15ffad9305 100644 --- a/GPU/Common/SoftwareTransformCommon.h +++ b/GPU/Common/SoftwareTransformCommon.h @@ -62,19 +62,18 @@ struct SoftwareTransformParams { class SoftwareTransform { public: - SoftwareTransform(SoftwareTransformParams &params) : params_(params) { - } + SoftwareTransform(SoftwareTransformParams &params) : params_(params) {} void SetProjMatrix(const float mtx[14], bool invertedX, bool invertedY, const Lin::Vec3 &trans, const Lin::Vec3 &scale); void Decode(int prim, u32 vertexType, const DecVtxFormat &decVtxFormat, int maxIndex, SoftwareTransformResult *result); void DetectOffsetTexture(int maxIndex); - void BuildDrawingParams(int prim, int vertexCount, u32 vertType, u16 *&inds, int &maxIndex, SoftwareTransformResult *result); + void BuildDrawingParams(int prim, int vertexCount, u32 vertType, u16 *inds, int &indsOffset, int indexBufferSize, int &maxIndex, SoftwareTransformResult *result); protected: void CalcCullParams(float &minZValue, float &maxZValue); - void ExpandRectangles(int vertexCount, int &maxIndex, u16 *&inds, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode); - void ExpandLines(int vertexCount, int &maxIndex, 
u16 *&inds, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode); - void ExpandPoints(int vertexCount, int &maxIndex, u16 *&inds, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode); + bool ExpandRectangles(int vertexCount, int &maxIndex, u16 *inds, int &indsOffset, int indexBufferSize, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode); + bool ExpandLines(int vertexCount, int &maxIndex, u16 *inds, int &indsOffset, int indexBufferSize, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode); + bool ExpandPoints(int vertexCount, int &maxIndex, u16 *inds, int &indsOffset, int indexBufferSize, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode); const SoftwareTransformParams &params_; Lin::Matrix4x4 projMatrix_; diff --git a/GPU/D3D11/DrawEngineD3D11.cpp b/GPU/D3D11/DrawEngineD3D11.cpp index 7780bfa28a26..f018980f4a7b 100644 --- a/GPU/D3D11/DrawEngineD3D11.cpp +++ b/GPU/D3D11/DrawEngineD3D11.cpp @@ -598,7 +598,7 @@ void DrawEngineD3D11::DoFlush() { prim = GE_PRIM_TRIANGLES; VERBOSE_LOG(G3D, "Flush prim %i SW! %i verts in one go", prim, indexGen.VertexCount()); - u16 *inds = decIndex_; + u16 *const inds = decIndex_; SoftwareTransformResult result{}; SoftwareTransformParams params{}; params.decoded = decoded_; @@ -644,8 +644,9 @@ void DrawEngineD3D11::DoFlush() { // Need to ApplyDrawState after ApplyTexture because depal can launch a render pass and that wrecks the state. 
ApplyDrawState(prim); + int indsOffset = 0; if (result.action == SW_NOT_READY) - swTransform.BuildDrawingParams(prim, indexGen.VertexCount(), dec_->VertexType(), inds, maxIndex, &result); + swTransform.BuildDrawingParams(prim, indexGen.VertexCount(), dec_->VertexType(), inds, indsOffset, DECODED_INDEX_BUFFER_SIZE / sizeof(uint16_t), maxIndex, &result); if (result.setSafeSize) framebufferManager_->SetSafeSize(result.safeWidth, result.safeHeight); @@ -683,11 +684,11 @@ void DrawEngineD3D11::DoFlush() { UINT iOffset; int iSize = sizeof(uint16_t) * result.drawNumTrans; uint8_t *iptr = pushInds_->BeginPush(context_, &iOffset, iSize); - memcpy(iptr, inds, iSize); + memcpy(iptr, inds + indsOffset, iSize); pushInds_->EndPush(context_); context_->IASetIndexBuffer(pushInds_->Buf(), DXGI_FORMAT_R16_UINT, iOffset); context_->DrawIndexed(result.drawNumTrans, 0, 0); - } else { + } else if (result.drawNumTrans > 0) { context_->Draw(result.drawNumTrans, 0); } } else if (result.action == SW_CLEAR) { diff --git a/GPU/Directx9/DrawEngineDX9.cpp b/GPU/Directx9/DrawEngineDX9.cpp index 9efa233dd0b8..11323f374743 100644 --- a/GPU/Directx9/DrawEngineDX9.cpp +++ b/GPU/Directx9/DrawEngineDX9.cpp @@ -558,7 +558,7 @@ void DrawEngineDX9::DoFlush() { prim = GE_PRIM_TRIANGLES; VERBOSE_LOG(G3D, "Flush prim %i SW! 
%i verts in one go", prim, indexGen.VertexCount()); - u16 *inds = decIndex_; + u16 *const inds = decIndex_; SoftwareTransformResult result{}; SoftwareTransformParams params{}; params.decoded = decoded_; @@ -607,8 +607,9 @@ void DrawEngineDX9::DoFlush() { ApplyDrawState(prim); + int indsOffset = 0; if (result.action == SW_NOT_READY) - swTransform.BuildDrawingParams(prim, indexGen.VertexCount(), dec_->VertexType(), inds, maxIndex, &result); + swTransform.BuildDrawingParams(prim, indexGen.VertexCount(), dec_->VertexType(), inds, indsOffset, DECODED_INDEX_BUFFER_SIZE / sizeof(uint16_t), maxIndex, &result); if (result.setSafeSize) framebufferManager_->SetSafeSize(result.safeWidth, result.safeHeight); @@ -628,8 +629,8 @@ void DrawEngineDX9::DoFlush() { device_->SetVertexDeclaration(transformedVertexDecl_); if (result.drawIndexed) { - device_->DrawIndexedPrimitiveUP(d3d_prim[prim], 0, maxIndex, D3DPrimCount(d3d_prim[prim], result.drawNumTrans), inds, D3DFMT_INDEX16, result.drawBuffer, sizeof(TransformedVertex)); - } else { + device_->DrawIndexedPrimitiveUP(d3d_prim[prim], 0, maxIndex, D3DPrimCount(d3d_prim[prim], result.drawNumTrans), inds + indsOffset, D3DFMT_INDEX16, result.drawBuffer, sizeof(TransformedVertex)); + } else if (result.drawNumTrans > 0) { device_->DrawPrimitiveUP(d3d_prim[prim], D3DPrimCount(d3d_prim[prim], result.drawNumTrans), result.drawBuffer, sizeof(TransformedVertex)); } } else if (result.action == SW_CLEAR) { diff --git a/GPU/GLES/DrawEngineGLES.cpp b/GPU/GLES/DrawEngineGLES.cpp index 51798b32eb6d..e4d5bbac31be 100644 --- a/GPU/GLES/DrawEngineGLES.cpp +++ b/GPU/GLES/DrawEngineGLES.cpp @@ -357,7 +357,7 @@ void DrawEngineGLES::DoFlush() { if (prim == GE_PRIM_TRIANGLE_STRIP) prim = GE_PRIM_TRIANGLES; - u16 *inds = decIndex_; + u16 *const inds = decIndex_; SoftwareTransformResult result{}; // TODO: Keep this static? Faster than repopulating? 
SoftwareTransformParams params{}; @@ -414,8 +414,9 @@ void DrawEngineGLES::DoFlush() { // Need to ApplyDrawState after ApplyTexture because depal can launch a render pass and that wrecks the state. ApplyDrawState(prim); + int indsOffset = 0; if (result.action == SW_NOT_READY) - swTransform.BuildDrawingParams(prim, vertexCount, dec_->VertexType(), inds, maxIndex, &result); + swTransform.BuildDrawingParams(prim, vertexCount, dec_->VertexType(), inds, indsOffset, DECODED_INDEX_BUFFER_SIZE / sizeof(uint16_t), maxIndex, &result); if (result.setSafeSize) framebufferManager_->SetSafeSize(result.safeWidth, result.safeHeight); @@ -430,11 +431,11 @@ void DrawEngineGLES::DoFlush() { if (result.action == SW_DRAW_PRIMITIVES) { if (result.drawIndexed) { vertexBufferOffset = (uint32_t)frameData.pushVertex->Push(result.drawBuffer, maxIndex * sizeof(TransformedVertex), 4, &vertexBuffer); - indexBufferOffset = (uint32_t)frameData.pushIndex->Push(inds, sizeof(uint16_t) * result.drawNumTrans, 2, &indexBuffer); + indexBufferOffset = (uint32_t)frameData.pushIndex->Push(inds + indsOffset, sizeof(uint16_t) * result.drawNumTrans, 2, &indexBuffer); render_->DrawIndexed( softwareInputLayout_, vertexBuffer, vertexBufferOffset, indexBuffer, indexBufferOffset, glprim[prim], result.drawNumTrans, GL_UNSIGNED_SHORT); - } else { + } else if (result.drawNumTrans > 0) { vertexBufferOffset = (uint32_t)frameData.pushVertex->Push(result.drawBuffer, result.drawNumTrans * sizeof(TransformedVertex), 4, &vertexBuffer); render_->Draw( softwareInputLayout_, vertexBuffer, vertexBufferOffset, glprim[prim], 0, result.drawNumTrans); diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp index 491c3a44efc1..0f541f82589c 100644 --- a/GPU/Vulkan/DrawEngineVulkan.cpp +++ b/GPU/Vulkan/DrawEngineVulkan.cpp @@ -858,7 +858,7 @@ void DrawEngineVulkan::DoFlush() { if (prim == GE_PRIM_TRIANGLE_STRIP) prim = GE_PRIM_TRIANGLES; - u16 *inds = decIndex_; + u16 *const inds = decIndex_; 
SoftwareTransformResult result{}; SoftwareTransformParams params{}; params.decoded = decoded_; @@ -898,9 +898,10 @@ void DrawEngineVulkan::DoFlush() { // Games sometimes expect exact matches (see #12626, for example) for equal comparisons. if (result.action == SW_CLEAR && everUsedEqualDepth_ && gstate.isClearModeDepthMask() && result.depth > 0.0f && result.depth < 1.0f) result.action = SW_NOT_READY; + int indsOffset = 0; if (result.action == SW_NOT_READY) { swTransform.DetectOffsetTexture(maxIndex); - swTransform.BuildDrawingParams(prim, indexGen.VertexCount(), dec_->VertexType(), inds, maxIndex, &result); + swTransform.BuildDrawingParams(prim, indexGen.VertexCount(), dec_->VertexType(), inds, indsOffset, DECODED_INDEX_BUFFER_SIZE / sizeof(uint16_t), maxIndex, &result); } if (result.setSafeSize) @@ -970,9 +971,9 @@ void DrawEngineVulkan::DoFlush() { if (result.drawIndexed) { VkBuffer vbuf, ibuf; vbOffset = (uint32_t)pushVertex_->Push(result.drawBuffer, maxIndex * sizeof(TransformedVertex), 4, &vbuf); - ibOffset = (uint32_t)pushIndex_->Push(inds, sizeof(short) * result.drawNumTrans, 4, &ibuf); + ibOffset = (uint32_t)pushIndex_->Push(inds + indsOffset, sizeof(short) * result.drawNumTrans, 4, &ibuf); renderManager->DrawIndexed(ds, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, ibuf, ibOffset, result.drawNumTrans, 1); - } else { + } else if (result.drawNumTrans > 0) { VkBuffer vbuf; vbOffset = (uint32_t)pushVertex_->Push(result.drawBuffer, result.drawNumTrans * sizeof(TransformedVertex), 4, &vbuf); renderManager->Draw(ds, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, result.drawNumTrans);