From cc6491342e8cdd6bcea5fda5b5877e01b01da0d1 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Tue, 8 Feb 2022 23:11:12 -0800 Subject: [PATCH 1/4] softgpu: Prepare dedicated rectangle path. We're still sometimes using the slow rect-as-triangles path, let's do something faster. As a first step, just handle binning. --- GPU/Software/BinManager.cpp | 15 +++++ GPU/Software/BinManager.h | 2 + GPU/Software/Clipper.cpp | 117 ++++++------------------------------ GPU/Software/Rasterizer.cpp | 56 +++++++++++++++++ GPU/Software/Rasterizer.h | 1 + 5 files changed, 92 insertions(+), 99 deletions(-) diff --git a/GPU/Software/BinManager.cpp b/GPU/Software/BinManager.cpp index 2a83f208cb83..5ac2d813cf03 100644 --- a/GPU/Software/BinManager.cpp +++ b/GPU/Software/BinManager.cpp @@ -74,6 +74,10 @@ static inline void DrawBinItem(const BinItem &item, const RasterizerState &state ClearRectangle(item.v0, item.v1, item.range, state); break; + case BinItemType::RECT: + DrawRectangle(item.v0, item.v1, item.range, state); + break; + case BinItemType::SPRITE: DrawSprite(item.v0, item.v1, item.range, state); break; @@ -316,6 +320,17 @@ void BinManager::AddClearRect(const VertexData &v0, const VertexData &v1) { Expand(range); } +void BinManager::AddRect(const VertexData &v0, const VertexData &v1) { + const BinCoords range = Range(v0, v1); + if (range.Invalid()) + return; + + if (queue_.Full()) + Drain(); + queue_.Push(BinItem{ BinItemType::RECT, stateIndex_, range, v0, v1 }); + Expand(range); +} + void BinManager::AddSprite(const VertexData &v0, const VertexData &v1) { const BinCoords range = Range(v0, v1); if (range.Invalid()) diff --git a/GPU/Software/BinManager.h b/GPU/Software/BinManager.h index 47bc1bba6f2f..b6f201801f2a 100644 --- a/GPU/Software/BinManager.h +++ b/GPU/Software/BinManager.h @@ -27,6 +27,7 @@ class DrawBinItemsTask; enum class BinItemType { TRIANGLE, CLEAR_RECT, + RECT, SPRITE, LINE, POINT, @@ -189,6 +190,7 @@ class BinManager { void AddTriangle(const VertexData &v0, const VertexData &v1, const VertexData &v2); void AddClearRect(const VertexData &v0, const VertexData &v1); + void AddRect(const VertexData &v0, const VertexData &v1); void AddSprite(const VertexData &v0, const VertexData &v1); void AddLine(const VertexData &v0, const VertexData &v1); void AddPoint(const VertexData &v0); diff --git a/GPU/Software/Clipper.cpp b/GPU/Software/Clipper.cpp index 2b6386105c22..3627db0e3d18 100644 --- a/GPU/Software/Clipper.cpp +++ b/GPU/Software/Clipper.cpp @@ -117,17 +117,6 @@ inline float clip_dotprod(const VertexData &vert, float A, float B, float C, flo } \ } -static void RotateUV(const VertexData &tl, const VertexData &br, VertexData &tr, VertexData &bl) { - const int x1 = tl.screenpos.x; - const int x2 = br.screenpos.x; - const int y1 = tl.screenpos.y; - const int y2 = br.screenpos.y; - - if ((x1 < x2 && y1 > y2) || (x1 > x2 && y1 < y2)) { - std::swap(bl.texturecoords, tr.texturecoords); - } -} - static inline bool CheckOutsideZ(ClipCoords p, int &pos, int &neg) { constexpr float outsideValue = 1.000030517578125f; float z = p.z / p.w; @@ -156,102 +145,32 @@ void ProcessRect(const VertexData &v0, const VertexData &v1, BinManager &binner) else if (outsidePos >= 2 || outsideNeg >= 2) return; - VertexData buf[4]; - buf[0].clippos = ClipCoords(v0.clippos.x, v0.clippos.y, v1.clippos.z, v1.clippos.w); - buf[0].screenpos = TransformUnit::ClipToScreen(buf[0].clippos); - buf[0].texturecoords = v0.texturecoords; - - buf[1].clippos = ClipCoords(v0.clippos.x, v1.clippos.y, v1.clippos.z, v1.clippos.w); - buf[1].screenpos = TransformUnit::ClipToScreen(buf[1].clippos); - buf[1].texturecoords = Vec2(v0.texturecoords.x, v1.texturecoords.y); - - buf[2].clippos = ClipCoords(v1.clippos.x, v0.clippos.y, v1.clippos.z, v1.clippos.w); - buf[2].screenpos = TransformUnit::ClipToScreen(buf[2].clippos); - buf[2].texturecoords = Vec2(v1.texturecoords.x, v0.texturecoords.y); - - buf[3] = v1; - - // Color and depth values of second vertex are used for the whole rectangle - buf[0].color0 = buf[1].color0 = buf[2].color0 = buf[3].color0; - buf[0].color1 = buf[1].color1 = buf[2].color1 = buf[3].color1; - buf[0].fogdepth = buf[1].fogdepth = buf[2].fogdepth = buf[3].fogdepth; - - VertexData* topleft = &buf[0]; - VertexData* topright = &buf[1]; - VertexData* bottomleft = &buf[2]; - VertexData* bottomright = &buf[3]; - - for (int i = 0; i < 4; ++i) { - if (buf[i].clippos.x < topleft->clippos.x && buf[i].clippos.y < topleft->clippos.y) - topleft = &buf[i]; - if (buf[i].clippos.x > topright->clippos.x && buf[i].clippos.y < topright->clippos.y) - topright = &buf[i]; - if (buf[i].clippos.x < bottomleft->clippos.x && buf[i].clippos.y > bottomleft->clippos.y) - bottomleft = &buf[i]; - if (buf[i].clippos.x > bottomright->clippos.x && buf[i].clippos.y > bottomright->clippos.y) - bottomright = &buf[i]; - } + if (v0.fogdepth != v1.fogdepth) { + // Rectangles seem to always use nearest along X for fog depth, but reversed. + // TODO: Check exactness of middle. + VertexData vhalf0 = v1; + vhalf0.screenpos.x = v0.screenpos.x + (v1.screenpos.x - v0.screenpos.x) / 2; - RotateUV(*topleft, *bottomright, *topright, *bottomleft); + VertexData vhalf1 = v1; + vhalf1.screenpos.x = v0.screenpos.x + (v1.screenpos.x - v0.screenpos.x) / 2; + vhalf1.screenpos.y = v0.screenpos.y; - // Four triangles to do backfaces as well. Two of them will get backface culled. - // We already clipped, so we don't need additional processing. - binner.AddTriangle(*topleft, *topright, *bottomright); - binner.AddTriangle(*bottomright, *topright, *topleft); - binner.AddTriangle(*bottomright, *bottomleft, *topleft); - binner.AddTriangle(*topleft, *bottomleft, *bottomright); + VertexData vrev1 = v1; + vrev1.fogdepth = v0.fogdepth; + + binner.AddRect(v0, vhalf0); + binner.AddRect(vhalf1, vrev1); + } else { + binner.AddRect(v0, v1); + } } else { // through mode handling - if (Rasterizer::RectangleFastPath(v0, v1, binner)) { return; - } - - VertexData buf[4]; - buf[0].screenpos = ScreenCoords(v0.screenpos.x, v0.screenpos.y, v1.screenpos.z); - buf[0].texturecoords = v0.texturecoords; - - buf[1].screenpos = ScreenCoords(v0.screenpos.x, v1.screenpos.y, v1.screenpos.z); - buf[1].texturecoords = Vec2(v0.texturecoords.x, v1.texturecoords.y); - - buf[2].screenpos = ScreenCoords(v1.screenpos.x, v0.screenpos.y, v1.screenpos.z); - buf[2].texturecoords = Vec2(v1.texturecoords.x, v0.texturecoords.y); - - buf[3] = v1; - - // Color and depth values of second vertex are used for the whole rectangle - buf[0].color0 = buf[1].color0 = buf[2].color0 = buf[3].color0; - buf[0].color1 = buf[1].color1 = buf[2].color1 = buf[3].color1; // is color1 ever used in through mode? - buf[0].clippos.w = buf[1].clippos.w = buf[2].clippos.w = buf[3].clippos.w = 1.0f; - buf[0].fogdepth = buf[1].fogdepth = buf[2].fogdepth = buf[3].fogdepth = 1.0f; - - VertexData* topleft = &buf[0]; - VertexData* topright = &buf[1]; - VertexData* bottomleft = &buf[2]; - VertexData* bottomright = &buf[3]; - - // DrawTriangle always culls, so sort out the drawing order. - for (int i = 0; i < 4; ++i) { - if (buf[i].screenpos.x < topleft->screenpos.x && buf[i].screenpos.y < topleft->screenpos.y) - topleft = &buf[i]; - if (buf[i].screenpos.x > topright->screenpos.x && buf[i].screenpos.y < topright->screenpos.y) - topright = &buf[i]; - if (buf[i].screenpos.x < bottomleft->screenpos.x && buf[i].screenpos.y > bottomleft->screenpos.y) - bottomleft = &buf[i]; - if (buf[i].screenpos.x > bottomright->screenpos.x && buf[i].screenpos.y > bottomright->screenpos.y) - bottomright = &buf[i]; - } - - RotateUV(v0, v1, *topright, *bottomleft); - - if (gstate.isModeClear() && !gstate.isDitherEnabled()) { + } else if (gstate.isModeClear() && !gstate.isDitherEnabled()) { binner.AddClearRect(v0, v1); } else { - // Four triangles to do backfaces as well. Two of them will get backface culled. - binner.AddTriangle(*topleft, *topright, *bottomleft); - binner.AddTriangle(*bottomleft, *topright, *topleft); - binner.AddTriangle(*topright, *bottomright, *bottomleft); - binner.AddTriangle(*bottomleft, *bottomright, *topright); + binner.AddRect(v0, v1); } } } diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 9f46945fac83..031f9a999a20 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -888,6 +888,62 @@ void DrawTriangle(const VertexData &v0, const VertexData &v1, const VertexData & drawSlice(v0, v1, v2, range.x1, range.y1, range.x2, range.y2, state); } +static void RotateUV(const VertexData &tl, const VertexData &br, VertexData &tr, VertexData &bl) { + const int x1 = tl.screenpos.x; + const int x2 = br.screenpos.x; + const int y1 = tl.screenpos.y; + const int y2 = br.screenpos.y; + + if ((x1 < x2 && y1 > y2) || (x1 > x2 && y1 < y2)) { + std::swap(bl.texturecoords, tr.texturecoords); + } +} + +void DrawRectangle(const VertexData &v0, const VertexData &v1, const BinCoords &range, const RasterizerState &state) { + VertexData buf[4]; + buf[0].screenpos = ScreenCoords(v0.screenpos.x, v0.screenpos.y, v1.screenpos.z); + buf[0].texturecoords = v0.texturecoords; + + buf[1].screenpos = ScreenCoords(v0.screenpos.x, v1.screenpos.y, v1.screenpos.z); + buf[1].texturecoords = Vec2(v0.texturecoords.x, v1.texturecoords.y); + + buf[2].screenpos = ScreenCoords(v1.screenpos.x, v0.screenpos.y, v1.screenpos.z); + buf[2].texturecoords = Vec2(v1.texturecoords.x, v0.texturecoords.y); + + buf[3] = v1; + + // Color and depth values of second vertex are used for the whole rectangle + buf[0].color0 = buf[1].color0 = buf[2].color0 = buf[3].color0; + buf[0].color1 = buf[1].color1 = buf[2].color1 = buf[3].color1; + buf[0].clippos.w = buf[1].clippos.w = buf[2].clippos.w = buf[3].clippos.w; + // TODO + buf[0].fogdepth = buf[1].fogdepth = buf[2].fogdepth = buf[3].fogdepth; + + VertexData *topleft = &buf[0]; + VertexData *topright = &buf[1]; + VertexData *bottomleft = &buf[2]; + VertexData *bottomright = &buf[3]; + + // DrawTriangle always culls, so sort out the drawing order. + for (int i = 0; i < 4; ++i) { + if (buf[i].screenpos.x < topleft->screenpos.x && buf[i].screenpos.y < topleft->screenpos.y) + topleft = &buf[i]; + if (buf[i].screenpos.x > topright->screenpos.x && buf[i].screenpos.y < topright->screenpos.y) + topright = &buf[i]; + if (buf[i].screenpos.x < bottomleft->screenpos.x && buf[i].screenpos.y > bottomleft->screenpos.y) + bottomleft = &buf[i]; + if (buf[i].screenpos.x > bottomright->screenpos.x && buf[i].screenpos.y > bottomright->screenpos.y) + bottomright = &buf[i]; + } + + RotateUV(v0, v1, *topright, *bottomleft); + + DrawTriangle(*topleft, *topright, *bottomleft, range, state); + DrawTriangle(*bottomleft, *topright, *topleft, range, state); + DrawTriangle(*topright, *bottomright, *bottomleft, range, state); + DrawTriangle(*bottomleft, *bottomright, *topright, range, state); +} + void DrawPoint(const VertexData &v0, const BinCoords &range, const RasterizerState &state) { ScreenCoords pos = v0.screenpos; Vec4 prim_color = v0.color0; diff --git a/GPU/Software/Rasterizer.h b/GPU/Software/Rasterizer.h index 0e6d289163dd..6b55276854de 100644 --- a/GPU/Software/Rasterizer.h +++ b/GPU/Software/Rasterizer.h @@ -71,6 +71,7 @@ void ComputeRasterizerState(RasterizerState *state); // Draws a triangle if its vertices are specified in counter-clockwise order void DrawTriangle(const VertexData &v0, const VertexData &v1, const VertexData &v2, const BinCoords &range, const RasterizerState &state); +void DrawRectangle(const VertexData &v0, const VertexData &v1, const BinCoords &range, const RasterizerState &state); void DrawPoint(const VertexData &v0, const BinCoords &range, const RasterizerState &state); void DrawLine(const VertexData &v0, const VertexData &v1, const BinCoords &range, const RasterizerState &state); void ClearRectangle(const VertexData &v0, const VertexData &v1, const BinCoords &range, const RasterizerState &state); From e1eb4ba94aa331a363f14121a5f17fcd560553e6 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Wed, 9 Feb 2022 20:49:35 -0800 Subject: [PATCH 2/4] softgpu: Directly implement rectangle drawing. --- GPU/Software/Rasterizer.cpp | 185 +++++++++++++++++++++++++++--------- 1 file changed, 142 insertions(+), 43 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 031f9a999a20..1330fafe76bd 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -888,60 +888,159 @@ void DrawTriangle(const VertexData &v0, const VertexData &v1, const VertexData & drawSlice(v0, v1, v2, range.x1, range.y1, range.x2, range.y2, state); } -static void RotateUV(const VertexData &tl, const VertexData &br, VertexData &tr, VertexData &bl) { - const int x1 = tl.screenpos.x; - const int x2 = br.screenpos.x; - const int y1 = tl.screenpos.y; - const int y2 = br.screenpos.y; - - if ((x1 < x2 && y1 > y2) || (x1 > x2 && y1 < y2)) { - std::swap(bl.texturecoords, tr.texturecoords); +void DrawRectangle(const VertexData &v0, const VertexData &v1, const BinCoords &range, const RasterizerState &state) { + int entireX1 = std::min(v0.screenpos.x, v1.screenpos.x); + int entireY1 = std::min(v0.screenpos.y, v1.screenpos.y); + int entireX2 = std::max(v0.screenpos.x, v1.screenpos.x) - 1; + int entireY2 = std::max(v0.screenpos.y, v1.screenpos.y) - 1; + int minX = std::max(entireX1, range.x1); + int minY = std::max(entireY1, range.y1); + int maxX = std::min(entireX2, range.x2); + int maxY = std::min(entireY2, range.y2); + + Vec2f rowST(0.0f, 0.0f); + // Note: this is double the x or y movement. + Vec2f stx(0.0f, 0.0f); + Vec2f sty(0.0f, 0.0f); + if (state.enableTextures) { + Vec2f tc0 = v0.texturecoords; + Vec2f tc1 = v1.texturecoords; + if (state.throughMode) { + // For levels > 0, mipmapping is always based on level 0. Simpler to scale first. + tc0.s() *= 1.0f / (float)(1 << state.samplerID.width0Shift); + tc1.s() *= 1.0f / (float)(1 << state.samplerID.width0Shift); + tc0.t() *= 1.0f / (float)(1 << state.samplerID.height0Shift); + tc1.t() *= 1.0f / (float)(1 << state.samplerID.height0Shift); + } + + int diffX = (entireX2 - entireX1 + 1) / SCREEN_SCALE_FACTOR; + int diffY = (entireY2 - entireY1 + 1) / SCREEN_SCALE_FACTOR; + float diffS = tc1.s() - tc0.s(); + float diffT = tc1.t() - tc0.t(); + + if (v0.screenpos.x < v1.screenpos.x) { + if (v0.screenpos.y < v1.screenpos.y) { + // Okay, simple, TL -> BR. S and T move toward v1 with X and Y. + rowST = tc0; + stx = Vec2f(2.0f * diffS / diffX, 0.0f); + sty = Vec2f(0.0f, 2.0f * diffT / diffY); + } else { + // BL to TR, rotated. We start at TL still. + // X moves T (not S) toward v1, and Y moves S away from v1. + rowST = Vec2f(tc1.s() - diffS / diffY, tc0.t()); + stx = Vec2f(0.0f, 2.0f * diffT / diffX); + sty = Vec2f(2.0f * -diffS / diffY, 0.0f); + } + } else { + if (v0.screenpos.y < v1.screenpos.y) { + // TR to BL. Like BL to TR, rotated. + // X moves T (not s) away from v1, and Y moves S toward v1. + rowST = Vec2f(tc0.s(), tc1.t() - diffT / diffX); + stx = Vec2f(0.0f, 2.0f * -diffT / diffX); + sty = Vec2f(2.0f * diffS / diffY, 0.0f); + } else { + // BR to TL, just inverse of TL to BR. + rowST = Vec2f(tc1.s() - diffS / diffX, tc1.t() - diffT / diffY); + stx = Vec2f(2.0f * -diffS / diffX, 0.0f); + sty = Vec2f(0.0f, 2.0f * -diffT / diffY); + } + } + + // Okay, now move ST to the minX, minY position. + rowST += (stx / (float)(SCREEN_SCALE_FACTOR * 2)) * (minX - entireX1); + rowST += (sty / (float)(SCREEN_SCALE_FACTOR * 2)) * (minY - entireY1); } -} -void DrawRectangle(const VertexData &v0, const VertexData &v1, const BinCoords &range, const RasterizerState &state) { - VertexData buf[4]; - buf[0].screenpos = ScreenCoords(v0.screenpos.x, v0.screenpos.y, v1.screenpos.z); - buf[0].texturecoords = v0.texturecoords; + // And now what we add to spread out to 4 values. + const Vec4f sto4(0.0f, 0.5f * stx.s(), 0.5f * sty.s(), 0.5f * stx.s() + 0.5f * sty.s()); + const Vec4f tto4(0.0f, 0.5f * stx.t(), 0.5f * sty.t(), 0.5f * stx.t() + 0.5f * sty.t()); - buf[1].screenpos = ScreenCoords(v0.screenpos.x, v1.screenpos.y, v1.screenpos.z); - buf[1].texturecoords = Vec2(v0.texturecoords.x, v1.texturecoords.y); + ScreenCoords pprime(minX, minY, 0); + Vec4 fog = Vec4::AssignToAll(ClampFogDepth(v1.fogdepth)); + Vec4 z = Vec4::AssignToAll(v1.screenpos.z); + Vec3 sec_color = v1.color1; - buf[2].screenpos = ScreenCoords(v1.screenpos.x, v0.screenpos.y, v1.screenpos.z); - buf[2].texturecoords = Vec2(v1.texturecoords.x, v0.texturecoords.y); +#if defined(SOFTGPU_MEMORY_TAGGING_DETAILED) || defined(SOFTGPU_MEMORY_TAGGING_BASIC) + uint32_t bpp = state.pixelID.FBFormat() == GE_FORMAT_8888 ? 4 : 2; + std::string tag = StringFromFormat("DisplayListR_%08x", state.listPC); + std::string ztag = StringFromFormat("DisplayListRZ_%08x", state.listPC); +#endif - buf[3] = v1; + for (int64_t curY = minY; curY <= maxY; curY += SCREEN_SCALE_FACTOR * 2, rowST += sty) { + DrawingCoords p = TransformUnit::ScreenToDrawing(range.x1, curY); - // Color and depth values of second vertex are used for the whole rectangle - buf[0].color0 = buf[1].color0 = buf[2].color0 = buf[3].color0; - buf[0].color1 = buf[1].color1 = buf[2].color1 = buf[3].color1; - buf[0].clippos.w = buf[1].clippos.w = buf[2].clippos.w = buf[3].clippos.w; - // TODO - buf[0].fogdepth = buf[1].fogdepth = buf[2].fogdepth = buf[3].fogdepth; + int scissorY2 = curY + SCREEN_SCALE_FACTOR > maxY ? -1 : 0; + Vec4 scissor_mask = Vec4(0, maxX - minX - SCREEN_SCALE_FACTOR, scissorY2, (maxX - minX - SCREEN_SCALE_FACTOR) | scissorY2); + Vec4 scissor_step = Vec4(0, -(SCREEN_SCALE_FACTOR * 2), 0, -(SCREEN_SCALE_FACTOR * 2)); + Vec2f st = rowST; - VertexData *topleft = &buf[0]; - VertexData *topright = &buf[1]; - VertexData *bottomleft = &buf[2]; - VertexData *bottomright = &buf[3]; + for (int64_t curX = minX; curX <= maxX; curX += SCREEN_SCALE_FACTOR * 2, + st += stx, + scissor_mask += scissor_step, + p.x = (p.x + 2) & 0x3FF) { + Vec4 mask = scissor_mask; - // DrawTriangle always culls, so sort out the drawing order. - for (int i = 0; i < 4; ++i) { - if (buf[i].screenpos.x < topleft->screenpos.x && buf[i].screenpos.y < topleft->screenpos.y) - topleft = &buf[i]; - if (buf[i].screenpos.x > topright->screenpos.x && buf[i].screenpos.y < topright->screenpos.y) - topright = &buf[i]; - if (buf[i].screenpos.x < bottomleft->screenpos.x && buf[i].screenpos.y > bottomleft->screenpos.y) - bottomleft = &buf[i]; - if (buf[i].screenpos.x > bottomright->screenpos.x && buf[i].screenpos.y > bottomright->screenpos.y) - bottomright = &buf[i]; + Vec4 prim_color[4]; + for (int i = 0; i < 4; ++i) { + prim_color[i] = v1.color0; + } + + if (state.enableTextures) { + Vec4 s, t; + s = Vec4::AssignToAll(st.s()) + sto4; + t = Vec4::AssignToAll(st.t()) + tto4; + + ApplyTexturing(state, prim_color, mask, s, t, curX, curY); + } + + if (!state.pixelID.clearMode) { + for (int i = 0; i < 4; ++i) { +#if defined(_M_SSE) + // TODO: Tried making Vec4 do this, but things got slower. + const __m128i sec = _mm_and_si128(sec_color.ivec, _mm_set_epi32(0, -1, -1, -1)); + prim_color[i].ivec = _mm_add_epi32(prim_color[i].ivec, sec); +#else + prim_color[i] += Vec4(sec_color, 0); +#endif + } + } + + PROFILE_THIS_SCOPE("draw_rect_px"); + DrawingCoords subp = p; + for (int i = 0; i < 4; ++i) { + if (mask[i] < 0) { + continue; + } + subp.x = p.x + (i & 1); + subp.y = p.y + (i / 2); + + state.drawPixel(subp.x, subp.y, z[i], fog[i], ToVec4IntArg(prim_color[i]), state.pixelID); + +#if defined(SOFTGPU_MEMORY_TAGGING_DETAILED) + uint32_t row = gstate.getFrameBufAddress() + subp.y * state.pixelID.cached.framebufStride * bpp; + NotifyMemInfo(MemBlockFlags::WRITE, row + subp.x * bpp, bpp, tag.c_str(), tag.size()); + if (state.pixelID.depthWrite) { + row = gstate.getDepthBufAddress() + subp.y * state.pixelID.cached.depthbufStride * 2; + NotifyMemInfo(MemBlockFlags::WRITE, row + subp.x * 2, 2, ztag.c_str(), ztag.size()); + } +#endif + } + } } - RotateUV(v0, v1, *topright, *bottomleft); +#if !defined(SOFTGPU_MEMORY_TAGGING_DETAILED) && defined(SOFTGPU_MEMORY_TAGGING_BASIC) + for (int y = minY; y <= maxY; y += SCREEN_SCALE_FACTOR) { + DrawingCoords p = TransformUnit::ScreenToDrawing(minX, y, state.screenOffsetX, state.screenOffsetY); + DrawingCoords pend = TransformUnit::ScreenToDrawing(maxX, y, state.screenOffsetX, state.screenOffsetY); + uint32_t row = gstate.getFrameBufAddress() + p.y * state.pixelID.cached.framebufStride * bpp; + NotifyMemInfo(MemBlockFlags::WRITE, row + p.x * bpp, (pend.x - p.x) * bpp, tag.c_str(), tag.size()); - DrawTriangle(*topleft, *topright, *bottomleft, range, state); - DrawTriangle(*bottomleft, *topright, *topleft, range, state); - DrawTriangle(*topright, *bottomright, *bottomleft, range, state); - DrawTriangle(*bottomleft, *bottomright, *topright, range, state); + if (state.pixelID.depthWrite) { + row = gstate.getDepthBufAddress() + p.y * state.pixelID.cached.depthbufStride * 2; + NotifyMemInfo(MemBlockFlags::WRITE, row + p.x * 2, (pend.x - p.x) * 2, ztag.c_str(), ztag.size()); + } + } +#endif } void DrawPoint(const VertexData &v0, const BinCoords &range, const RasterizerState &state) { From df1a91ee2521edca49eb459858148cddb1bb2c8d Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 20 Feb 2022 10:25:00 -0800 Subject: [PATCH 3/4] samplerjit: Correct nearest negative texture clamp. Was not clamping to zero when negative. --- GPU/Software/SamplerX86.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GPU/Software/SamplerX86.cpp b/GPU/Software/SamplerX86.cpp index 9471ff61a5e1..415eed272383 100644 --- a/GPU/Software/SamplerX86.cpp +++ b/GPU/Software/SamplerX86.cpp @@ -2626,7 +2626,7 @@ bool SamplerJitCache::Jit_GetTexelCoords(const SamplerID &id) { CVTTPS2DQ(sReg, R(sReg)); regCache_.Release(sizesReg, RegCache::VEC_TEMP0); - PSRLD(sReg, 8); + PSRAD(sReg, 8); // Reuse tempXYReg for the level1 values. if (!cpu_info.bSSE4_1) @@ -2685,7 +2685,7 @@ bool SamplerJitCache::Jit_GetTexelCoords(const SamplerID &id) { MULPS(sReg, M(constWidthHeight256f_)); CVTTPS2DQ(sReg, R(sReg)); // Great, shift out the fraction. - PSRLD(sReg, 8); + PSRAD(sReg, 8); // Square textures are kinda common. bool clampApplied = false; From ff5edb2bbc95eadaade3f2d68b5d737963cdd908 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 20 Feb 2022 10:50:42 -0800 Subject: [PATCH 4/4] softgpu: Correct accounting for pixel center. Filtering is still not perfect but this makes different orientations better. --- GPU/Software/Rasterizer.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 1330fafe76bd..4a44c1c26125 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -893,8 +893,8 @@ void DrawRectangle(const VertexData &v0, const VertexData &v1, const BinCoords & int entireY1 = std::min(v0.screenpos.y, v1.screenpos.y); int entireX2 = std::max(v0.screenpos.x, v1.screenpos.x) - 1; int entireY2 = std::max(v0.screenpos.y, v1.screenpos.y) - 1; - int minX = std::max(entireX1, range.x1); - int minY = std::max(entireY1, range.y1); + int minX = std::max(entireX1, range.x1) | (SCREEN_SCALE_FACTOR / 2 - 1); + int minY = std::max(entireY1, range.y1) | (SCREEN_SCALE_FACTOR / 2 - 1); int maxX = std::min(entireX2, range.x2); int maxY = std::min(entireY2, range.y2); @@ -927,7 +927,7 @@ void DrawRectangle(const VertexData &v0, const VertexData &v1, const BinCoords & } else { // BL to TR, rotated. We start at TL still. // X moves T (not S) toward v1, and Y moves S away from v1. - rowST = Vec2f(tc1.s() - diffS / diffY, tc0.t()); + rowST = Vec2f(tc1.s(), tc0.t()); stx = Vec2f(0.0f, 2.0f * diffT / diffX); sty = Vec2f(2.0f * -diffS / diffY, 0.0f); } @@ -935,12 +935,12 @@ void DrawRectangle(const VertexData &v0, const VertexData &v1, const BinCoords & if (v0.screenpos.y < v1.screenpos.y) { // TR to BL. Like BL to TR, rotated. // X moves T (not s) away from v1, and Y moves S toward v1. - rowST = Vec2f(tc0.s(), tc1.t() - diffT / diffX); + rowST = Vec2f(tc0.s(), tc1.t()); stx = Vec2f(0.0f, 2.0f * -diffT / diffX); sty = Vec2f(2.0f * diffS / diffY, 0.0f); } else { // BR to TL, just inverse of TL to BR. - rowST = Vec2f(tc1.s() - diffS / diffX, tc1.t() - diffT / diffY); + rowST = Vec2f(tc1.s(), tc1.t()); stx = Vec2f(2.0f * -diffS / diffX, 0.0f); sty = Vec2f(0.0f, 2.0f * -diffT / diffY); }