diff --git a/GPU/Common/DrawEngineCommon.cpp b/GPU/Common/DrawEngineCommon.cpp index 87dc9cb626b6..fcad4fd366e4 100644 --- a/GPU/Common/DrawEngineCommon.cpp +++ b/GPU/Common/DrawEngineCommon.cpp @@ -155,13 +155,13 @@ static Vec3f ClipToScreen(const Vec4f& coords) { float z = coords.z * zScale / coords.w + zCenter; // 16 = 0xFFFF / 4095.9375 - return Vec3f(x * 16, y * 16, z); + return Vec3f(x * 16 - gstate.getOffsetX16(), y * 16 - gstate.getOffsetY16(), z); } static Vec3f ScreenToDrawing(const Vec3f& coords) { Vec3f ret; - ret.x = (coords.x - gstate.getOffsetX16()) * (1.0f / 16.0f); - ret.y = (coords.y - gstate.getOffsetY16()) * (1.0f / 16.0f); + ret.x = coords.x * (1.0f / 16.0f); + ret.y = coords.y * (1.0f / 16.0f); ret.z = coords.z; return ret; } diff --git a/GPU/Software/BinManager.cpp b/GPU/Software/BinManager.cpp index a8eeebc622ed..2a83f208cb83 100644 --- a/GPU/Software/BinManager.cpp +++ b/GPU/Software/BinManager.cpp @@ -185,15 +185,8 @@ void BinManager::UpdateState() { scissor_.x1 = screenScissorTL.x; scissor_.y1 = screenScissorTL.y; - scissor_.x2 = screenScissorBR.x + 15; - scissor_.y2 = screenScissorBR.y + 15; - - // Our bin sizes are based on offset, so if that changes we have to flush. - if (queueOffsetX_ != gstate.getOffsetX16() || queueOffsetY_ != gstate.getOffsetY16()) { - Flush("offset"); - queueOffsetX_ = gstate.getOffsetX16(); - queueOffsetY_ = gstate.getOffsetY16(); - } + scissor_.x2 = screenScissorBR.x + SCREEN_SCALE_FACTOR - 1; + scissor_.y2 = screenScissorBR.y + SCREEN_SCALE_FACTOR - 1; // If we're about to texture from something still pending (i.e. depth), flush. if (HasTextureWrite(state)) @@ -294,6 +287,7 @@ void BinManager::AddTriangle(const VertexData &v0, const VertexData &v1, const V Vec2 d12((int)v1.screenpos.x - (int)v2.screenpos.x, (int)v1.screenpos.y - (int)v2.screenpos.y); // Drop primitives which are not in CCW order by checking the cross product. 
+ static_assert(SCREEN_SCALE_FACTOR <= 16, "Fails if scale factor is too high"); if (d01.x * d02.y - d01.y * d02.x < 0) return; // If all points have identical coords, we'll have 0 weights and not skip properly, so skip here. @@ -360,23 +354,23 @@ void BinManager::Drain() { // If the waitable has fully drained, we can update our binning decisions. if (!tasksSplit_ || waitable_->Empty()) { - int w2 = (queueRange_.x2 - queueRange_.x1 + 31) / 32; - int h2 = (queueRange_.y2 - queueRange_.y1 + 31) / 32; + int w2 = (queueRange_.x2 - queueRange_.x1 + (SCREEN_SCALE_FACTOR * 2 - 1)) / (SCREEN_SCALE_FACTOR * 2); + int h2 = (queueRange_.y2 - queueRange_.y1 + (SCREEN_SCALE_FACTOR * 2 - 1)) / (SCREEN_SCALE_FACTOR * 2); // Always bin the entire possible range, but focus on the drawn area. - ScreenCoords tl(queueOffsetX_, queueOffsetY_, 0); - ScreenCoords br(queueOffsetX_ + 1024 * 16, queueOffsetY_ + 1024 * 16, 0); + ScreenCoords tl(0, 0, 0); + ScreenCoords br(1024 * SCREEN_SCALE_FACTOR, 1024 * SCREEN_SCALE_FACTOR, 0); taskRanges_.clear(); if (h2 >= 18 && w2 >= h2 * 4) { - int bin_w = std::max(4, (w2 + maxTasks_ - 1) / maxTasks_) * 32; + int bin_w = std::max(4, (w2 + maxTasks_ - 1) / maxTasks_) * SCREEN_SCALE_FACTOR * 2; taskRanges_.push_back(BinCoords{ tl.x, tl.y, queueRange_.x1 + bin_w - 1, br.y - 1 }); for (int x = queueRange_.x1 + bin_w; x <= queueRange_.x2; x += bin_w) { int x2 = x + bin_w > queueRange_.x2 ? br.x : x + bin_w; taskRanges_.push_back(BinCoords{ x, tl.y, x2 - 1, br.y - 1 }); } } else if (h2 >= 18 && w2 >= 18) { - int bin_h = std::max(4, (h2 + maxTasks_ - 1) / maxTasks_) * 32; + int bin_h = std::max(4, (h2 + maxTasks_ - 1) / maxTasks_) * SCREEN_SCALE_FACTOR * 2; taskRanges_.push_back(BinCoords{ tl.x, tl.y, br.x - 1, queueRange_.y1 + bin_h - 1 }); for (int y = queueRange_.y1 + bin_h; y <= queueRange_.y2; y += bin_h) { int y2 = y + bin_h > queueRange_.y2 ? 
br.y : y + bin_h; @@ -564,28 +558,28 @@ BinCoords BinManager::Scissor(BinCoords range) { BinCoords BinManager::Range(const VertexData &v0, const VertexData &v1, const VertexData &v2) { BinCoords range; - range.x1 = std::min(std::min(v0.screenpos.x, v1.screenpos.x), v2.screenpos.x) & ~0xF; - range.y1 = std::min(std::min(v0.screenpos.y, v1.screenpos.y), v2.screenpos.y) & ~0xF; - range.x2 = std::max(std::max(v0.screenpos.x, v1.screenpos.x), v2.screenpos.x) | 0xF; - range.y2 = std::max(std::max(v0.screenpos.y, v1.screenpos.y), v2.screenpos.y) | 0xF; + range.x1 = std::min(std::min(v0.screenpos.x, v1.screenpos.x), v2.screenpos.x) & ~(SCREEN_SCALE_FACTOR - 1); + range.y1 = std::min(std::min(v0.screenpos.y, v1.screenpos.y), v2.screenpos.y) & ~(SCREEN_SCALE_FACTOR - 1); + range.x2 = std::max(std::max(v0.screenpos.x, v1.screenpos.x), v2.screenpos.x) | (SCREEN_SCALE_FACTOR - 1); + range.y2 = std::max(std::max(v0.screenpos.y, v1.screenpos.y), v2.screenpos.y) | (SCREEN_SCALE_FACTOR - 1); return Scissor(range); } BinCoords BinManager::Range(const VertexData &v0, const VertexData &v1) { BinCoords range; - range.x1 = std::min(v0.screenpos.x, v1.screenpos.x) & ~0xF; - range.y1 = std::min(v0.screenpos.y, v1.screenpos.y) & ~0xF; - range.x2 = std::max(v0.screenpos.x, v1.screenpos.x) | 0xF; - range.y2 = std::max(v0.screenpos.y, v1.screenpos.y) | 0xF; + range.x1 = std::min(v0.screenpos.x, v1.screenpos.x) & ~(SCREEN_SCALE_FACTOR - 1); + range.y1 = std::min(v0.screenpos.y, v1.screenpos.y) & ~(SCREEN_SCALE_FACTOR - 1); + range.x2 = std::max(v0.screenpos.x, v1.screenpos.x) | (SCREEN_SCALE_FACTOR - 1); + range.y2 = std::max(v0.screenpos.y, v1.screenpos.y) | (SCREEN_SCALE_FACTOR - 1); return Scissor(range); } BinCoords BinManager::Range(const VertexData &v0) { BinCoords range; - range.x1 = v0.screenpos.x & ~0xF; - range.y1 = v0.screenpos.y & ~0xF; - range.x2 = v0.screenpos.x | 0xF; - range.y2 = v0.screenpos.y | 0xF; + range.x1 = v0.screenpos.x & ~(SCREEN_SCALE_FACTOR - 1); + range.y1 = 
v0.screenpos.y & ~(SCREEN_SCALE_FACTOR - 1); + range.x2 = v0.screenpos.x | (SCREEN_SCALE_FACTOR - 1); + range.y2 = v0.screenpos.y | (SCREEN_SCALE_FACTOR - 1); return Scissor(range); } @@ -595,7 +589,7 @@ void BinManager::Expand(const BinCoords &range) { queueRange_.x2 = std::max(queueRange_.x2, range.x2); queueRange_.y2 = std::max(queueRange_.y2, range.y2); - if (maxTasks_ == 1 || (queueRange_.y2 - queueRange_.y1 >= 224 * 16 && enqueues_ < 36 * maxTasks_)) { + if (maxTasks_ == 1 || (queueRange_.y2 - queueRange_.y1 >= 224 * SCREEN_SCALE_FACTOR && enqueues_ < 36 * maxTasks_)) { Drain(); } } diff --git a/GPU/Software/BinManager.h b/GPU/Software/BinManager.h index b206b8deaee8..47bc1bba6f2f 100644 --- a/GPU/Software/BinManager.h +++ b/GPU/Software/BinManager.h @@ -234,8 +234,6 @@ class BinManager { BinCoords scissor_; BinItemQueue queue_; BinCoords queueRange_; - int queueOffsetX_ = -1; - int queueOffsetY_ = -1; SoftDirty dirty_ = SoftDirty::NONE; int maxTasks_ = 1; diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 616f8d797d98..9f46945fac83 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -549,16 +549,18 @@ static inline __m128i SOFTRAST_CALL TriangleEdgeStartSSE4(__m128i initX, __m128i template Vec4 TriangleEdge::Start(const ScreenCoords &v0, const ScreenCoords &v1, const ScreenCoords &origin) { // Start at pixel centers. - Vec4 initX = Vec4::AssignToAll(origin.x) + Vec4(7, 23, 7, 23); - Vec4 initY = Vec4::AssignToAll(origin.y) + Vec4(7, 7, 23, 23); + static constexpr int centerOff = (SCREEN_SCALE_FACTOR / 2) - 1; + static constexpr int centerPlus1 = SCREEN_SCALE_FACTOR + centerOff; + Vec4 initX = Vec4::AssignToAll(origin.x) + Vec4(centerOff, centerPlus1, centerOff, centerPlus1); + Vec4 initY = Vec4::AssignToAll(origin.y) + Vec4(centerOff, centerOff, centerPlus1, centerPlus1); // orient2d refactored. 
int xf = v0.y - v1.y; int yf = v1.x - v0.x; int c = v1.y * v0.x - v1.x * v0.y; - stepX = Vec4::AssignToAll(xf * 16 * 2); - stepY = Vec4::AssignToAll(yf * 16 * 2); + stepX = Vec4::AssignToAll(xf * SCREEN_SCALE_FACTOR * 2); + stepY = Vec4::AssignToAll(yf * SCREEN_SCALE_FACTOR * 2); #if defined(_M_SSE) && !PPSSPP_ARCH(X86) if (useSSE4) @@ -611,7 +613,7 @@ void TriangleEdge::NarrowMinMaxX(const Vec4 &w, int64_t minX, int6 if (wmax < 0) { if (stepX.x > 0) { int steps = -wmax / stepX.x; - rowMinX = std::max(rowMinX, minX + steps * 16 * 2); + rowMinX = std::max(rowMinX, minX + steps * SCREEN_SCALE_FACTOR * 2); } else if (stepX.x <= 0) { rowMinX = rowMaxX + 1; } @@ -619,7 +621,7 @@ void TriangleEdge::NarrowMinMaxX(const Vec4 &w, int64_t minX, int6 if (wmax >= 0 && stepX.x < 0) { int steps = (-wmax / stepX.x) + 1; - rowMaxX = std::min(rowMaxX, minX + steps * 16 * 2); + rowMaxX = std::min(rowMaxX, minX + steps * SCREEN_SCALE_FACTOR * 2); } } @@ -727,7 +729,7 @@ void DrawTriangleSlice( std::string ztag = StringFromFormat("DisplayListTZ_%08x", state.listPC); #endif - for (int64_t curY = minY; curY <= maxY; curY += 32, + for (int64_t curY = minY; curY <= maxY; curY += SCREEN_SCALE_FACTOR * 2, w0_base = e0.StepY(w0_base), w1_base = e1.StepY(w1_base), w2_base = e2.StepY(w2_base)) { @@ -735,25 +737,25 @@ void DrawTriangleSlice( Vec4 w1 = w1_base; Vec4 w2 = w2_base; - DrawingCoords p = TransformUnit::ScreenToDrawing(minX, curY, state.screenOffsetX, state.screenOffsetY); + DrawingCoords p = TransformUnit::ScreenToDrawing(minX, curY); int64_t rowMinX = minX, rowMaxX = maxX; e0.NarrowMinMaxX(w0, minX, rowMinX, rowMaxX); e1.NarrowMinMaxX(w1, minX, rowMinX, rowMaxX); e2.NarrowMinMaxX(w2, minX, rowMinX, rowMaxX); - int skipX = (rowMinX - minX) / 32; + int skipX = (rowMinX - minX) / (SCREEN_SCALE_FACTOR * 2); w0 = e0.StepXTimes(w0, skipX); w1 = e1.StepXTimes(w1, skipX); w2 = e2.StepXTimes(w2, skipX); p.x = (p.x + 2 * skipX) & 0x3FF; // TODO: Maybe we can clip the edges instead? 
- int scissorYPlus1 = curY + 16 > maxY ? -1 : 0; - Vec4 scissor_mask = Vec4(0, rowMaxX - rowMinX - 16, scissorYPlus1, (rowMaxX - rowMinX - 16) | scissorYPlus1); - Vec4 scissor_step = Vec4(0, -32, 0, -32); + int scissorYPlus1 = curY + SCREEN_SCALE_FACTOR > maxY ? -1 : 0; + Vec4 scissor_mask = Vec4(0, rowMaxX - rowMinX - SCREEN_SCALE_FACTOR, scissorYPlus1, (rowMaxX - rowMinX - SCREEN_SCALE_FACTOR) | scissorYPlus1); + Vec4 scissor_step = Vec4(0, -(SCREEN_SCALE_FACTOR * 2), 0, -(SCREEN_SCALE_FACTOR * 2)); - for (int64_t curX = rowMinX; curX <= rowMaxX; curX += 32, + for (int64_t curX = rowMinX; curX <= rowMaxX; curX += SCREEN_SCALE_FACTOR * 2, w0 = e0.StepX(w0), w1 = e1.StepX(w1), w2 = e2.StepX(w2), @@ -861,9 +863,9 @@ void DrawTriangleSlice( } #if !defined(SOFTGPU_MEMORY_TAGGING_DETAILED) && defined(SOFTGPU_MEMORY_TAGGING_BASIC) - for (int y = minY; y <= maxY; y += 16) { - DrawingCoords p = TransformUnit::ScreenToDrawing(minX, y, state.screenOffsetX, state.screenOffsetY); - DrawingCoords pend = TransformUnit::ScreenToDrawing(maxX, y, state.screenOffsetX, state.screenOffsetY); + for (int y = minY; y <= maxY; y += SCREEN_SCALE_FACTOR) { + DrawingCoords p = TransformUnit::ScreenToDrawing(minX, y); + DrawingCoords pend = TransformUnit::ScreenToDrawing(maxX, y); uint32_t row = gstate.getFrameBufAddress() + p.y * pixelID.cached.framebufStride * bpp; NotifyMemInfo(MemBlockFlags::WRITE, row + p.x * bpp, (pend.x - p.x) * bpp, tag.c_str(), tag.size()); @@ -916,7 +918,7 @@ void DrawPoint(const VertexData &v0, const BinCoords &range, const RasterizerSta if (!pixelID.clearMode) prim_color += Vec4(sec_color, 0); - DrawingCoords p = TransformUnit::ScreenToDrawing(pos, state.screenOffsetX, state.screenOffsetY); + DrawingCoords p = TransformUnit::ScreenToDrawing(pos); u16 z = pos.z; u8 fog = 255; @@ -943,13 +945,13 @@ void DrawPoint(const VertexData &v0, const BinCoords &range, const RasterizerSta } void ClearRectangle(const VertexData &v0, const VertexData &v1, const BinCoords 
&range, const RasterizerState &state) { - DrawingCoords pprime = TransformUnit::ScreenToDrawing(range.x1, range.y1, state.screenOffsetX, state.screenOffsetY); - DrawingCoords pend = TransformUnit::ScreenToDrawing(range.x2, range.y2, state.screenOffsetX, state.screenOffsetY); + DrawingCoords pprime = TransformUnit::ScreenToDrawing(range.x1, range.y1); + DrawingCoords pend = TransformUnit::ScreenToDrawing(range.x2, range.y2); auto &pixelID = state.pixelID; auto &samplerID = state.samplerID; // Min and max are in PSP fixed point screen coordinates, 16 here is for the 4 subpixel bits. - const int w = (range.x2 - range.x1 + 1) / 16; + const int w = (range.x2 - range.x1 + 1) / SCREEN_SCALE_FACTOR; if (w <= 0) return; @@ -1129,14 +1131,14 @@ void DrawLine(const VertexData &v0, const VertexData &v1, const BinCoords &range int steps; if (abs(dx) < abs(dy)) - steps = abs(dy) / 16; + steps = abs(dy) / SCREEN_SCALE_FACTOR; else - steps = abs(dx) / 16; + steps = abs(dx) / SCREEN_SCALE_FACTOR; // Avoid going too far since we typically don't start at the pixel center. - if (dx < 0 && dx >= -16) + if (dx < 0 && dx >= -SCREEN_SCALE_FACTOR) dx++; - if (dy < 0 && dy >= -16) + if (dy < 0 && dy >= -SCREEN_SCALE_FACTOR) dy++; double xinc = (double)dx / steps; @@ -1199,8 +1201,8 @@ void DrawLine(const VertexData &v0, const VertexData &v1, const BinCoords &range } // If inc is 0, force the delta to zero. - float ds = xinc == 0.0 ? 0.0f : (s1 - s) * 16.0f * (1.0f / xinc); - float dt = yinc == 0.0 ? 0.0f : (t1 - t) * 16.0f * (1.0f / yinc); + float ds = xinc == 0.0 ? 0.0f : (s1 - s) * (float)SCREEN_SCALE_FACTOR * (1.0f / xinc); + float dt = yinc == 0.0 ? 0.0f : (t1 - t) * (float)SCREEN_SCALE_FACTOR * (1.0f / yinc); int texLevel; int texLevelFrac; @@ -1209,7 +1211,7 @@ void DrawLine(const VertexData &v0, const VertexData &v1, const BinCoords &range if (state.antialiasLines) { // TODO: This is a naive and wrong implementation. 
- DrawingCoords p0 = TransformUnit::ScreenToDrawing(x, y, state.screenOffsetX, state.screenOffsetY); + DrawingCoords p0 = TransformUnit::ScreenToDrawing(x, y); s = ((float)p0.x + xinc / 32.0f) / 512.0f; t = ((float)p0.y + yinc / 32.0f) / 512.0f; @@ -1224,7 +1226,7 @@ void DrawLine(const VertexData &v0, const VertexData &v1, const BinCoords &range prim_color += Vec4(sec_color, 0); PROFILE_THIS_SCOPE("draw_px"); - DrawingCoords p = TransformUnit::ScreenToDrawing(x, y, state.screenOffsetX, state.screenOffsetY); + DrawingCoords p = TransformUnit::ScreenToDrawing(x, y); state.drawPixel(p.x, p.y, z, fog, ToVec4IntArg(prim_color), pixelID); #if defined(SOFTGPU_MEMORY_TAGGING_DETAILED) || defined(SOFTGPU_MEMORY_TAGGING_BASIC) diff --git a/GPU/Software/RasterizerRectangle.cpp b/GPU/Software/RasterizerRectangle.cpp index 336d62474e9b..bf24c391922c 100644 --- a/GPU/Software/RasterizerRectangle.cpp +++ b/GPU/Software/RasterizerRectangle.cpp @@ -88,12 +88,12 @@ void DrawSprite(const VertexData &v0, const VertexData &v1, const BinCoords &ran auto &pixelID = state.pixelID; auto &samplerID = state.samplerID; - DrawingCoords pos0 = TransformUnit::ScreenToDrawing(v0.screenpos, state.screenOffsetX, state.screenOffsetY); + DrawingCoords pos0 = TransformUnit::ScreenToDrawing(v0.screenpos); // Include the ending pixel based on its center, not start. 
- DrawingCoords pos1 = TransformUnit::ScreenToDrawing(v1.screenpos + ScreenCoords(7, 7, 0), state.screenOffsetX, state.screenOffsetY); + DrawingCoords pos1 = TransformUnit::ScreenToDrawing(v1.screenpos + ScreenCoords(7, 7, 0)); - DrawingCoords scissorTL = TransformUnit::ScreenToDrawing(range.x1, range.y1, state.screenOffsetX, state.screenOffsetY); - DrawingCoords scissorBR = TransformUnit::ScreenToDrawing(range.x2, range.y2, state.screenOffsetX, state.screenOffsetY); + DrawingCoords scissorTL = TransformUnit::ScreenToDrawing(range.x1, range.y1); + DrawingCoords scissorBR = TransformUnit::ScreenToDrawing(range.x2, range.y2); int z = v1.screenpos.z; int fog = 255; @@ -249,10 +249,10 @@ void DrawSprite(const VertexData &v0, const VertexData &v1, const BinCoords &ran bool g_needsClearAfterDialog = false; -static inline bool NoClampOrWrap(const Vec2f &tc) { +static inline bool NoClampOrWrap(const RasterizerState &state, const Vec2f &tc) { if (tc.x < 0 || tc.y < 0) return false; - return tc.x <= gstate.getTextureWidth(0) && tc.y <= gstate.getTextureHeight(0); + return tc.x <= state.samplerID.cached.sizes[0].w && tc.y <= state.samplerID.cached.sizes[0].h; } // Returns true if the normal path should be skipped. @@ -263,15 +263,15 @@ bool RectangleFastPath(const VertexData &v0, const VertexData &v1, BinManager &b // Check for 1:1 texture mapping. In that case we can call DrawSprite. int xdiff = v1.screenpos.x - v0.screenpos.x; int ydiff = v1.screenpos.y - v0.screenpos.y; - int udiff = (v1.texturecoords.x - v0.texturecoords.x) * 16.0f; - int vdiff = (v1.texturecoords.y - v0.texturecoords.y) * 16.0f; + int udiff = (v1.texturecoords.x - v0.texturecoords.x) * (float)SCREEN_SCALE_FACTOR; + int vdiff = (v1.texturecoords.y - v0.texturecoords.y) * (float)SCREEN_SCALE_FACTOR; bool coord_check = (xdiff == udiff || xdiff == -udiff) && (ydiff == vdiff || ydiff == -vdiff); // Currently only works for TL/BR, which is the most common but not required. 
bool orient_check = xdiff >= 0 && ydiff >= 0; // We already have a fast path for clear in ClearRectangle. - bool state_check = !state.pixelID.clearMode && !state.samplerID.hasAnyMips && NoClampOrWrap(v0.texturecoords) && NoClampOrWrap(v1.texturecoords); + bool state_check = !state.pixelID.clearMode && !state.samplerID.hasAnyMips && NoClampOrWrap(state, v0.texturecoords) && NoClampOrWrap(state, v1.texturecoords); if ((coord_check || !state.enableTextures) && orient_check && state_check) { binner.AddSprite(v0, v1); return true; @@ -282,9 +282,9 @@ bool RectangleFastPath(const VertexData &v0, const VertexData &v1, BinManager &b if (PSP_CoreParameter().compat.flags().DarkStalkersPresentHack && v0.texturecoords.x == 64.0f && v0.texturecoords.y == 16.0f && v1.texturecoords.x == 448.0f && v1.texturecoords.y == 240.0f) { // check for save/load dialog. if (!currentDialogActive) { - if (v0.screenpos.x == 0x7100 && v0.screenpos.y == 0x7780 && v1.screenpos.x == 0x8f00 && v1.screenpos.y == 0x8880) { + if (v0.screenpos.x + state.screenOffsetX == 0x7100 && v0.screenpos.y + state.screenOffsetY == 0x7780 && v1.screenpos.x + state.screenOffsetX == 0x8f00 && v1.screenpos.y + state.screenOffsetY == 0x8880) { g_DarkStalkerStretch = DSStretch::Wide; - } else if (v0.screenpos.x == 0x7400 && v0.screenpos.y == 0x7780 && v1.screenpos.x == 0x8C00 && v1.screenpos.y == 0x8880) { + } else if (v0.screenpos.x + state.screenOffsetX == 0x7400 && v0.screenpos.y + state.screenOffsetY == 0x7780 && v1.screenpos.x + state.screenOffsetX == 0x8C00 && v1.screenpos.y + state.screenOffsetY == 0x8880) { g_DarkStalkerStretch = DSStretch::Normal; } else { return false; @@ -456,8 +456,8 @@ bool DetectRectangleThroughModeSlices(const RasterizerState &state, const Vertex return false; // We might be able to compare ratios, but let's expect 1:1. 
- int texdiff1 = (texbr1.x - textl1.x) * 16.0f; - int texdiff2 = (texbr2.x - textl2.x) * 16.0f; + int texdiff1 = (texbr1.x - textl1.x) * (float)SCREEN_SCALE_FACTOR; + int texdiff2 = (texbr2.x - textl2.x) * (float)SCREEN_SCALE_FACTOR; int posdiff1 = br1.x - tl1.x; int posdiff2 = br2.x - tl2.x; return texdiff1 == posdiff1 && texdiff2 == posdiff2; diff --git a/GPU/Software/Sampler.cpp b/GPU/Software/Sampler.cpp index fb61db3d1fff..8aad116b141b 100644 --- a/GPU/Software/Sampler.cpp +++ b/GPU/Software/Sampler.cpp @@ -418,8 +418,8 @@ static inline void GetTexelCoordinates(int level, float s, float t, int &out_u, int width = samplerID.cached.sizes[level].w; int height = samplerID.cached.sizes[level].h; - int base_u = (int)(s * width * 256.0f) + 12 - x; - int base_v = (int)(t * height * 256.0f) + 12 - y; + int base_u = (int)(s * width * 256.0f); + int base_v = (int)(t * height * 256.0f); base_u >>= 8; base_v >>= 8; @@ -610,7 +610,7 @@ static inline Vec4IntResult SOFTRAST_CALL ApplyTexelClampQuadT(bool clamp, int v static inline Vec4IntResult SOFTRAST_CALL GetTexelCoordinatesQuadS(int level, float in_s, int &frac_u, int x, const SamplerID &samplerID) { int width = samplerID.cached.sizes[level].w; - int base_u = (int)(in_s * width * 256) + 12 - x - 128; + int base_u = (int)(in_s * width * 256) - 128; frac_u = (int)(base_u >> 4) & 0x0F; base_u >>= 8; @@ -621,7 +621,7 @@ static inline Vec4IntResult SOFTRAST_CALL GetTexelCoordinatesQuadS(int level, fl static inline Vec4IntResult SOFTRAST_CALL GetTexelCoordinatesQuadT(int level, float in_t, int &frac_v, int y, const SamplerID &samplerID) { int height = samplerID.cached.sizes[level].h; - int base_v = (int)(in_t * height * 256) + 12 - y - 128; + int base_v = (int)(in_t * height * 256) - 128; frac_v = (int)(base_v >> 4) & 0x0F; base_v >>= 8; diff --git a/GPU/Software/SamplerX86.cpp b/GPU/Software/SamplerX86.cpp index 4ef5b909dee9..9471ff61a5e1 100644 --- a/GPU/Software/SamplerX86.cpp +++ b/GPU/Software/SamplerX86.cpp @@ -2582,22 
+2582,8 @@ bool SamplerJitCache::Jit_GetTexelCoords(const SamplerID &id) { Describe("Texel"); // First, adjust X and Y... - X64Reg xReg = regCache_.Find(RegCache::GEN_ARG_X); - X64Reg yReg = regCache_.Find(RegCache::GEN_ARG_Y); - NEG(32, R(xReg)); - ADD(32, R(xReg), Imm8(12)); - NEG(32, R(yReg)); - ADD(32, R(yReg), Imm8(12)); - - X64Reg tempXYReg = regCache_.Alloc(RegCache::VEC_TEMP5); - SHL(64, R(yReg), Imm8(32)); - OR(64, R(xReg), R(yReg)); - MOVQ_xmm(tempXYReg, R(xReg)); - if (id.hasAnyMips) - PSHUFD(tempXYReg, R(tempXYReg), _MM_SHUFFLE(1, 0, 1, 0)); - regCache_.Unlock(xReg, RegCache::GEN_ARG_X); + // TODO: Shouldn't do this in the sampler, need to get s/t right. regCache_.ForceRelease(RegCache::GEN_ARG_X); - regCache_.Unlock(yReg, RegCache::GEN_ARG_Y); regCache_.ForceRelease(RegCache::GEN_ARG_Y); X64Reg uReg = regCache_.Alloc(RegCache::GEN_ARG_U); @@ -2640,12 +2626,11 @@ bool SamplerJitCache::Jit_GetTexelCoords(const SamplerID &id) { CVTTPS2DQ(sReg, R(sReg)); regCache_.Release(sizesReg, RegCache::VEC_TEMP0); - PADDD(sReg, R(tempXYReg)); PSRLD(sReg, 8); // Reuse tempXYReg for the level1 values. if (!cpu_info.bSSE4_1) - PSHUFD(tempXYReg, R(sReg), _MM_SHUFFLE(3, 2, 3, 2)); + PSHUFD(tReg, R(sReg), _MM_SHUFFLE(3, 2, 3, 2)); auto applyClampWrap = [&](X64Reg dest, bool clamp, bool isY, bool isLevel1) { int offset = offsetof(SamplerID, cached.sizes[0].w) + (isY ? 2 : 0) + (isLevel1 ? 4 : 0); @@ -2659,7 +2644,7 @@ bool SamplerJitCache::Jit_GetTexelCoords(const SamplerID &id) { else MOVD_xmm(R(dest), sReg); } else { - X64Reg srcReg = isLevel1 ? tempXYReg : sReg; + X64Reg srcReg = isLevel1 ? tReg : sReg; MOVD_xmm(R(dest), srcReg); if (!isY) PSRLDQ(srcReg, 4); @@ -2699,8 +2684,7 @@ bool SamplerJitCache::Jit_GetTexelCoords(const SamplerID &id) { UNPCKLPS(sReg, R(tReg)); MULPS(sReg, M(constWidthHeight256f_)); CVTTPS2DQ(sReg, R(sReg)); - // Add the X/Y offsets, then shift out the fraction. - PADDD(sReg, R(tempXYReg)); + // Great, shift out the fraction. 
PSRLD(sReg, 8); // Square textures are kinda common. @@ -2758,8 +2742,6 @@ bool SamplerJitCache::Jit_GetTexelCoords(const SamplerID &id) { regCache_.ForceRelease(RegCache::VEC_ARG_S); regCache_.ForceRelease(RegCache::VEC_ARG_T); - regCache_.Release(tempXYReg, RegCache::VEC_TEMP5); - return true; } @@ -2830,26 +2812,12 @@ bool SamplerJitCache::Jit_GetTexelCoordsQuad(const SamplerID &id) { CVTPS2DQ(sReg, R(sReg)); // Now adjust X and Y... - // TODO: Could we cache this? Should only vary on offset, maybe? - X64Reg xReg = regCache_.Find(RegCache::GEN_ARG_X); - X64Reg yReg = regCache_.Find(RegCache::GEN_ARG_Y); - NEG(32, R(xReg)); - SUB(32, R(xReg), Imm8(128 - 12)); - NEG(32, R(yReg)); - SUB(32, R(yReg), Imm8(128 - 12)); - SHL(64, R(yReg), Imm8(32)); - OR(64, R(xReg), R(yReg)); - - // Add them in. We do this in the SSE because we have more to do there... X64Reg tempXYReg = regCache_.Alloc(RegCache::VEC_TEMP0); - MOVQ_xmm(tempXYReg, R(xReg)); - if (id.hasAnyMips) - PSHUFD(tempXYReg, R(tempXYReg), _MM_SHUFFLE(1, 0, 1, 0)); + // Produce a -128 constant. + PCMPEQD(tempXYReg, R(tempXYReg)); + PSLLD(tempXYReg, 7); PADDD(sReg, R(tempXYReg)); regCache_.Release(tempXYReg, RegCache::VEC_TEMP0); - - regCache_.Unlock(xReg, RegCache::GEN_ARG_X); - regCache_.Unlock(yReg, RegCache::GEN_ARG_Y); regCache_.ForceRelease(RegCache::GEN_ARG_X); regCache_.ForceRelease(RegCache::GEN_ARG_Y); diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index 92e0e7a1f8bf..42831ae5a4e1 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -220,8 +220,8 @@ const SoftwareCommandTableEntry softgpuCommandTable[] = { { GE_CMD_ANTIALIASENABLE, 0, SoftDirty::RAST_BASIC }, // Viewport and offset for positions. 
- { GE_CMD_OFFSETX, 0, SoftDirty::BINNER_RANGE | SoftDirty::TRANSFORM_BASIC | SoftDirty::RAST_OFFSET }, - { GE_CMD_OFFSETY, 0, SoftDirty::BINNER_RANGE | SoftDirty::TRANSFORM_BASIC | SoftDirty::RAST_OFFSET }, + { GE_CMD_OFFSETX, 0, SoftDirty::RAST_OFFSET }, + { GE_CMD_OFFSETY, 0, SoftDirty::RAST_OFFSET }, { GE_CMD_VIEWPORTXSCALE, 0, SoftDirty::TRANSFORM_VIEWPORT }, { GE_CMD_VIEWPORTYSCALE, 0, SoftDirty::TRANSFORM_VIEWPORT }, { GE_CMD_VIEWPORTXCENTER, 0, SoftDirty::TRANSFORM_VIEWPORT }, diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index a548bdb6b5ae..57a320b9ba77 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -125,7 +125,10 @@ static ScreenCoords ClipToScreenInternal(Vec3f scaled, const ClipCoords &coords, // 16 = 0xFFFF / 4095.9375 // Round up at 0.625 to the nearest subpixel. - return ScreenCoords(scaled.x * 16.0f + 0.375f, scaled.y * 16.0f + 0.375f, scaled.z); + static_assert(SCREEN_SCALE_FACTOR == 16, "Currently only supports scale 16"); + int x = (int)(scaled.x * 16.0f + 0.375f - gstate.getOffsetX16()); + int y = (int)(scaled.y * 16.0f + 0.375f - gstate.getOffsetY16()); + return ScreenCoords(x, y, scaled.z); } static inline ScreenCoords ClipToScreenInternal(const ClipCoords &coords, bool *outside_range_flag) { @@ -159,8 +162,8 @@ ScreenCoords TransformUnit::ClipToScreen(const ClipCoords &coords) { ScreenCoords TransformUnit::DrawingToScreen(const DrawingCoords &coords, u16 z) { ScreenCoords ret; - ret.x = (u32)coords.x * 16 + gstate.getOffsetX16(); - ret.y = (u32)coords.y * 16 + gstate.getOffsetY16(); + ret.x = (u32)coords.x * SCREEN_SCALE_FACTOR; + ret.y = (u32)coords.y * SCREEN_SCALE_FACTOR; ret.z = z; return ret; } @@ -177,8 +180,6 @@ struct TransformState { float fogEnd; float fogSlope; - uint16_t screenOffsetX; - uint16_t screenOffsetY; float matrix[16]; Vec3f screenScale; @@ -207,8 +208,6 @@ void ComputeTransformState(TransformState *state, const VertexReader &vreader) { 
state->negateNormals = gstate.areNormalsReversed(); state->uvGenMode = gstate.getUVGenMode(); - state->screenOffsetX = gstate.getOffsetX16(); - state->screenOffsetY = gstate.getOffsetY16(); if (state->enableTransform) { if (state->enableFog) { @@ -427,8 +426,8 @@ VertexData TransformUnit::ReadVertex(VertexReader &vreader, const TransformState if (state.enableLighting) Lighting::Process(vertex, worldpos, worldnormal, state.lightingState); } else { - vertex.screenpos.x = (int)(pos[0] * 16) + state.screenOffsetX; - vertex.screenpos.y = (int)(pos[1] * 16) + state.screenOffsetY; + vertex.screenpos.x = (int)(pos[0] * SCREEN_SCALE_FACTOR); + vertex.screenpos.y = (int)(pos[1] * SCREEN_SCALE_FACTOR); vertex.screenpos.z = pos[2]; vertex.clippos.w = 1.f; vertex.fogdepth = 1.f; @@ -891,7 +890,7 @@ bool TransformUnit::GetCurrentSimpleVertices(int count, std::vector ModelCoords; @@ -104,15 +106,15 @@ class TransformUnit { static ViewCoords WorldToView(const WorldCoords& coords); static ClipCoords ViewToClip(const ViewCoords& coords); static ScreenCoords ClipToScreen(const ClipCoords& coords); - static inline DrawingCoords ScreenToDrawing(int x, int y, int offsetX, int offsetY) { + static inline DrawingCoords ScreenToDrawing(int x, int y) { DrawingCoords ret; - // When offset > coord, it correctly goes negative and force-scissors. - ret.x = (x - offsetX) / 16; - ret.y = (y - offsetY) / 16; + // When offset > coord, this is negative and force-scissors. 
+ ret.x = x / SCREEN_SCALE_FACTOR; + ret.y = y / SCREEN_SCALE_FACTOR; return ret; } - static inline DrawingCoords ScreenToDrawing(const ScreenCoords &coords, int offsetX, int offsetY) { - return ScreenToDrawing(coords.x, coords.y, offsetX, offsetY); + static inline DrawingCoords ScreenToDrawing(const ScreenCoords &coords) { + return ScreenToDrawing(coords.x, coords.y); } static ScreenCoords DrawingToScreen(const DrawingCoords &coords, u16 z); diff --git a/pspautotests b/pspautotests index 8b5da5710a96..682a4303aba6 160000 --- a/pspautotests +++ b/pspautotests @@ -1 +1 @@ -Subproject commit 8b5da5710a965d48f685e7c9dbfaeeae32d4c4d8 +Subproject commit 682a4303aba63a50c91ae0fa6928c9dac8ca9b92 diff --git a/test.py b/test.py index 07358fde09b3..72c6302eb24e 100755 --- a/test.py +++ b/test.py @@ -150,9 +150,7 @@ def target(): "gpu/commands/material", "gpu/displaylist/alignment", "gpu/dither/dither", - "gpu/filtering/linear", "gpu/filtering/mipmaplinear", - "gpu/filtering/nearest", "gpu/ge/break", "gpu/ge/context", "gpu/ge/edram", @@ -387,6 +385,12 @@ def target(): "gpu/complex/complex", "gpu/depth/precision", "gpu/displaylist/state", + "gpu/filtering/linear", + "gpu/filtering/nearest", + "gpu/filtering/precisionlinear2d", + "gpu/filtering/precisionlinear3d", + "gpu/filtering/precisionnearest2d", + "gpu/filtering/precisionnearest3d", "gpu/ge/get", "gpu/primitives/bezier", "gpu/primitives/continue",