hrydgard · hrydgard · Aug 28, 2022 · Aug 26, 2022 · Aug 25, 2022 · Aug 25, 2022
diff --git a/Common/GPU/Shader.h b/Common/GPU/Shader.h
@@ -85,6 +85,12 @@ struct UniformBufferDesc {
 	std::vector<UniformDesc> uniforms;
 };
 
+struct UniformDef {
+	const char *type;  // Shader type name as text, e.g. "vec2" or "float".
+	const char *name;  // Identifier the generated shader code uses for this uniform.
+	int index;  // Slot number of the uniform; presumably its binding/declaration order - confirm against ShaderWriter.
+};
+
 struct SamplerDef {
 	const char *name;
 	// TODO: Might need unsigned samplers, 3d samplers, or other types in the future.

diff --git a/Common/GPU/ShaderWriter.h b/Common/GPU/ShaderWriter.h
@@ -22,12 +22,6 @@ struct InputDef {
 	int semantic;
 };
 
-struct UniformDef {
-	const char *type;
-	const char *name;
-	int index;
-};
-
 struct VaryingDef {
 	const char *type;
 	const char *name;

diff --git a/Common/Math/math_util.h b/Common/Math/math_util.h
@@ -28,6 +28,7 @@ inline bool isPowerOf2(int n) {
 	return n == 1 || (n & (n - 1)) == 0;
 }
 
+// Next power of 2.
 inline uint32_t RoundUpToPowerOf2(uint32_t v) {
 	v--;
 	v |= v >> 1;

diff --git a/GPU/Common/DepalettizeShaderCommon.cpp b/GPU/Common/DepalettizeShaderCommon.cpp
@@ -26,6 +26,7 @@
 #include "Core/Reporting.h"
 #include "GPU/Common/GPUStateUtils.h"
 #include "GPU/Common/DepalettizeShaderCommon.h"
+#include "GPU/Common/Draw2D.h"
 
 static const InputDef vsInputs[2] = {
 	{ "vec2", "a_position", Draw::SEM_POSITION, },
@@ -47,10 +48,23 @@ void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config) {
 	const int shift = config.shift;
 	const int mask = config.mask;
 
+	writer.C("  vec2 texcoord = v_texcoord;\n");
+
+	// Simulate the swizzle needed when a game uses 8888 framebuffers and any mode other than "6" to access depth textures.
+	// Only the "2" mode swizzle is implemented (it fixes up the Y direction but not X; see the comments on issue #15898).
+	// NOTE: This swizzle could be made to work with any power-of-2 scaleFactor by shifting the bits around,
+	// but it's unclear how to handle 3x scaling. For now it is 1x-only (rough edges at higher resolutions).
 	if (config.bufferFormat == GE_FORMAT_DEPTH16) {
 		DepthScaleFactors factors = GetDepthScaleFactors();
 		writer.ConstFloat("z_scale", factors.scale);
 		writer.ConstFloat("z_offset", factors.offset);
+		if (config.depthUpperBits == 0x2) {
+			writer.C(R"(
+  int x = int((texcoord.x / scaleFactor) * texSize.x);
+  int temp = (x & 0xFFFFFE0F) | ((x >> 1) & 0xF0) | ((x << 4) & 0x100);
+  texcoord.x = (float(temp) / texSize.x) * scaleFactor;
+)");
+		}
 	}
 
 	// Sampling turns our texture into floating point. To avoid this, might be able
@@ -66,7 +80,7 @@ void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config) {
 	// An alternative would be to have a special mode where we keep some extra precision here and sample the CLUT linearly - works for ramps such
 	// as those that Test Drive uses for its color remapping. But would need game specific flagging.
 
-	writer.C("  vec4 color = ").SampleTexture2D("tex", "v_texcoord").C(";\n");
+	writer.C("  vec4 color = ").SampleTexture2D("tex", "texcoord").C(";\n");
 
 	int shiftedMask = mask << shift;
 	switch (config.bufferFormat) {
@@ -103,6 +117,7 @@ void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config) {
 
 		if (config.bufferFormat == GE_FORMAT_DEPTH16 && config.textureFormat == GE_TFMT_5650) {
 			// Convert depth to 565, without going through a CLUT.
+			// TODO: Make "depal without a CLUT" a separate concept, to avoid redundantly creating a CLUT texture.
 			writer.C("  int idepth = int(clamp(depth, 0.0, 65535.0));\n");
 			writer.C("  float r = float(idepth & 31) / 31.0f;\n");
 			writer.C("  float g = float((idepth >> 5) & 63) / 63.0f;\n");
@@ -323,7 +338,7 @@ void GenerateDepalSmoothed(ShaderWriter &writer, const DepalConfig &config) {
 void GenerateDepalFs(ShaderWriter &writer, const DepalConfig &config) {
 	writer.DeclareSamplers(samplers);
 	writer.HighPrecisionFloat();
-	writer.BeginFSMain(Slice<UniformDef>::empty(), varyings, FSFLAG_NONE);
+	writer.BeginFSMain(config.bufferFormat == GE_FORMAT_DEPTH16 ? g_draw2Duniforms : Slice<UniformDef>::empty(), varyings, FSFLAG_NONE);
 	if (config.smoothedDepal) {
 		// Handles a limited set of cases, but doesn't need any integer math so we don't
 		// need two variants.

diff --git a/GPU/Common/DepalettizeShaderCommon.h b/GPU/Common/DepalettizeShaderCommon.h
@@ -27,13 +27,14 @@ class ShaderWriter;
 static const int DEPAL_TEXTURE_OLD_AGE = 120;
 
 struct DepalConfig {
-	int mask;
-	int shift;
 	u32 startPos;
+	u8 mask;  // Narrowed from int; GenerateDepalShader300 copies it back into an int before use.
+	u8 shift;  // Narrowed from int; GenerateDepalShader300 copies it back into an int before use.
+	bool smoothedDepal;  // Checked by GenerateDepalFs before the regular depal paths.
+	u8 depthUpperBits;  // 0x2 makes GenerateDepalShader300 emit the X-coordinate swizzle for DEPTH16 sources.
 	GEPaletteFormat clutFormat;
 	GETextureFormat textureFormat;
 	GEBufferFormat bufferFormat;
-	bool smoothedDepal;
 };
 
 void GenerateDepalFs(ShaderWriter &writer, const DepalConfig &config);
diff --git a/GPU/Common/Draw2D.cpp b/GPU/Common/Draw2D.cpp
@@ -40,7 +40,7 @@ static const SamplerDef samplers[1] = {
 	{ "tex" },
 };
 
-static const UniformDef uniforms[2] = {
+const UniformDef g_draw2Duniforms[2] = {  // No longer file-local: Draw2D.h declares it extern so GenerateDepalFs can pass it to BeginFSMain.
 	{ "vec2", "texSize", 0 },
 	{ "float", "scaleFactor", 1},
 };
@@ -53,7 +53,7 @@ struct Draw2DUB {
 
 const UniformBufferDesc draw2DUBDesc{ sizeof(Draw2DUB), {
 	{ "texSize", -1, 0, UniformType::FLOAT2, 0 },
+	{ "scaleFactor", -1, 1, UniformType::FLOAT1, 8 },  // Last field changed 0 -> 8; presumably the byte offset just past the FLOAT2 texSize - confirm against Draw2DUB.
 } };
 
 
@@ -102,7 +102,7 @@ Draw2DPipelineInfo GenerateDraw2D565ToDepthFs(ShaderWriter &writer) {
 
 Draw2DPipelineInfo GenerateDraw2D565ToDepthDeswizzleFs(ShaderWriter &writer) {
 	writer.DeclareSamplers(samplers);
-	writer.BeginFSMain(uniforms, varyings, FSFLAG_WRITEDEPTH);
+	writer.BeginFSMain(g_draw2Duniforms, varyings, FSFLAG_WRITEDEPTH);
 	writer.C("  vec4 outColor = vec4(0.0, 0.0, 0.0, 0.0);\n");
 	// Unlike when just copying a depth buffer, here we're generating new depth values so we'll
 	// have to apply the scaling.
@@ -253,6 +253,20 @@ Draw2DPipeline *Draw2D::Create2DPipeline(std::function<Draw2DPipelineInfo (Shade
 	};
 }
 
+void Draw2D::Blit(Draw2DPipeline *pipeline, float srcX1, float srcY1, float srcX2, float srcY2, float dstX1, float dstY1, float dstX2, float dstY2, float srcWidth, float srcHeight, float dstWidth, float dstHeight, bool linear, int scaleFactor) {
+	const float invDstW = 1.0f / dstWidth;  // Reciprocal extents, used to normalize the pixel-space corners below.
+	const float invDstH = 1.0f / dstHeight;
+	const float invSrcW = 1.0f / srcWidth;
+	const float invSrcH = 1.0f / srcHeight;
+	Draw2DVertex quad[4] = {
+		{ -1.0f + 2.0f * invDstW * dstX1, -(1.0f - 2.0f * invDstH * dstY1), invSrcW * srcX1, invSrcH * srcY1 },
+		{ -1.0f + 2.0f * invDstW * dstX2, -(1.0f - 2.0f * invDstH * dstY1), invSrcW * srcX2, invSrcH * srcY1 },
+		{ -1.0f + 2.0f * invDstW * dstX1, -(1.0f - 2.0f * invDstH * dstY2), invSrcW * srcX1, invSrcH * srcY2 },
+		{ -1.0f + 2.0f * invDstW * dstX2, -(1.0f - 2.0f * invDstH * dstY2), invSrcW * srcX2, invSrcH * srcY2 },
+	};
+	// No texture object is passed (nullptr); the four corners are submitted as a strip through the given pipeline.
+	DrawStrip2D(nullptr, quad, 4, linear, pipeline, srcWidth, srcHeight, scaleFactor);
+}
 
 void Draw2D::DrawStrip2D(Draw::Texture *tex, Draw2DVertex *verts, int vertexCount, bool linearFilter, Draw2DPipeline *pipeline, float texW, float texH, int scaleFactor) {
 	using namespace Draw;

diff --git a/GPU/Common/Draw2D.h b/GPU/Common/Draw2D.h
@@ -36,6 +36,8 @@ struct Draw2DPipelineInfo {
 	Slice<SamplerDef> samplers;
 };
 
+extern const UniformDef g_draw2Duniforms[2];
+
 struct Draw2DPipeline {
 	Draw::Pipeline *pipeline;
 	Draw2DPipelineInfo info;
@@ -58,6 +60,8 @@ class Draw2D {
 	Draw2DPipeline *Create2DPipeline(std::function<Draw2DPipelineInfo(ShaderWriter &)> generate);
 
 	void DrawStrip2D(Draw::Texture *tex, Draw2DVertex *verts, int vertexCount, bool linearFilter, Draw2DPipeline *pipeline, float texW = 0.0f, float texH = 0.0f, int scaleFactor = 0);
+
+	void Blit(Draw2DPipeline *pipeline, float srcX1, float srcY1, float srcX2, float srcY2, float dstX1, float dstY1, float dstX2, float dstY2, float srcWidth, float srcHeight, float dstWidth, float dstHeight, bool linear, int scaleFactor);
 	void Ensure2DResources();
 
 private: