Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reinterpret between 32 and 16 bit texture formats #15907

Merged
merged 36 commits into from
Aug 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
7cba231
Finish the depal refactoring.
hrydgard Aug 26, 2022
cd0a31a
Work in bytes in the framebuffer matching algorithm for block copies,…
hrydgard Aug 25, 2022
9a82a7d
Detect need for 16<->32-bit reinterprets
hrydgard Aug 25, 2022
eeb784b
Prepare for 16/32 reinterpret
hrydgard Aug 26, 2022
afb90f6
Refactor reinterpret to prepare for 16/32-bit
hrydgard Aug 26, 2022
3c5ec25
Initial implementation of 32/16-bit color reinterpret blits.
hrydgard Aug 26, 2022
1ccfd79
wip
hrydgard Aug 26, 2022
6aecbef
Unsuccessful swizzle implementation
hrydgard Aug 26, 2022
dfccdc6
More successful swizzle impl. Not perfect
hrydgard Aug 26, 2022
39690a9
Quick hack to widen the depal texture if 0x200000 swizzle is needed
hrydgard Aug 26, 2022
693fd08
Bugfix, better logging
hrydgard Aug 26, 2022
6781dd5
Fix Katamari
hrydgard Aug 26, 2022
8acae79
Minor D3D11 refactoring
hrydgard Aug 27, 2022
27ba743
Comment fix, cleanup
hrydgard Aug 27, 2022
59053e7
Fix bug in reinterpret float impl
hrydgard Aug 27, 2022
880ea48
Implement the stencil/alpha reverse trick for all backends
hrydgard Aug 27, 2022
769f3d1
Accept color textures as reinterpretable between 32 and 16-bit formats
hrydgard Aug 27, 2022
cb84977
Framebuffer texture matching: Remove heuristics other than seqCount
hrydgard Aug 27, 2022
7be3063
When cloning framebuffers for reinterpret, adjust width according to bpp
hrydgard Aug 27, 2022
628bac0
Log improvement
hrydgard Aug 27, 2022
85f2a0f
Remove some old logging that's not very interesting anymore
hrydgard Aug 27, 2022
c533ea4
Logging improvements
hrydgard Aug 27, 2022
92eaac0
Fix problem matching framebuffers for reinterpret
hrydgard Aug 27, 2022
28cfbdd
Enable reinterprets and create-buffer-on-copy for the Tantalus games
hrydgard Aug 27, 2022
8d1157e
GL bugfix, log cleanup
hrydgard Aug 27, 2022
869bd16
Fix UV offsets in 16->32 reinterpret. Better, but something is still …
hrydgard Aug 27, 2022
8bf1b2b
Reinterpret is the proper fix for God of War's shadows.
hrydgard Aug 27, 2022
a74d2ec
Comment fixes
hrydgard Aug 27, 2022
c80245a
OpenGL graphics fixes
hrydgard Aug 27, 2022
6cedf34
Block transfer bug fix
hrydgard Aug 27, 2022
322f29c
Comments and fixes
hrydgard Aug 27, 2022
df92f72
Unify the spongebob depth inverse check
hrydgard Aug 28, 2022
dae92b4
Safer spongebob depth inverse check, includes color check and depth func
hrydgard Aug 28, 2022
39f6621
Address additional feedback, thanks!
hrydgard Aug 28, 2022
98f2e2a
Remove irrelevant stencil state checks
hrydgard Aug 28, 2022
e63c14b
More spongebob check tweaks, assert fix
hrydgard Aug 28, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions Common/GPU/Shader.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,12 @@ struct UniformBufferDesc {
std::vector<UniformDesc> uniforms;
};

// Describes one shader uniform by type, name and index.
// Tables of these are handed to ShaderWriter::BeginFSMain (see g_draw2Duniforms in Draw2D.cpp).
struct UniformDef {
// Shader-language type name as text, e.g. "vec2" or "float".
const char *type;
// Identifier of the uniform as it appears in the shader source, e.g. "texSize".
const char *name;
// Numeric index of the uniform; the Draw2D table uses 0 and 1 in declaration order.
// NOTE(review): presumably the binding/register slot - confirm against ShaderWriter.
int index;
};

struct SamplerDef {
const char *name;
// TODO: Might need unsigned samplers, 3d samplers, or other types in the future.
Expand Down
6 changes: 0 additions & 6 deletions Common/GPU/ShaderWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,6 @@ struct InputDef {
int semantic;
};

// Describes one shader uniform by type, name and index.
struct UniformDef {
// Shader-language type name as text, e.g. "vec2" or "float".
const char *type;
// Identifier of the uniform as it appears in the shader source.
const char *name;
// Numeric index of the uniform.
// NOTE(review): presumably the binding/register slot - confirm against ShaderWriter.
int index;
};

struct VaryingDef {
const char *type;
const char *name;
Expand Down
1 change: 1 addition & 0 deletions Common/Math/math_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ inline bool isPowerOf2(int n) {
return n == 1 || (n & (n - 1)) == 0;
}

// Next power of 2.
inline uint32_t RoundUpToPowerOf2(uint32_t v) {
v--;
v |= v >> 1;
Expand Down
19 changes: 17 additions & 2 deletions GPU/Common/DepalettizeShaderCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "Core/Reporting.h"
#include "GPU/Common/GPUStateUtils.h"
#include "GPU/Common/DepalettizeShaderCommon.h"
#include "GPU/Common/Draw2D.h"

static const InputDef vsInputs[2] = {
{ "vec2", "a_position", Draw::SEM_POSITION, },
Expand All @@ -47,10 +48,23 @@ void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config) {
const int shift = config.shift;
const int mask = config.mask;

writer.C(" vec2 texcoord = v_texcoord;\n");

// Implement the swizzle we need to simulate, if a game uses 8888 framebuffers and any other mode than "6" to access depth textures.
// This implements the "2" mode swizzle (it fixes up the Y direction but not X. See comments on issue #15898)
// NOTE: This swizzle can be made to work with any power-of-2 resolution scaleFactor by shifting
// the bits around, but not sure how to handle 3x scaling. For now this is 1x-only (rough edges at higher resolutions).
if (config.bufferFormat == GE_FORMAT_DEPTH16) {
DepthScaleFactors factors = GetDepthScaleFactors();
writer.ConstFloat("z_scale", factors.scale);
writer.ConstFloat("z_offset", factors.offset);
if (config.depthUpperBits == 0x2) {
writer.C(R"(
int x = int((texcoord.x / scaleFactor) * texSize.x);
int temp = (x & 0xFFFFFE0F) | ((x >> 1) & 0xF0) | ((x << 4) & 0x100);
texcoord.x = (float(temp) / texSize.x) * scaleFactor;
)");
}
}

// Sampling turns our texture into floating point. To avoid this, might be able
Expand All @@ -66,7 +80,7 @@ void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config) {
// An alternative would be to have a special mode where we keep some extra precision here and sample the CLUT linearly - works for ramps such
// as those that Test Drive uses for its color remapping. But would need game specific flagging.

writer.C(" vec4 color = ").SampleTexture2D("tex", "v_texcoord").C(";\n");
writer.C(" vec4 color = ").SampleTexture2D("tex", "texcoord").C(";\n");

int shiftedMask = mask << shift;
switch (config.bufferFormat) {
Expand Down Expand Up @@ -103,6 +117,7 @@ void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config) {

if (config.bufferFormat == GE_FORMAT_DEPTH16 && config.textureFormat == GE_TFMT_5650) {
// Convert depth to 565, without going through a CLUT.
// TODO: Make "depal without a CLUT" a separate concept, to avoid redundantly creating a CLUT texture.
writer.C(" int idepth = int(clamp(depth, 0.0, 65535.0));\n");
writer.C(" float r = float(idepth & 31) / 31.0f;\n");
writer.C(" float g = float((idepth >> 5) & 63) / 63.0f;\n");
Expand Down Expand Up @@ -323,7 +338,7 @@ void GenerateDepalSmoothed(ShaderWriter &writer, const DepalConfig &config) {
void GenerateDepalFs(ShaderWriter &writer, const DepalConfig &config) {
writer.DeclareSamplers(samplers);
writer.HighPrecisionFloat();
writer.BeginFSMain(Slice<UniformDef>::empty(), varyings, FSFLAG_NONE);
writer.BeginFSMain(config.bufferFormat == GE_FORMAT_DEPTH16 ? g_draw2Duniforms : Slice<UniformDef>::empty(), varyings, FSFLAG_NONE);
if (config.smoothedDepal) {
// Handles a limited set of cases, but doesn't need any integer math so we don't
// need two variants.
Expand Down
7 changes: 4 additions & 3 deletions GPU/Common/DepalettizeShaderCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,14 @@ class ShaderWriter;
static const int DEPAL_TEXTURE_OLD_AGE = 120;

struct DepalConfig {
int mask;
int shift;
u32 startPos;
u8 mask;
u8 shift;
bool smoothedDepal;
u8 depthUpperBits;
GEPaletteFormat clutFormat;
GETextureFormat textureFormat;
GEBufferFormat bufferFormat;
bool smoothedDepal;
};

void GenerateDepalFs(ShaderWriter &writer, const DepalConfig &config);
20 changes: 17 additions & 3 deletions GPU/Common/Draw2D.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ static const SamplerDef samplers[1] = {
{ "tex" },
};

static const UniformDef uniforms[2] = {
const UniformDef g_draw2Duniforms[2] = {
{ "vec2", "texSize", 0 },
{ "float", "scaleFactor", 1},
};
Expand All @@ -53,7 +53,7 @@ struct Draw2DUB {

const UniformBufferDesc draw2DUBDesc{ sizeof(Draw2DUB), {
{ "texSize", -1, 0, UniformType::FLOAT2, 0 },
{ "scaleFactor", -1, 1, UniformType::FLOAT1, 0 },
{ "scaleFactor", -1, 1, UniformType::FLOAT1, 8 },
} };


Expand Down Expand Up @@ -102,7 +102,7 @@ Draw2DPipelineInfo GenerateDraw2D565ToDepthFs(ShaderWriter &writer) {

Draw2DPipelineInfo GenerateDraw2D565ToDepthDeswizzleFs(ShaderWriter &writer) {
writer.DeclareSamplers(samplers);
writer.BeginFSMain(uniforms, varyings, FSFLAG_WRITEDEPTH);
writer.BeginFSMain(g_draw2Duniforms, varyings, FSFLAG_WRITEDEPTH);
writer.C(" vec4 outColor = vec4(0.0, 0.0, 0.0, 0.0);\n");
// Unlike when just copying a depth buffer, here we're generating new depth values so we'll
// have to apply the scaling.
Expand Down Expand Up @@ -253,6 +253,20 @@ Draw2DPipeline *Draw2D::Create2DPipeline(std::function<Draw2DPipelineInfo (Shade
};
}

// Draws the source rectangle (srcX1,srcY1)-(srcX2,srcY2) into the destination rectangle
// (dstX1,dstY1)-(dstX2,dstY2) as a four-vertex strip using the given pipeline.
// Destination coordinates are mapped from [0, dstWidth/dstHeight] into the -1..1 range,
// source coordinates from [0, srcWidth/srcHeight] into 0..1 texture coordinates.
// No texture is passed to DrawStrip2D (nullptr); srcWidth, srcHeight and scaleFactor
// are forwarded to it unchanged.
void Draw2D::Blit(Draw2DPipeline *pipeline, float srcX1, float srcY1, float srcX2, float srcY2, float dstX1, float dstY1, float dstX2, float dstY2, float srcWidth, float srcHeight, float dstWidth, float dstHeight, bool linear, int scaleFactor) {
	// Reciprocals of the surface sizes, used to normalize the pixel coordinates.
	const float invDstW = 1.0f / dstWidth;
	const float invDstH = 1.0f / dstHeight;
	const float invSrcW = 1.0f / srcWidth;
	const float invSrcH = 1.0f / srcHeight;

	// Destination edges in the -1..1 range.
	const float posX1 = -1.0f + 2.0f * invDstW * dstX1;
	const float posX2 = -1.0f + 2.0f * invDstW * dstX2;
	const float posY1 = -(1.0f - 2.0f * invDstH * dstY1);
	const float posY2 = -(1.0f - 2.0f * invDstH * dstY2);

	// Source edges as normalized texture coordinates.
	const float u1 = invSrcW * srcX1;
	const float u2 = invSrcW * srcX2;
	const float v1 = invSrcH * srcY1;
	const float v2 = invSrcH * srcY2;

	// Strip order: (x1,y1), (x2,y1), (x1,y2), (x2,y2).
	Draw2DVertex quad[4] = {
		{ posX1, posY1, u1, v1 },
		{ posX2, posY1, u2, v1 },
		{ posX1, posY2, u1, v2 },
		{ posX2, posY2, u2, v2 },
	};

	DrawStrip2D(nullptr, quad, 4, linear, pipeline, srcWidth, srcHeight, scaleFactor);
}

void Draw2D::DrawStrip2D(Draw::Texture *tex, Draw2DVertex *verts, int vertexCount, bool linearFilter, Draw2DPipeline *pipeline, float texW, float texH, int scaleFactor) {
using namespace Draw;
Expand Down
4 changes: 4 additions & 0 deletions GPU/Common/Draw2D.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ struct Draw2DPipelineInfo {
Slice<SamplerDef> samplers;
};

extern const UniformDef g_draw2Duniforms[2];

struct Draw2DPipeline {
Draw::Pipeline *pipeline;
Draw2DPipelineInfo info;
Expand All @@ -58,6 +60,8 @@ class Draw2D {
Draw2DPipeline *Create2DPipeline(std::function<Draw2DPipelineInfo(ShaderWriter &)> generate);

void DrawStrip2D(Draw::Texture *tex, Draw2DVertex *verts, int vertexCount, bool linearFilter, Draw2DPipeline *pipeline, float texW = 0.0f, float texH = 0.0f, int scaleFactor = 0);

void Blit(Draw2DPipeline *pipeline, float srcX1, float srcY1, float srcX2, float srcY2, float dstX1, float dstY1, float dstX2, float dstY2, float srcWidth, float srcHeight, float dstWidth, float dstHeight, bool linear, int scaleFactor);
void Ensure2DResources();

private:
Expand Down
Loading