Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GL: Optimize video uploads a bit #9485

Merged
merged 2 commits on
Mar 23, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions GPU/Common/FramebufferCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -695,7 +695,11 @@ void FramebufferManagerCommon::UpdateFromMemory(u32 addr, int size, bool safe) {
void FramebufferManagerCommon::DrawPixels(VirtualFramebuffer *vfb, int dstX, int dstY, const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) {
textureCache_->ForgetLastTexture();
shaderManager_->DirtyLastShader(); // On GL, important that this is BEFORE drawing
float u0 = 0.0f, u1 = 1.0f;
float v0 = 0.0f, v1 = 1.0f;

MakePixelTexture(srcPixels, srcPixelFormat, srcStride, width, height, u1, v1);

if (useBufferedRendering_ && vfb && vfb->fbo) {
draw_->BindFramebufferAsRenderTarget(vfb->fbo);
SetViewport2D(0, 0, vfb->renderWidth, vfb->renderHeight);
Expand All @@ -707,20 +711,20 @@ void FramebufferManagerCommon::DrawPixels(VirtualFramebuffer *vfb, int dstX, int
CenterDisplayOutputRect(&x, &y, &w, &h, 480.0f, 272.0f, (float)pixelWidth_, (float)pixelHeight_, ROTATION_LOCKED_HORIZONTAL);
SetViewport2D(x, y, w, h);
}

MakePixelTexture(srcPixels, srcPixelFormat, srcStride, width, height);
DisableState();

bool linearFilter = vfb || g_Config.iBufFilter == SCALE_LINEAR;
Bind2DShader();
DrawActiveTexture(dstX, dstY, width, height, vfb->bufferWidth, vfb->bufferHeight, 0.0f, v0, 1.0f, v1, ROTATION_LOCKED_HORIZONTAL, linearFilter);
DrawActiveTexture(dstX, dstY, width, height, vfb->bufferWidth, vfb->bufferHeight, u0, v0, u1, v1, ROTATION_LOCKED_HORIZONTAL, linearFilter);
}

void FramebufferManagerCommon::DrawFramebufferToOutput(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, bool applyPostShader) {
textureCache_->ForgetLastTexture();
shaderManager_->DirtyLastShader();

MakePixelTexture(srcPixels, srcPixelFormat, srcStride, 512, 272);
float u0 = 0.0f, u1 = 480.0f / 512.0f;
float v0 = 0.0f, v1 = 1.0f;
MakePixelTexture(srcPixels, srcPixelFormat, srcStride, 512, 272, u1, v1);

DisableState();

Expand All @@ -745,8 +749,6 @@ void FramebufferManagerCommon::DrawFramebufferToOutput(const u8 *srcPixels, GEBu
} else {
Bind2DShader();
}
float u0 = 0.0f, u1 = 480.0f / 512.0f;
float v0 = 0.0f, v1 = 1.0f;

// We are drawing directly to the back buffer.
if (needBackBufferYSwap_)
Expand Down
2 changes: 1 addition & 1 deletion GPU/Common/FramebufferCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ class FramebufferManagerCommon {
protected:
virtual void SetViewport2D(int x, int y, int w, int h);
void CalculatePostShaderUniforms(int bufferWidth, int bufferHeight, int renderWidth, int renderHeight, PostShaderUniforms *uniforms);
virtual void MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) = 0;
virtual void MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height, float &u1, float &v1) = 0;
virtual void DrawActiveTexture(float x, float y, float w, float h, float destW, float destH, float u0, float v0, float u1, float v1, int uvRotation, bool linearFilter) = 0;
virtual void Bind2DShader() = 0;
virtual void BindPostShader(const PostShaderUniforms &uniforms) = 0;
Expand Down
2 changes: 1 addition & 1 deletion GPU/D3D11/FramebufferManagerD3D11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ void FramebufferManagerD3D11::CompilePostShader() {
usePostShader_ = true;
}

void FramebufferManagerD3D11::MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) {
void FramebufferManagerD3D11::MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height, float &u1, float &v1) {
u8 *convBuf = NULL;

// TODO: Check / use D3DCAPS2_DYNAMICTEXTURES?
Expand Down
2 changes: 1 addition & 1 deletion GPU/D3D11/FramebufferManagerD3D11.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ class FramebufferManagerD3D11 : public FramebufferManagerCommon {
void CompilePostShader();
void BindPostShader(const PostShaderUniforms &uniforms) override;
void Bind2DShader() override;
void MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) override;
void MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height, float &u1, float &v1) override;
void PackFramebufferD3D11_(VirtualFramebuffer *vfb, int x, int y, int w, int h);
void PackDepthbuffer(VirtualFramebuffer *vfb, int x, int y, int w, int h);
void SimpleBlit(
Expand Down
10 changes: 6 additions & 4 deletions GPU/D3D11/StencilBufferD3D11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,9 @@ bool FramebufferManagerD3D11::NotifyStencilUpload(u32 addr, int size, bool skipZ

u16 w = dstBuffer->renderWidth;
u16 h = dstBuffer->renderHeight;
MakePixelTexture(src, dstBuffer->format, dstBuffer->fb_stride, dstBuffer->bufferWidth, dstBuffer->bufferHeight);
float u1 = 1.0f;
float v1 = 1.0f;
MakePixelTexture(src, dstBuffer->format, dstBuffer->fb_stride, dstBuffer->bufferWidth, dstBuffer->bufferHeight, u1, v1);
if (dstBuffer->fbo) {
draw_->BindFramebufferAsRenderTarget(dstBuffer->fbo);
} else {
Expand All @@ -207,9 +209,9 @@ bool FramebufferManagerD3D11::NotifyStencilUpload(u32 addr, int size, bool skipZ

float coord[20] = {
0.0f,0.0f,0.0f, 0.0f,0.0f,
fw,0.0f,0.0f, 1.0f,0.0f,
0.0f,fh,0.0f, 0.0f,1.0f,
fw,fh,0.0f, 1.0f,1.0f,
fw,0.0f,0.0f, u1,0.0f,
0.0f,fh,0.0f, 0.0f,v1,
fw,fh,0.0f, u1,v1,
};
// I think all these calculations pretty much cancel out?
float invDestW = 1.0f / (fw * 0.5f);
Expand Down
2 changes: 1 addition & 1 deletion GPU/Directx9/FramebufferDX9.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ static const D3DVERTEXELEMENT9 g_FramebufferVertexElements[] = {
shaderManager_ = sm;
}

void FramebufferManagerDX9::MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) {
void FramebufferManagerDX9::MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height, float &u1, float &v1) {
u8 *convBuf = NULL;
D3DLOCKED_RECT rect;

Expand Down
2 changes: 1 addition & 1 deletion GPU/Directx9/FramebufferDX9.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ class FramebufferManagerDX9 : public FramebufferManagerCommon {
void UpdateDownloadTempBuffer(VirtualFramebuffer *nvfb) override;

private:
void MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) override;
void MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height, float &u1, float &v1) override;
void PackFramebufferDirectx9_(VirtualFramebuffer *vfb, int x, int y, int w, int h);
void PackDepthbuffer(VirtualFramebuffer *vfb, int x, int y, int w, int h);
bool GetRenderTargetFramebuffer(LPDIRECT3DSURFACE9 renderTarget, LPDIRECT3DSURFACE9 offscreen, int w, int h, GPUDebugBuffer &buffer);
Expand Down
10 changes: 6 additions & 4 deletions GPU/Directx9/StencilBufferDX9.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,9 @@ bool FramebufferManagerDX9::NotifyStencilUpload(u32 addr, int size, bool skipZer
D3DVIEWPORT9 vp{ 0, 0, w, h, 0.0f, 1.0f };
device_->SetViewport(&vp);

MakePixelTexture(src, dstBuffer->format, dstBuffer->fb_stride, dstBuffer->bufferWidth, dstBuffer->bufferHeight);
float u1 = 1.0f;
float v1 = 1.0f;
MakePixelTexture(src, dstBuffer->format, dstBuffer->fb_stride, dstBuffer->bufferWidth, dstBuffer->bufferHeight, u1, v1);

device_->Clear(0, NULL, D3DCLEAR_TARGET | D3DCLEAR_STENCIL, D3DCOLOR_RGBA(0, 0, 0, 0), 0.0f, 0);

Expand All @@ -236,9 +238,9 @@ bool FramebufferManagerDX9::NotifyStencilUpload(u32 addr, int size, bool skipZer
float fh = dstBuffer->height;
float coord[20] = {
0.0f,0.0f,0.0f, 0.0f,0.0f,
fw,0.0f,0.0f, 1.0f,0.0f,
fw,fh,0.0f, 1.0f,1.0f,
0.0f,fh,0.0f, 0.0f,1.0f,
fw,0.0f,0.0f, u1,0.0f,
fw,fh,0.0f, u1,v1,
0.0f,fh,0.0f, 0.0f,v1,
};
float invDestW = 1.0f / (fw * 0.5f);
float invDestH = 1.0f / (fh * 0.5f);
Expand Down
40 changes: 29 additions & 11 deletions GPU/GLES/FramebufferManagerGLES.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -272,15 +272,25 @@ FramebufferManagerGLES::~FramebufferManagerGLES() {
delete [] convBuf_;
}

void FramebufferManagerGLES::MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) {
if (drawPixelsTex_ && (drawPixelsTexFormat_ != srcPixelFormat || drawPixelsTexW_ != width || drawPixelsTexH_ != height)) {
void FramebufferManagerGLES::MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height, float &u1, float &v1) {
// Optimization: skip a copy if possible in a common case.
int texWidth = width;
if (srcPixelFormat == GE_FORMAT_8888 && width < srcStride) {
// Don't up the upload requirements too much if subimages are unsupported.
if (gstate_c.Supports(GPU_SUPPORTS_UNPACK_SUBIMAGE) || width >= 480) {
texWidth = srcStride;
u1 *= (float)width / texWidth;
}
}

if (drawPixelsTex_ && (drawPixelsTexFormat_ != srcPixelFormat || drawPixelsTexW_ != texWidth || drawPixelsTexH_ != height)) {
glDeleteTextures(1, &drawPixelsTex_);
drawPixelsTex_ = 0;
}

if (!drawPixelsTex_) {
drawPixelsTex_ = textureCacheGL_->AllocTextureName();
drawPixelsTexW_ = width;
drawPixelsTexW_ = texWidth;
drawPixelsTexH_ = height;

// Initialize backbuffer texture for DrawPixels
Expand All @@ -291,7 +301,7 @@ void FramebufferManagerGLES::MakePixelTexture(const u8 *srcPixels, GEBufferForma
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);

glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texWidth, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0);
drawPixelsTexFormat_ = srcPixelFormat;
} else {
glBindTexture(GL_TEXTURE_2D, drawPixelsTex_);
Expand All @@ -300,9 +310,9 @@ void FramebufferManagerGLES::MakePixelTexture(const u8 *srcPixels, GEBufferForma
// TODO: We can just change the texture format and flip some bits around instead of this.
// Could share code with the texture cache perhaps.
bool useConvBuf = false;
if (srcPixelFormat != GE_FORMAT_8888 || srcStride != width) {
if (srcPixelFormat != GE_FORMAT_8888 || srcStride != texWidth) {
useConvBuf = true;
u32 neededSize = width * height * 4;
u32 neededSize = texWidth * height * 4;
if (!convBuf_ || convBufSize_ < neededSize) {
delete [] convBuf_;
convBuf_ = new u8[neededSize];
Expand All @@ -313,31 +323,31 @@ void FramebufferManagerGLES::MakePixelTexture(const u8 *srcPixels, GEBufferForma
case GE_FORMAT_565:
{
const u16 *src = (const u16 *)srcPixels + srcStride * y;
u8 *dst = convBuf_ + 4 * width * y;
u8 *dst = convBuf_ + 4 * texWidth * y;
ConvertRGBA565ToRGBA8888((u32 *)dst, src, width);
}
break;

case GE_FORMAT_5551:
{
const u16 *src = (const u16 *)srcPixels + srcStride * y;
u8 *dst = convBuf_ + 4 * width * y;
u8 *dst = convBuf_ + 4 * texWidth * y;
ConvertRGBA5551ToRGBA8888((u32 *)dst, src, width);
}
break;

case GE_FORMAT_4444:
{
const u16 *src = (const u16 *)srcPixels + srcStride * y;
u8 *dst = convBuf_ + 4 * width * y;
u8 *dst = convBuf_ + 4 * texWidth * y;
ConvertRGBA4444ToRGBA8888((u32 *)dst, src, width);
}
break;

case GE_FORMAT_8888:
{
const u8 *src = srcPixels + srcStride * 4 * y;
u8 *dst = convBuf_ + 4 * width * y;
u8 *dst = convBuf_ + 4 * texWidth * y;
memcpy(dst, src, 4 * width);
}
break;
Expand All @@ -348,7 +358,15 @@ void FramebufferManagerGLES::MakePixelTexture(const u8 *srcPixels, GEBufferForma
}
}
}
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, useConvBuf ? convBuf_ : srcPixels);

// Try to skip uploading the unnecessary parts.
if (gstate_c.Supports(GPU_SUPPORTS_UNPACK_SUBIMAGE) && width != texWidth) {
glPixelStorei(GL_UNPACK_ROW_LENGTH, texWidth);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, useConvBuf ? convBuf_ : srcPixels);
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
} else {
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, texWidth, height, GL_RGBA, GL_UNSIGNED_BYTE, useConvBuf ? convBuf_ : srcPixels);
}
CHECK_GL_ERROR_IF_DEBUG();
}

Expand Down
2 changes: 1 addition & 1 deletion GPU/GLES/FramebufferManagerGLES.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ class FramebufferManagerGLES : public FramebufferManagerCommon {
void UpdateDownloadTempBuffer(VirtualFramebuffer *nvfb) override;

private:
void MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) override;
void MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height, float &u1, float &v1) override;
void Bind2DShader() override;
void BindPostShader(const PostShaderUniforms &uniforms) override;
void CompileDraw2DProgram();
Expand Down
6 changes: 4 additions & 2 deletions GPU/GLES/StencilBufferGLES.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,9 @@ bool FramebufferManagerGLES::NotifyStencilUpload(u32 addr, int size, bool skipZe
}
glViewport(0, 0, w, h);

MakePixelTexture(src, dstBuffer->format, dstBuffer->fb_stride, dstBuffer->bufferWidth, dstBuffer->bufferHeight);
float u1 = 1.0f;
float v1 = 1.0f;
MakePixelTexture(src, dstBuffer->format, dstBuffer->fb_stride, dstBuffer->bufferWidth, dstBuffer->bufferHeight, u1, v1);
textureCacheGL_->ForgetLastTexture();

glClearStencil(0);
Expand All @@ -213,7 +215,7 @@ bool FramebufferManagerGLES::NotifyStencilUpload(u32 addr, int size, bool skipZe
glstate.stencilMask.set(i);
glUniform1f(u_stencilValue, i * (1.0f / 255.0f));
}
DrawActiveTexture(0, 0, dstBuffer->width, dstBuffer->height, dstBuffer->bufferWidth, dstBuffer->bufferHeight, 0.0f, 0.0f, 1.0f, 1.0f, ROTATION_LOCKED_HORIZONTAL, false);
DrawActiveTexture(0, 0, dstBuffer->width, dstBuffer->height, dstBuffer->bufferWidth, dstBuffer->bufferHeight, 0.0f, 0.0f, u1, v1, ROTATION_LOCKED_HORIZONTAL, false);
}
glstate.stencilMask.set(0xFF);

Expand Down
2 changes: 1 addition & 1 deletion GPU/Vulkan/FramebufferVulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,7 @@ void FramebufferManagerVulkan::Init() {
resized_ = true;
}

void FramebufferManagerVulkan::MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) {
void FramebufferManagerVulkan::MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height, float &u1, float &v1) {
if (drawPixelsTex_ && (drawPixelsTexFormat_ != srcPixelFormat || drawPixelsTex_->GetWidth() != width || drawPixelsTex_->GetHeight() != height)) {
delete drawPixelsTex_;
drawPixelsTex_ = nullptr;
Expand Down
2 changes: 1 addition & 1 deletion GPU/Vulkan/FramebufferVulkan.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ class FramebufferManagerVulkan : public FramebufferManagerCommon {
private:

// The returned texture does not need to be free'd, might be returned from a pool (currently single entry)
void MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) override;
void MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height, float &u1, float &v1) override;
void DoNotifyDraw();

VkCommandBuffer AllocFrameCommandBuffer();
Expand Down