Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Protect savestates while the threaded software renderer is running #1864

Merged
merged 31 commits into from
Jan 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
e5efc4b
First crack at ensuring the render thread doesn't touch GPU state whi…
JesseTG Oct 24, 2023
1262822
Get rid of the semaphore wait
JesseTG Oct 24, 2023
e17bf76
Merge branch 'upupstream-master' into jtg/threaded-render-sync
JesseTG Oct 24, 2023
56a6a71
Add some extra fields into GPU3D's serialization
JesseTG Oct 26, 2023
51d32f1
Oops, TempVertexBuffer is already serialized
JesseTG Oct 26, 2023
2c921ba
Move vertex serialization into its own method
JesseTG Oct 26, 2023
2f49a55
Lock the GPU3D state when rendering on the render thread or serializi…
JesseTG Oct 26, 2023
1977566
Revert "Lock the GPU3D state when rendering on the render thread or s…
JesseTG Oct 26, 2023
2fac17f
Add comments that describe the synchronization within GPU3D_Soft
JesseTG Oct 26, 2023
48ef2d1
Revert "Revert "Lock the GPU3D state when rendering on the render thr…
JesseTG Oct 26, 2023
58aaaf0
Let's try locking the GPU3D state throughout NDS::RunFrame
JesseTG Oct 27, 2023
65e8f06
Slim down the lock's scope
JesseTG Oct 27, 2023
53fccd1
Narrow the lock's scope some more
JesseTG Oct 27, 2023
9c4f3b2
Remove the lock entirely
JesseTG Oct 27, 2023
850b180
Try protecting the GPU3D state with just a mutex
JesseTG Oct 29, 2023
6349221
Merge branch 'upupstream-master' into jtg/threaded-render-sync
JesseTG Nov 7, 2023
13afd81
Merge branch 'upupstream-master' into jtg/threaded-render-sync
JesseTG Dec 28, 2023
0a56652
Remove a duplicate method definition
JesseTG Dec 28, 2023
888a970
Merge branch 'upupstream-master' into jtg/threaded-render-sync
JesseTG Jan 4, 2024
9397008
Add a missing `noexcept` specifier
JesseTG Jan 4, 2024
251d5d6
Remove an unused function
JesseTG Jan 5, 2024
3a64932
Cut some non-hardware state from `GPU3D`'s savestate
JesseTG Jan 6, 2024
30d06bc
Assume that the next frame after loading a savestate won't be identical
JesseTG Jan 6, 2024
942ee74
Actually, it _is_ worth it
JesseTG Jan 6, 2024
1fc2f72
Don't serialize the clip matrix
JesseTG Jan 6, 2024
69bca9b
Serialize `RenderPolygonRAM` as an array of indexes
JesseTG Jan 7, 2024
23f4281
Clean up some comments
JesseTG Jan 7, 2024
bd05985
Try restarting the render thread instead of using the lock
JesseTG Jan 7, 2024
1c1b3fc
Put the lock back
JesseTG Jan 7, 2024
2337924
Fix some polygon and vertex indexes being saved incorrectly
JesseTG Jan 7, 2024
0150394
Remove `SoftRenderer::StateBusy` since it turns out we don't need it
JesseTG Jan 7, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 65 additions & 42 deletions src/GPU3D.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,19 @@ GPU3D::GPU3D(melonDS::NDS& nds, std::unique_ptr<Renderer3D>&& renderer) noexcept
{
}

void Vertex::DoSavestate(Savestate* file) noexcept
{
file->VarArray(Position, sizeof(Position));
file->VarArray(Color, sizeof(Color));
file->VarArray(TexCoords, sizeof(TexCoords));

file->Bool32(&Clipped);

file->VarArray(FinalPosition, sizeof(FinalPosition));
file->VarArray(FinalColor, sizeof(FinalColor));
file->VarArray(HiresPosition, sizeof(HiresPosition));
}

void GPU3D::SetCurrentRenderer(std::unique_ptr<Renderer3D>&& renderer) noexcept
{
CurrentRenderer = std::move(renderer);
Expand Down Expand Up @@ -297,6 +310,12 @@ void GPU3D::DoSavestate(Savestate* file) noexcept
{
file->Section("GP3D");

SoftRenderer* softRenderer = dynamic_cast<SoftRenderer*>(CurrentRenderer.get());
if (softRenderer && softRenderer->IsThreaded())
{
softRenderer->SetupRenderThread(NDS.GPU);
}

CmdFIFO.DoSavestate(file);
CmdPIPE.DoSavestate(file);

Expand Down Expand Up @@ -372,33 +391,21 @@ void GPU3D::DoSavestate(Savestate* file) noexcept
file->Var32(&VertexNumInPoly);
file->Var32(&NumConsecutivePolygons);

for (int i = 0; i < 4; i++)
for (Vertex& vtx : TempVertexBuffer)
{
Vertex* vtx = &TempVertexBuffer[i];

file->VarArray(vtx->Position, sizeof(s32)*4);
file->VarArray(vtx->Color, sizeof(s32)*3);
file->VarArray(vtx->TexCoords, sizeof(s16)*2);

file->Bool32(&vtx->Clipped);

file->VarArray(vtx->FinalPosition, sizeof(s32)*2);
file->VarArray(vtx->FinalColor, sizeof(s32)*3);
vtx.DoSavestate(file);
}

if (file->Saving)
{
u32 id;
if (LastStripPolygon) id = (u32)((LastStripPolygon - (&PolygonRAM[0])) / sizeof(Polygon));
else id = -1;
file->Var32(&id);
u32 index = LastStripPolygon ? (u32)(LastStripPolygon - &PolygonRAM[0]) : UINT32_MAX;
file->Var32(&index);
}
else
{
u32 id;
file->Var32(&id);
if (id == 0xFFFFFFFF) LastStripPolygon = NULL;
else LastStripPolygon = &PolygonRAM[id];
u32 index = UINT32_MAX;
file->Var32(&index);
LastStripPolygon = (index == UINT32_MAX) ? nullptr : &PolygonRAM[index];
}

file->Var32(&CurRAMBank);
Expand All @@ -409,18 +416,9 @@ void GPU3D::DoSavestate(Savestate* file) noexcept
file->Var32(&FlushRequest);
file->Var32(&FlushAttributes);

for (int i = 0; i < 6144*2; i++)
for (Vertex& vtx : VertexRAM)
{
Vertex* vtx = &VertexRAM[i];

file->VarArray(vtx->Position, sizeof(s32)*4);
file->VarArray(vtx->Color, sizeof(s32)*3);
file->VarArray(vtx->TexCoords, sizeof(s16)*2);

file->Bool32(&vtx->Clipped);

file->VarArray(vtx->FinalPosition, sizeof(s32)*2);
file->VarArray(vtx->FinalColor, sizeof(s32)*3);
vtx.DoSavestate(file);
}

for(int i = 0; i < 2048*2; i++)
Expand All @@ -434,20 +432,17 @@ void GPU3D::DoSavestate(Savestate* file) noexcept
for (int j = 0; j < 10; j++)
{
Vertex* ptr = poly->Vertices[j];
u32 id;
if (ptr) id = (u32)((ptr - (&VertexRAM[0])) / sizeof(Vertex));
else id = -1;
file->Var32(&id);
u32 index = ptr ? (u32)(ptr - &VertexRAM[0]) : UINT32_MAX;
file->Var32(&index);
}
}
else
{
for (int j = 0; j < 10; j++)
{
u32 id = -1;
file->Var32(&id);
if (id == 0xFFFFFFFF) poly->Vertices[j] = NULL;
else poly->Vertices[j] = &VertexRAM[id];
u32 index = UINT32_MAX;
file->Var32(&index);
poly->Vertices[j] = index == UINT32_MAX ? nullptr : &VertexRAM[index];
}
}

Expand Down Expand Up @@ -495,7 +490,6 @@ void GPU3D::DoSavestate(Savestate* file) noexcept
}
}

// probably not worth storing the vblank-latched Renderxxxxxx variables
CmdStallQueue.DoSavestate(file);

file->Var32((u32*)&VertexPipeline);
Expand All @@ -511,10 +505,27 @@ void GPU3D::DoSavestate(Savestate* file) noexcept

CurVertexRAM = &VertexRAM[CurRAMBank ? 6144 : 0];
CurPolygonRAM = &PolygonRAM[CurRAMBank ? 2048 : 0];
}

file->Var32(&RenderNumPolygons);
if (file->Saving)
{
for (const Polygon* p : RenderPolygonRAM)
{
u32 index = p ? (p - &PolygonRAM[0]) : UINT32_MAX;

file->Var32(&index);
}
}
else
{
for (int i = 0; i < RenderPolygonRAM.size(); ++i)
{
u32 index = UINT32_MAX;
file->Var32(&index);

// better safe than sorry, I guess
// might cause a blank frame but atleast it won't shit itself
RenderNumPolygons = 0;
RenderPolygonRAM[i] = index == UINT32_MAX ? nullptr : &PolygonRAM[index];
}
}

file->VarArray(CurVertex, sizeof(s16)*3);
Expand All @@ -534,6 +545,18 @@ void GPU3D::DoSavestate(Savestate* file) noexcept
file->VarArray(ShininessTable, 128*sizeof(u8));

file->Bool32(&AbortFrame);
file->Bool32(&GeometryEnabled);
file->Bool32(&RenderingEnabled);
file->Var32(&PolygonMode);
file->Var32(&PolygonAttr);
file->Var32(&CurPolygonAttr);
file->Var32(&TexParam);
file->Var32(&TexPalette);
RenderFrameIdentical = false;
if (softRenderer && softRenderer->IsThreaded())
{
softRenderer->EnableRenderThread();
}
}


Expand Down
6 changes: 4 additions & 2 deletions src/GPU3D.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ struct Vertex
// TODO maybe: hi-res color? (that survives clipping)
s32 HiresPosition[2];

void DoSavestate(Savestate* file) noexcept;
};

struct Polygon
Expand Down Expand Up @@ -78,6 +79,7 @@ struct Polygon

u32 SortKey;

void DoSavestate(Savestate* file) noexcept;
};

class Renderer3D;
Expand Down Expand Up @@ -269,7 +271,7 @@ class GPU3D
u32 RenderClearAttr1 = 0;
u32 RenderClearAttr2 = 0;

bool RenderFrameIdentical = false;
bool RenderFrameIdentical = false; // not part of the hardware state, don't serialize

bool AbortFrame = false;

Expand Down Expand Up @@ -323,7 +325,7 @@ class GPU3D

u32 FlushRequest = 0;
u32 FlushAttributes = 0;
u32 ScrolledLine[256];
u32 ScrolledLine[256]; // not part of the hardware state, don't serialize
};

class Renderer3D
Expand Down
53 changes: 47 additions & 6 deletions src/GPU3D_Soft.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,11 @@ void SoftRenderer::StopRenderThread()
{
if (RenderThreadRunning.load(std::memory_order_relaxed))
{
// Tell the render thread to stop drawing new frames, and finish up the current one.
RenderThreadRunning = false;

Platform::Semaphore_Post(Sema_RenderStart);

Platform::Thread_Wait(RenderThread);
Platform::Thread_Free(RenderThread);
RenderThread = nullptr;
Expand All @@ -47,31 +50,50 @@ void SoftRenderer::SetupRenderThread(GPU& gpu)
if (Threaded)
{
if (!RenderThreadRunning.load(std::memory_order_relaxed))
{
RenderThreadRunning = true;
{ // If the render thread isn't already running...
RenderThreadRunning = true; // "Time for work, render thread!"
RenderThread = Platform::Thread_Create([this, &gpu]() {
RenderThreadFunc(gpu);
});
}

// otherwise more than one frame can be queued up at once
// "Be on standby, but don't start rendering until I tell you to!"
Platform::Semaphore_Reset(Sema_RenderStart);

// "Oh, sorry, were you already in the middle of a frame from the last iteration?"
if (RenderThreadRendering)
// "Tell me when you're done, I'll wait here."
Platform::Semaphore_Wait(Sema_RenderDone);

// "All good? Okay, let me give you your training."
// "(Maybe you're still the same thread, but I have to tell you this stuff anyway.)"

// "This is the signal you'll send when you're done with a frame."
// "I'll listen for it when I need to show something to the frontend."
Platform::Semaphore_Reset(Sema_RenderDone);

// "This is the signal I'll send when I want you to start rendering."
// "Don't do anything until you get the message."
Platform::Semaphore_Reset(Sema_RenderStart);
Platform::Semaphore_Reset(Sema_ScanlineCount);

Platform::Semaphore_Post(Sema_RenderStart);
// "This is the signal you'll send every time you finish drawing a line."
// "I might need some of your scanlines before you finish the whole buffer,"
// "so let me know as soon as you're done with each one."
Platform::Semaphore_Reset(Sema_ScanlineCount);
}
else
{
StopRenderThread();
}
}

void SoftRenderer::EnableRenderThread()
{
if (Threaded && Sema_RenderStart)
{
Platform::Semaphore_Post(Sema_RenderStart);
}
}

SoftRenderer::SoftRenderer(bool threaded) noexcept
: Renderer3D(false), Threaded(threaded)
Expand Down Expand Up @@ -103,6 +125,7 @@ void SoftRenderer::Reset(GPU& gpu)
PrevIsShadowMask = false;

SetupRenderThread(gpu);
EnableRenderThread();
}

void SoftRenderer::SetThreaded(bool threaded, GPU& gpu) noexcept
Expand All @@ -111,6 +134,7 @@ void SoftRenderer::SetThreaded(bool threaded, GPU& gpu) noexcept
{
Threaded = threaded;
SetupRenderThread(gpu);
EnableRenderThread();
}
}

Expand Down Expand Up @@ -1692,12 +1716,14 @@ void SoftRenderer::RenderPolygons(const GPU& gpu, bool threaded, Polygon** polyg
ScanlineFinalPass(gpu.GPU3D, y-1);

if (threaded)
// Notify the main thread that we're done with a scanline.
Platform::Semaphore_Post(Sema_ScanlineCount);
}

ScanlineFinalPass(gpu.GPU3D, 191);

if (threaded)
// If this renderer is threaded, notify the main thread that we're done with the frame.
Platform::Semaphore_Post(Sema_ScanlineCount);
}

Expand All @@ -1719,6 +1745,7 @@ void SoftRenderer::RenderFrame(GPU& gpu)

if (RenderThreadRunning.load(std::memory_order_relaxed))
{
// "Render thread, you're up! Get moving."
Platform::Semaphore_Post(Sema_RenderStart);
}
else if (!FrameIdentical)
Expand All @@ -1731,18 +1758,26 @@ void SoftRenderer::RenderFrame(GPU& gpu)
void SoftRenderer::RestartFrame(GPU& gpu)
{
SetupRenderThread(gpu);
EnableRenderThread();
}

void SoftRenderer::RenderThreadFunc(GPU& gpu)
{
for (;;)
{
// Wait for a notice from the main thread to start rendering (or to stop entirely).
Platform::Semaphore_Wait(Sema_RenderStart);
if (!RenderThreadRunning) return;

// Protect the GPU state from the main thread.
// Some melonDS frontends (though not ours)
// will repeatedly save or load states;
// if they do so while the render thread is busy here,
// the ensuing race conditions may cause a crash
// (since some of the GPU state includes pointers).
RenderThreadRendering = true;
if (FrameIdentical)
{
{ // If no rendering is needed, just say we're done.
Platform::Semaphore_Post(Sema_ScanlineCount, 192);
}
else
Expand All @@ -1751,7 +1786,10 @@ void SoftRenderer::RenderThreadFunc(GPU& gpu)
RenderPolygons(gpu, true, &gpu.GPU3D.RenderPolygonRAM[0], gpu.GPU3D.RenderNumPolygons);
}

// Tell the main thread that we're done rendering
// and that it's safe to access the GPU state again.
Platform::Semaphore_Post(Sema_RenderDone);

RenderThreadRendering = false;
}
}
Expand All @@ -1761,6 +1799,9 @@ u32* SoftRenderer::GetLine(int line)
if (RenderThreadRunning.load(std::memory_order_relaxed))
{
if (line < 192)
// We need a scanline, so let's wait for the render thread to finish it.
// (both threads process scanlines from top-to-bottom,
// so we don't need to wait for a specific row)
Platform::Semaphore_Wait(Sema_ScanlineCount);
}

Expand Down
9 changes: 9 additions & 0 deletions src/GPU3D_Soft.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,10 @@ class SoftRenderer : public Renderer3D
u32* GetLine(int line) override;

void SetupRenderThread(GPU& gpu);
void EnableRenderThread();
void StopRenderThread();
private:
friend void GPU3D::DoSavestate(Savestate* file) noexcept;
// Notes on the interpolator:
//
// This is a theory on how the DS hardware interpolates values. It matches hardware output
Expand Down Expand Up @@ -506,8 +508,15 @@ class SoftRenderer : public Renderer3D
Platform::Thread* RenderThread;
std::atomic_bool RenderThreadRunning;
std::atomic_bool RenderThreadRendering;

// Used by the main thread to tell the render thread to start rendering a frame
Platform::Semaphore* Sema_RenderStart;

// Used by the render thread to tell the main thread that it's done rendering a frame
Platform::Semaphore* Sema_RenderDone;

// Used to allow the main thread to read some scanlines
// before (the 3D portion of) the entire frame is rasterized.
Platform::Semaphore* Sema_ScanlineCount;
};
}
Loading
Loading