Skip to content

Commit

Permalink
Merge pull request #17139 from hrydgard/replacement-padding-support
Browse files Browse the repository at this point in the history
Texture replacement: Improve padding support
  • Loading branch information
hrydgard authored Mar 17, 2023
2 parents 6a430f2 + 5f76fbe commit cd06b9c
Show file tree
Hide file tree
Showing 12 changed files with 119 additions and 56 deletions.
2 changes: 1 addition & 1 deletion Common/MemoryUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ void *AllocateAlignedMemory(size_t size, size_t alignment) {
#endif
#endif

_assert_msg_(ptr != nullptr, "Failed to allocate aligned memory");
_assert_msg_(ptr != nullptr, "Failed to allocate aligned memory of size %llu", size);
return ptr;
}

Expand Down
1 change: 0 additions & 1 deletion Core/FileSystems/MetaFileSystem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,6 @@ static bool RealPath(const std::string &currentDirectory, const std::string &inP
size_t inLen = inPath.length();
if (inLen == 0)
{
WARN_LOG(FILESYS, "RealPath: inPath is empty");
outPath = currentDirectory;
return true;
}
Expand Down
100 changes: 75 additions & 25 deletions GPU/Common/ReplacedTexture.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ void ReplacedTexture::PurgeIfNotUsedSinceTime(double t) {
alphaStatus_ = ReplacedTextureAlpha::UNKNOWN;

// This means we have to reload. If we never purge any, there's no need.
SetState(ReplacementState::POPULATED);
SetState(ReplacementState::UNLOADED);
}

// This can only return true if ACTIVE or NOT_FOUND.
Expand All @@ -165,13 +165,10 @@ bool ReplacedTexture::IsReady(double budget) {
}
lastUsed_ = now;
return true;
case ReplacementState::UNINITIALIZED:
// _dbg_assert_(false);
return false;
case ReplacementState::CANCEL_INIT:
case ReplacementState::PENDING:
return false;
case ReplacementState::POPULATED:
case ReplacementState::UNLOADED:
// We're gonna need to spawn a task.
break;
}
Expand All @@ -195,6 +192,10 @@ bool ReplacedTexture::IsReady(double budget) {
return false;
}

// Rounds `value` up to the nearest multiple of 4; values that are already
// a multiple of 4 are returned unchanged. The arithmetic is unsigned, so
// inputs above 0xFFFFFFFC wrap around to 0.
inline uint32_t RoundUpTo4(uint32_t value) {
	const uint32_t lowBits = 3;
	const uint32_t bumped = value + lowBits;
	// Clearing the two low bits snaps the bumped value down onto a multiple of 4.
	return bumped & ~lowBits;
}

void ReplacedTexture::Prepare(VFSBackend *vfs) {
this->vfs_ = vfs;

Expand Down Expand Up @@ -259,16 +260,26 @@ void ReplacedTexture::Prepare(VFSBackend *vfs) {
return;
}

// Update the level dimensions.
for (auto &level : levels_) {
level.fullW = (level.w * desc_.w) / desc_.newW;
level.fullH = (level.h * desc_.h) / desc_.newH;

int blockSize;
bool bc = Draw::DataFormatIsBlockCompressed(fmt, &blockSize);
if (!bc) {
level.fullDataSize = level.fullW * level.fullH * 4;
} else {
level.fullDataSize = RoundUpTo4(level.fullW) * RoundUpTo4(level.fullH) * blockSize / 16;
}
}

SetState(ReplacementState::ACTIVE);

if (threadWaitable_)
threadWaitable_->Notify();
}

// Returns the smallest multiple of 4 that is >= `value` (modulo 2^32:
// inputs above 0xFFFFFFFC wrap around to 0 because the math is unsigned).
inline uint32_t RoundUpTo4(uint32_t value) {
	const uint32_t kAlign = 4;
	const uint32_t kMask = kAlign - 1;
	return (value + kMask) & ~kMask;
}

// Returns true if Prepare should keep calling this to load more levels.
ReplacedTexture::LoadLevelResult ReplacedTexture::LoadLevelData(VFSFileReference *fileRef, const std::string &filename, int mipLevel, Draw::DataFormat *pixelFormat) {
bool good = false;
Expand Down Expand Up @@ -393,21 +404,17 @@ ReplacedTexture::LoadLevelResult ReplacedTexture::LoadLevelData(VFSFileReference
ERROR_LOG(G3D, "Could not load texture replacement info: %s - unsupported format %s", filename.c_str(), magic.c_str());
}


// Already populated from cache. TODO: Move this above the first read, and take level.w/h from the cache.
if (!data_[mipLevel].empty()) {
vfs_->CloseFile(openFile);
*pixelFormat = fmt;
return LoadLevelResult::DONE;
}

// Is this really the right place to do it?
level.w = (level.w * desc_.w) / desc_.newW;
level.h = (level.h * desc_.h) / desc_.newH;

if (good && mipLevel != 0) {
// Check that the mipmap size is correct. Can't load mips of the wrong size.
if (level.w != (levels_[0].w >> mipLevel) || level.h != (levels_[0].h >> mipLevel)) {
// If loading a low mip directly (through png most likely), check that the mipmap size is correct.
// Can't load mips of the wrong size.
if (level.w != std::max(1, (levels_[0].w >> mipLevel)) || level.h != std::max(1, (levels_[0].h >> mipLevel))) {
WARN_LOG(G3D, "Replacement mipmap invalid: size=%dx%d, expected=%dx%d (level %d)",
level.w, level.h, levels_[0].w >> mipLevel, levels_[0].h >> mipLevel, mipLevel);
good = false;
Expand Down Expand Up @@ -662,7 +669,7 @@ ReplacedTexture::LoadLevelResult ReplacedTexture::LoadLevelData(VFSFileReference
return LoadLevelResult::LOAD_ERROR;
}

bool ReplacedTexture::CopyLevelTo(int level, void *out, int rowPitch) {
bool ReplacedTexture::CopyLevelTo(int level, uint8_t *out, size_t outDataSize, int rowPitch) {
_assert_msg_((size_t)level < levels_.size(), "Invalid miplevel");
_assert_msg_(out != nullptr && rowPitch > 0, "Invalid out/pitch");

Expand All @@ -671,6 +678,13 @@ bool ReplacedTexture::CopyLevelTo(int level, void *out, int rowPitch) {
return false;
}

// We pad the images right here during the copy.
// TODO: Add support for the texture cache to scale texture coordinates instead.
// It already supports this for render target textures that aren't powers of 2.

int outW = levels_[level].fullW;
int outH = levels_[level].fullH;

// We probably could avoid this lock, but better to play it safe.
std::lock_guard<std::mutex> guard(lock_);

Expand All @@ -684,9 +698,15 @@ bool ReplacedTexture::CopyLevelTo(int level, void *out, int rowPitch) {

#define PARALLEL_COPY

if (fmt == Draw::DataFormat::R8G8B8A8_UNORM) {
int blockSize;
if (!Draw::DataFormatIsBlockCompressed(fmt, &blockSize)) {
if (fmt != Draw::DataFormat::R8G8B8A8_UNORM) {
ERROR_LOG(G3D, "Unexpected linear data format");
return false;
}

if (rowPitch < info.w * 4) {
ERROR_LOG(G3D, "Replacement rowPitch=%d, but w=%d (level=%d)", rowPitch, info.w * 4, level);
ERROR_LOG(G3D, "Replacement rowPitch=%d, but w=%d (level=%d) (too small)", rowPitch, info.w * 4, level);
return false;
}

Expand All @@ -702,32 +722,62 @@ bool ReplacedTexture::CopyLevelTo(int level, void *out, int rowPitch) {
#ifdef PARALLEL_COPY
const int MIN_LINES_PER_THREAD = 4;
ParallelRangeLoop(&g_threadManager, [&](int l, int h) {
int extraPixels = outW - info.w;
for (int y = l; y < h; ++y) {
memcpy((uint8_t *)out + rowPitch * y, data.data() + info.w * 4 * y, info.w * 4);
// Fill the rest of the line with black.
memset((uint8_t *)out + rowPitch * y + info.w * 4, 0, extraPixels * 4);
}
}, 0, info.h, MIN_LINES_PER_THREAD);
#else
for (int y = 0; y < info.h; ++y) {
memcpy((uint8_t *)out + rowPitch * y, data.data() + info.w * 4 * y, info.w * 4);
}
#endif
// Memset the rest of the padding to avoid leaky edge pixels. Guess we could parallelize this too, but meh.
for (int y = info.h; y < outH; y++) {
uint8_t *dest = (uint8_t *)out + rowPitch * y;
memset(dest, 0, outW * 4);
}
}
} else {
#ifdef PARALLEL_COPY
// TODO: Add sanity checks here for other formats?
ParallelMemcpy(&g_threadManager, out, data.data(), data.size());
#else
memcpy(out, data.data(), data.size());
// Only parallel copy in the simple case for now.
if (info.w == outW && info.h == outH) {
// TODO: Add sanity checks here for other formats?
ParallelMemcpy(&g_threadManager, out, data.data(), data.size());
return true;
}
#endif
// Alright, so careful copying of blocks it is, padding with zero-blocks as needed.
int inBlocksW = (info.w + 3) / 4;
int inBlocksH = (info.h + 3) / 4;
int outBlocksW = (info.fullW + 3) / 4;
int outBlocksH = (info.fullH + 3) / 4;

int paddingBlocksX = outBlocksW - inBlocksW;

// Copy all the known blocks, and zero-fill out the lines.
for (int y = 0; y < inBlocksH; y++) {
const uint8_t *input = data.data() + y * inBlocksW * blockSize;
uint8_t *output = (uint8_t *)out + y * outBlocksW * blockSize;
memcpy(output, input, inBlocksW * blockSize);
memset(output + inBlocksW * blockSize, 0, paddingBlocksX * blockSize);
}

// Vertical zero-padding.
for (int y = inBlocksH; y < outBlocksH; y++) {
uint8_t *output = (uint8_t *)out + y * outBlocksW * blockSize;
memset(output, 0, outBlocksW * blockSize);
}
}

return true;
}

const char *StateString(ReplacementState state) {
switch (state) {
case ReplacementState::UNINITIALIZED: return "UNINITIALIZED";
case ReplacementState::POPULATED: return "PREPARED";
case ReplacementState::UNLOADED: return "PREPARED";
case ReplacementState::PENDING: return "PENDING";
case ReplacementState::NOT_FOUND: return "NOT_FOUND";
case ReplacementState::ACTIVE: return "ACTIVE";
Expand Down
23 changes: 14 additions & 9 deletions GPU/Common/ReplacedTexture.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,7 @@ enum class ReplacedImageType {
static const int MAX_REPLACEMENT_MIP_LEVELS = 12; // 12 should be plenty, 8 is the max mip levels supported by the PSP.

enum class ReplacementState : uint32_t {
UNINITIALIZED,
POPULATED, // We located the texture files but have not started the thread.
UNLOADED,
PENDING,
NOT_FOUND, // Also used on error loading the images.
ACTIVE,
Expand Down Expand Up @@ -94,8 +93,14 @@ struct ReplacedTextureRef {

// Metadata about a given texture level.
struct ReplacedTextureLevel {
// Data dimensions
int w = 0;
int h = 0;
// PSP texture dimensions
int fullW = 0;
int fullH = 0;

int fullDataSize = 0;

// To be able to reload, we need to be able to reopen; unfortunately we can't use zip_file_t.
// TODO: This really belongs on the level in the cache, not in the individual ReplacedTextureLevel objects.
Expand All @@ -122,13 +127,13 @@ class ReplacedTexture {
void GetSize(int level, int *w, int *h) const {
_dbg_assert_(State() == ReplacementState::ACTIVE);
_dbg_assert_(level < levels_.size());
*w = levels_[level].w;
*h = levels_[level].h;
*w = levels_[level].fullW;
*h = levels_[level].fullH;
}

int GetLevelDataSize(int level) const {
_dbg_assert_(State() == ReplacementState::ACTIVE);
return (int)data_[level].size();
int GetLevelDataSizeAfterCopy(int level) const {
// Includes padding etc.
return levels_[level].fullDataSize;
}

size_t GetTotalDataSize() const {
Expand Down Expand Up @@ -157,7 +162,7 @@ class ReplacedTexture {
}

bool IsReady(double budget);
bool CopyLevelTo(int level, void *out, int rowPitch);
bool CopyLevelTo(int level, uint8_t *out, size_t outDataSize, int rowPitch);

std::string logId_;

Expand All @@ -182,7 +187,7 @@ class ReplacedTexture {
ReplacedTextureAlpha alphaStatus_ = ReplacedTextureAlpha::UNKNOWN;
double lastUsed = 0.0;

std::atomic<ReplacementState> state_ = ReplacementState::POPULATED;
std::atomic<ReplacementState> state_ = ReplacementState::UNLOADED;

VFSBackend *vfs_ = nullptr;
ReplacementDesc desc_;
Expand Down
8 changes: 4 additions & 4 deletions GPU/Common/TextureCacheCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1545,9 +1545,8 @@ ReplacedTexture *TextureCacheCommon::FindReplacement(TexCacheEntry *entry, int &
}

switch (replaced->State()) {
case ReplacementState::POPULATED:
case ReplacementState::UNLOADED:
case ReplacementState::PENDING:
case ReplacementState::UNINITIALIZED:
// Make sure we keep polling.
entry->status |= TexCacheEntry::STATUS_TO_REPLACE;
break;
Expand Down Expand Up @@ -2883,7 +2882,8 @@ bool TextureCacheCommon::PrepareBuildTexture(BuildTexturePlan &plan, TexCacheEnt
return true;
}

void TextureCacheCommon::LoadTextureLevel(TexCacheEntry &entry, uint8_t *data, int stride, BuildTexturePlan &plan, int srcLevel, Draw::DataFormat dstFmt, TexDecodeFlags texDecFlags) {
// Passing 0 into dataSize will disable checking.
void TextureCacheCommon::LoadTextureLevel(TexCacheEntry &entry, uint8_t *data, size_t dataSize, int stride, BuildTexturePlan &plan, int srcLevel, Draw::DataFormat dstFmt, TexDecodeFlags texDecFlags) {
int w = gstate.getTextureWidth(srcLevel);
int h = gstate.getTextureHeight(srcLevel);

Expand All @@ -2892,7 +2892,7 @@ void TextureCacheCommon::LoadTextureLevel(TexCacheEntry &entry, uint8_t *data, i
if (plan.replaceValid) {
plan.replaced->GetSize(srcLevel, &w, &h);
double replaceStart = time_now_d();
plan.replaced->CopyLevelTo(srcLevel, data, stride);
plan.replaced->CopyLevelTo(srcLevel, data, dataSize, stride);
replacementTimeThisFrame_ += time_now_d() - replaceStart;
} else {
GETextureFormat tfmt = (GETextureFormat)entry.format;
Expand Down
3 changes: 2 additions & 1 deletion GPU/Common/TextureCacheCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ struct TexCacheEntry {
u32 fullhash;
u32 cluthash;
u16 maxSeenV;
ReplacedTexture *replacedTexture;

TexStatus GetHashStatus() {
return TexStatus(status & STATUS_MASK);
Expand Down Expand Up @@ -385,7 +386,7 @@ class TextureCacheCommon {
ReplacedTexture *FindReplacement(TexCacheEntry *entry, int &w, int &h, int &d);

// Return value is mapData normally, but could be another buffer allocated with AllocateAlignedMemory.
void LoadTextureLevel(TexCacheEntry &entry, uint8_t *mapData, int mapRowPitch, BuildTexturePlan &plan, int srcLevel, Draw::DataFormat dstFmt, TexDecodeFlags texDecFlags);
void LoadTextureLevel(TexCacheEntry &entry, uint8_t *mapData, size_t dataSize, int mapRowPitch, BuildTexturePlan &plan, int srcLevel, Draw::DataFormat dstFmt, TexDecodeFlags texDecFlags);

template <typename T>
inline const T *GetCurrentClut() {
Expand Down
11 changes: 8 additions & 3 deletions GPU/Common/TextureReplacer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -904,12 +904,14 @@ bool TextureReplacer::GenerateIni(const std::string &gameID, Path &generatedFile

// Let's also write some defaults.
fprintf(f, R"(# This file is optional and describes your textures.
# Some information on syntax available here:
# https://github.com/hrydgard/ppsspp/wiki/Texture-replacement-ini-syntax
# Documentation about the options and syntax is available here:
# https://www.ppsspp.org/docs/reference/texture-replacement
[options]
version = 1
hash = quick
ignoreMipmap = false
ignoreMipmap = false # Set to true to avoid dumping mipmaps. Instead use basisu to generate them, see docs.
reduceHash = false # Usually a good idea to use.
allowVideo = false
[games]
# Used to make it easier to install, and override settings for other regions.
Expand All @@ -921,8 +923,11 @@ ignoreMipmap = false
# See wiki for more info.
[hashranges]
# See the documentation.
# Example: 08b31020,512,512 = 480,272
[filtering]
# You can enforce specific filtering modes with this. See the docs.
[reducehashranges]
)", gameID.c_str(), INI_FILENAME.c_str());
Expand Down
4 changes: 3 additions & 1 deletion GPU/Common/TextureReplacer.h
Original file line number Diff line number Diff line change
Expand Up @@ -138,18 +138,20 @@ class TextureReplacer {
bool allowVideo_ = false;
bool ignoreAddress_ = false;
bool reduceHash_ = false;
bool ignoreMipmap_ = false;

float reduceHashSize = 1.0f; // default value with reduceHash to false
float reduceHashGlobalValue = 0.5f; // Global value for textures dump pngs of all sizes, 0.5 by default but can be set in textures.ini

double lastTextureCacheSizeGB_ = 0.0;
bool ignoreMipmap_ = false;
std::string gameID_;
Path basePath_;
Path newTextureDir_;
ReplacedTextureHash hash_ = ReplacedTextureHash::QUICK;

VFSBackend *vfs_ = nullptr;
bool vfsIsZip_ = false;

GPUFormatSupport formatSupport_{};

typedef std::pair<int, int> WidthHeightPair;
Expand Down
6 changes: 3 additions & 3 deletions GPU/D3D11/TextureCacheD3D11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -301,8 +301,8 @@ void TextureCacheD3D11::BuildTexture(TexCacheEntry *const entry) {
if (plan.replaceValid) {
int blockSize = 0;
if (Draw::DataFormatIsBlockCompressed(plan.replaced->Format(), &blockSize)) {
stride = ((mipWidth + 3) & ~3) * blockSize / 4; // This stride value doesn't quite make sense to me, but it works?
dataSize = plan.replaced->GetLevelDataSize(i);
stride = ((mipWidth + 3) & ~3) * blockSize / 4; // Number of blocks * 4 * Size of a block / 4
dataSize = plan.replaced->GetLevelDataSizeAfterCopy(i);
} else {
int bpp = (int)Draw::DataFormatSizeInBytes(plan.replaced->Format());
stride = std::max(mipWidth * bpp, 16);
Expand Down Expand Up @@ -338,7 +338,7 @@ void TextureCacheD3D11::BuildTexture(TexCacheEntry *const entry) {
return;
}

LoadTextureLevel(*entry, data, stride, plan, srcLevel, texFmt, TexDecodeFlags{});
LoadTextureLevel(*entry, data, 0, stride, plan, srcLevel, texFmt, TexDecodeFlags{});
}

int tw;
Expand Down
Loading

0 comments on commit cd06b9c

Please sign in to comment.