Skip to content

Commit

Permalink
Fix ancient bug in CArchiveScanner::GetArchiveChecksum() that caused …
Browse files Browse the repository at this point in the history
…archive checksum to be defined only by the content and filename of the last file in the archive.

This caused a ton of duplicate checksums across completely different assets and reduced the effectiveness of checksumming as a method, something that no one noticed for years.
  • Loading branch information
lhog committed Jan 31, 2025
1 parent ef64e46 commit ca2d46e
Showing 1 changed file with 10 additions and 7 deletions.
17 changes: 10 additions & 7 deletions rts/System/FileSystem/ArchiveScanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ LOG_REGISTER_SECTION_GLOBAL(LOG_SECTION_ARCHIVESCANNER)
* but mapping them all, every time to make the list is)
*/

constexpr static int INTERNAL_VER = 18;
constexpr static int INTERNAL_VER = 19;


/*
Expand Down Expand Up @@ -612,13 +612,15 @@ void CArchiveScanner::ReadCache()
{
Clear();

const auto oldCacheFile = FileSystem::EnsurePathSepAtEnd(FileSystem::GetCacheDir()) + IntToString(INTERNAL_VER - 1, "ArchiveCache%i.lua");
cacheFile = FileSystem::EnsurePathSepAtEnd(FileSystem::GetCacheDir()) + IntToString(INTERNAL_VER , "ArchiveCache%i.lua");
cacheFile = FileSystem::EnsurePathSepAtEnd(FileSystem::GetCacheDir()) + IntToString(INTERNAL_VER, "ArchiveCache%i.lua");

if (!FileSystem::FileExists(cacheFile)) {
// Try to save initial scanning of assets, but will have to redo hashing
// as the previous version had bugs in that area
if (ReadCacheData(oldCacheFile, true)) {
// probe two previous versions
const auto vm1CacheFile = FileSystem::EnsurePathSepAtEnd(FileSystem::GetCacheDir()) + IntToString(INTERNAL_VER - 1, "ArchiveCache%i.lua");
const auto vm2CacheFile = FileSystem::EnsurePathSepAtEnd(FileSystem::GetCacheDir()) + IntToString(INTERNAL_VER - 2, "ArchiveCache%i.lua");
if (ReadCacheData(vm1CacheFile, true) || ReadCacheData(vm2CacheFile, true)) {
// nullify hashes
for (auto& ai : archiveInfos) {
memset(ai.checksum, 0, sizeof(ai.checksum));
Expand Down Expand Up @@ -1058,9 +1060,11 @@ bool CArchiveScanner::GetArchiveChecksum(const std::string& archiveName, Archive

// combine individual hashes, initialize to hash(name)
for (size_t i = 0; i < fileNames.size(); i++) {
sha512::calc_digest(reinterpret_cast<const uint8_t*>(fileNames[i].c_str()), fileNames[i].size(), archiveInfo.checksum);
sha512::raw_digest fileNameHash {0};
sha512::calc_digest(reinterpret_cast<const uint8_t*>(fileNames[i].c_str()), fileNames[i].size(), fileNameHash.data());

for (uint8_t j = 0; j < sha512::SHA_LEN; j++) {
archiveInfo.checksum[j] ^= fileNameHash[j];
archiveInfo.checksum[j] ^= fileHashes[i][j];
}

Expand Down Expand Up @@ -1534,8 +1538,7 @@ sha512::raw_digest CArchiveScanner::GetArchiveSingleChecksumBytes(const std::str

sha512::raw_digest CArchiveScanner::GetArchiveCompleteChecksumBytes(const std::string& name)
{
sha512::raw_digest checksum;
std::fill(checksum.begin(), checksum.end(), 0);
sha512::raw_digest checksum{0};

for (const std::string& depName: GetAllArchivesUsedBy(name)) {
const std::string& archiveName = ArchiveFromName(depName);
Expand Down

0 comments on commit ca2d46e

Please sign in to comment.