diff --git a/Common/Arm64Emitter.cpp b/Common/Arm64Emitter.cpp
index 7708958f622f..fabe50985d25 100644
--- a/Common/Arm64Emitter.cpp
+++ b/Common/Arm64Emitter.cpp
@@ -313,24 +313,40 @@ void ARM64XEmitter::FlushIcache()
 
 void ARM64XEmitter::FlushIcacheSection(u8* start, u8* end)
 {
-	if (cpu_info.sBugs.bExynos8890Invalidation)
-	{
-		// Over invalidate to force this CPU to listen.
-		start = m_startcode + 4096 < start ? start - 4096 : m_startcode;
-		end += 4096;
-	}
-
 #if defined(IOS)
 	// Header file says this is equivalent to: sys_icache_invalidate(start, end - start);
 	sys_cache_control(kCacheFunctionPrepareForExecution, start, end - start);
-#else
-#if (defined(__clang__) && !defined(_M_IX86) && !defined(_M_X64)) || defined(ANDROID)
-	__clear_cache(start, end);
-#else
-#if !defined(_M_IX86) && !defined(_M_X64)
-	__builtin___clear_cache(start, end);
-#endif
-#endif
+#elif !defined(_M_IX86) && !defined(_M_X64)
+	// Code from Dolphin, contributed by the Mono project.
+
+	// Don't rely on GCC's __clear_cache implementation, as it caches the
+	// icache/dcache line sizes, which can vary between cores on
+	// big.LITTLE architectures.
+	u64 addr, ctr_el0;
+	static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;  // NOTE(review): updated below without locking - confirm callers are serialized.
+	size_t isize, dsize;
+
+	__asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
+	isize = 4 << ((ctr_el0 >> 0) & 0xf);  // icache line size, from CTR_EL0 bits [3:0]
+	dsize = 4 << ((ctr_el0 >> 16) & 0xf);  // dcache line size, from CTR_EL0 bits [19:16]
+
+	// Use the smallest line size seen on any call so far (remembered in the statics above).
+	icache_line_size = isize = icache_line_size < isize ? icache_line_size : isize;
+	dcache_line_size = dsize = dcache_line_size < dsize ? dcache_line_size : dsize;
+
+	addr = (u64)start & ~(u64)(dsize - 1);
+	for (; addr < (u64)end; addr += dsize)
+		// use "civac" instead of "cvau", as this is the suggested workaround for
+		// Cortex-A53 errata 819472, 826319, 827319 and 824069.
+		__asm__ volatile("dc civac, %0" : : "r"(addr) : "memory");
+	__asm__ volatile("dsb ish" : : : "memory");
+
+	addr = (u64)start & ~(u64)(isize - 1);
+	for (; addr < (u64)end; addr += isize)
+		__asm__ volatile("ic ivau, %0" : : "r"(addr) : "memory");
+
+	__asm__ volatile("dsb ish" : : : "memory");
+	__asm__ volatile("isb" : : : "memory");
 #endif
 }
 
diff --git a/Common/CPUDetect.h b/Common/CPUDetect.h
index 25b8b42ee8e6..51cbcd1b0b9f 100644
--- a/Common/CPUDetect.h
+++ b/Common/CPUDetect.h
@@ -89,14 +89,15 @@ struct CPUInfo {
 	bool bXBurst1;
 	bool bXBurst2;
 
-	// Bugs
+	// Quirks
 	struct {
-		// Samsung Galaxy S7 devices (Exynos 8890) have a bug that causes invalidation to work incorrectly.
-		// This may be caused by interaction between the separate CPU cores.
-		// Padding jit blocks and over-invalidating seems to "solve" it.
-		// Only affects ARM64.
-		bool bExynos8890Invalidation;
-	} sBugs;
+		// Samsung Galaxy S7 devices (Exynos 8890) have a big.LITTLE configuration where the cacheline size differs between big and LITTLE.
+		// GCC's cache clearing function would detect the cacheline size on one core type and keep it for later. Clearing
+		// with that remembered size while running on the other core type is a problem. In case we want to do something
+		// different in this situation in the future, let's keep this as a quirk, but our current code won't detect it
+		// reliably if it happens on new archs. We now use better clearing code on ARM64 that doesn't have this issue.
+		bool bExynos8890DifferingCachelineSizes;
+	} sQuirks;
 
 	// Call Detect()
 	explicit CPUInfo();
diff --git a/Core/MIPS/ARM64/Arm64Jit.cpp b/Core/MIPS/ARM64/Arm64Jit.cpp
index 1654fcded869..3bbbb388dc2a 100644
--- a/Core/MIPS/ARM64/Arm64Jit.cpp
+++ b/Core/MIPS/ARM64/Arm64Jit.cpp
@@ -333,14 +333,6 @@ const u8 *Arm64Jit::DoJit(u32 em_address, JitBlock *b) {
 	if (dontLogBlocks > 0)
 		dontLogBlocks--;
 
-	if (cpu_info.sBugs.bExynos8890Invalidation) {
-		// What a waste.  If we don't do both this and over-invalidate, the device crashes.
-		// This space won't ever get run, but it's wasted jit cache space.
-		for (int i = 0; i < 32; ++i) {
-			HINT(HINT_NOP);
-		}
-	}
-
 	// Don't forget to zap the newly written instructions in the instruction cache!
 	FlushIcache();
 
diff --git a/GPU/Common/VertexDecoderArm64.cpp b/GPU/Common/VertexDecoderArm64.cpp
index bc0fb00e19a2..83e305b78905 100644
--- a/GPU/Common/VertexDecoderArm64.cpp
+++ b/GPU/Common/VertexDecoderArm64.cpp
@@ -278,14 +278,6 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
 
 	RET();
 
-	if (cpu_info.sBugs.bExynos8890Invalidation) {
-		// Apparently the vertex cache hasn't been the problem, but adding this here for the same
-		// reasons as the standard jit.
-		for (int i = 0; i < 32; ++i) {
-			HINT(HINT_NOP);
-		}
-	}
-
 	FlushIcache();
 
 	if (log) {
diff --git a/android/jni/app-android.cpp b/android/jni/app-android.cpp
index 672f9ff7d880..240ab8bda815 100644
--- a/android/jni/app-android.cpp
+++ b/android/jni/app-android.cpp
@@ -526,7 +526,7 @@ extern "C" void Java_org_ppsspp_ppsspp_NativeApp_init
 	// Unfortunately, on the Samsung Galaxy S7, this isn't in /proc/cpuinfo.
 	// We also can't read it from __system_property_get.
 	if (buildBoard == "universal8890") {
-		cpu_info.sBugs.bExynos8890Invalidation = true;
+		cpu_info.sQuirks.bExynos8890DifferingCachelineSizes = true;
 	}
 
 	NativeGetAppInfo(&app_name, &app_nice_name, &landscape, &version);