Skip to content

Commit

Permalink
Update parallelRDP and MoltenVK (#1634)
Browse files Browse the repository at this point in the history
  • Loading branch information
rasky committed Sep 3, 2024
1 parent 4ab8472 commit 7135646
Show file tree
Hide file tree
Showing 23 changed files with 49,458 additions and 54,244 deletions.
2 changes: 1 addition & 1 deletion ares/n64/vulkan/parallel-rdp/COMMIT
Original file line number Diff line number Diff line change
@@ -1 +1 @@
43bc31642cc70d04adb828a285e68cdbde7110a9
fe5becd13638873db90d46e7ba7d48255971f82a
10 changes: 5 additions & 5 deletions ares/n64/vulkan/parallel-rdp/parallel-rdp/rdp_renderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -716,7 +716,7 @@ static int normalize_dzpix(int dz)
else if (dz == 0)
return 1;

unsigned bit = 31 - leading_zeroes(dz);
unsigned bit = 31 - Util::leading_zeroes(dz);
return 1 << (bit + 1);
}

Expand Down Expand Up @@ -1680,7 +1680,7 @@ void Renderer::submit_span_setup_jobs(Vulkan::CommandBuffer &cmd, bool upscale)
cmd.set_buffer_view(1, 0, *instance.gpu.span_info_jobs_view);
cmd.set_specialization_constant_mask(3);
cmd.set_specialization_constant(0, (upscale ? caps.upscaling : 1) * ImplementationConstants::DefaultWorkgroupSize);
cmd.set_specialization_constant(1, upscale ? trailing_zeroes(caps.upscaling) : 0u);
cmd.set_specialization_constant(1, upscale ? Util::trailing_zeroes(caps.upscaling) : 0u);

Vulkan::QueryPoolHandle begin_ts, end_ts;
if (caps.timestamp >= 2)
Expand Down Expand Up @@ -1780,7 +1780,7 @@ void Renderer::submit_rasterization(Vulkan::CommandBuffer &cmd, Vulkan::Buffer &
if (caps.timestamp >= 2)
start_ts = cmd.write_timestamp(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);

uint32_t scale_log2_bit = (upscaling ? trailing_zeroes(caps.upscaling) : 0u) << RASTERIZATION_UPSCALING_LOG2_BIT_OFFSET;
uint32_t scale_log2_bit = (upscaling ? Util::trailing_zeroes(caps.upscaling) : 0u) << RASTERIZATION_UPSCALING_LOG2_BIT_OFFSET;

for (size_t i = 0; i < stream.static_raster_state_cache.size(); i++)
{
Expand Down Expand Up @@ -1891,7 +1891,7 @@ void Renderer::submit_tile_binning_combined(Vulkan::CommandBuffer &cmd, bool ups
if (supports_subgroup_size_control(32, subgroup_size))
{
cmd.enable_subgroup_size_control(true);
cmd.set_subgroup_size_log2(true, 5, trailing_zeroes(subgroup_size));
cmd.set_subgroup_size_log2(true, 5, Util::trailing_zeroes(subgroup_size));
}
}
else
Expand Down Expand Up @@ -2092,7 +2092,7 @@ void Renderer::submit_depth_blend(Vulkan::CommandBuffer &cmd, Vulkan::Buffer &tm
cmd.set_specialization_constant(5, Limits::MaxPrimitives);
cmd.set_specialization_constant(6, upscaled ? caps.max_width : Limits::MaxWidth);
cmd.set_specialization_constant(7, uint32_t(force_write_mask || (!is_host_coherent && !upscaled)) |
((upscaled ? trailing_zeroes(caps.upscaling) : 0u) << 1u));
((upscaled ? Util::trailing_zeroes(caps.upscaling) : 0u) << 1u));

if (upscaled)
cmd.set_storage_buffer(0, 0, *upscaling_multisampled_rdram);
Expand Down
102,697 changes: 48,826 additions & 53,871 deletions ares/n64/vulkan/parallel-rdp/parallel-rdp/shaders/slangmosh.hpp

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -484,7 +484,7 @@ Vulkan::ImageHandle VideoInterface::vram_fetch_stage(const Registers &regs, unsi
async_cmd->set_specialization_constant_mask(7);
async_cmd->set_specialization_constant(0, uint32_t(rdram_size));
async_cmd->set_specialization_constant(1, regs.status & (VI_CONTROL_TYPE_MASK | VI_CONTROL_META_AA_BIT));
async_cmd->set_specialization_constant(2, trailing_zeroes(scaling_factor));
async_cmd->set_specialization_constant(2, Util::trailing_zeroes(scaling_factor));

async_cmd->push_constants(&push, 0, sizeof(push));
async_cmd->dispatch((extract_width + 15) / 16,
Expand Down
4 changes: 4 additions & 0 deletions ares/n64/vulkan/parallel-rdp/util/aligned_alloc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,18 @@ struct AlignedAllocation
// Allocates `size` bytes aligned to alignof(T) through Util::memalign_alloc.
// When exception support is compiled in, an allocation failure is reported
// with std::bad_alloc instead of handing a null pointer back to the caller.
static void *operator new(size_t size)
{
	void *ret = ::Util::memalign_alloc(alignof(T), size);
	// __EXCEPTIONS is only defined by GCC/Clang. Also accept the standard
	// feature-test macro and MSVC's _CPPUNWIND so the failure check is not
	// silently dropped on toolchains that never define __EXCEPTIONS.
#if defined(__EXCEPTIONS) || defined(__cpp_exceptions) || defined(_CPPUNWIND)
	if (!ret)
		throw std::bad_alloc();
#endif
	return ret;
}

// Array form: allocates `size` bytes aligned to alignof(T) through
// Util::memalign_alloc. When exception support is compiled in, an allocation
// failure is reported with std::bad_alloc rather than a null return.
static void *operator new[](size_t size)
{
	void *ret = ::Util::memalign_alloc(alignof(T), size);
	// __EXCEPTIONS is only defined by GCC/Clang. Also accept the standard
	// feature-test macro and MSVC's _CPPUNWIND so the failure check is not
	// silently dropped on toolchains that never define __EXCEPTIONS.
#if defined(__EXCEPTIONS) || defined(__cpp_exceptions) || defined(_CPPUNWIND)
	if (!ret)
		throw std::bad_alloc();
#endif
	return ret;
}

Expand Down
37 changes: 23 additions & 14 deletions ares/n64/vulkan/parallel-rdp/util/bitops.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,22 @@
namespace Util
{
#ifdef __GNUC__
#define leading_zeroes(x) ((x) == 0 ? 32 : __builtin_clz(x))
#define trailing_zeroes(x) ((x) == 0 ? 32 : __builtin_ctz(x))
#define trailing_ones(x) __builtin_ctz(~uint32_t(x))
#define leading_zeroes64(x) ((x) == 0 ? 64 : __builtin_clzll(x))
#define trailing_zeroes64(x) ((x) == 0 ? 64 : __builtin_ctzll(x))
#define trailing_ones64(x) __builtin_ctzll(~uint64_t(x))
#define popcount32(x) __builtin_popcount(x)
// Zero-safe wrappers around the GCC/Clang bit-scan builtins.
// __builtin_clz/__builtin_ctz (and the ll variants) have an undefined result
// for a zero argument, so each macro tests for zero first and yields the full
// bit width instead. The argument is evaluated more than once; only pass
// side-effect-free expressions.
#define leading_zeroes_(x) ((x) == 0 ? 32 : __builtin_clz(x))
#define trailing_zeroes_(x) ((x) == 0 ? 32 : __builtin_ctz(x))
// Trailing ones of x are the trailing zeroes of ~x. Going through the
// zero-safe macro keeps an all-ones input (whose complement is 0) well
// defined: it yields 32 rather than calling __builtin_ctz(0).
#define trailing_ones_(x) trailing_zeroes_(~uint32_t(x))
#define leading_zeroes64_(x) ((x) == 0 ? 64 : __builtin_clzll(x))
#define trailing_zeroes64_(x) ((x) == 0 ? 64 : __builtin_ctzll(x))
// 64-bit counterpart of trailing_ones_; an all-ones input yields 64.
#define trailing_ones64_(x) trailing_zeroes64_(~uint64_t(x))
#define popcount32_(x) __builtin_popcount(x)

// Typed entry points. Fixing the parameter width here means callers get the
// 32-bit or 64-bit builtin they asked for regardless of the argument's type.

// Number of zero bits above the highest set bit; 32 when x == 0.
static inline uint32_t leading_zeroes(uint32_t x) { return leading_zeroes_(x); }
// Number of zero bits below the lowest set bit; 32 when x == 0.
static inline uint32_t trailing_zeroes(uint32_t x) { return trailing_zeroes_(x); }
// Number of consecutive one bits starting at bit 0; 32 when x is all ones.
static inline uint32_t trailing_ones(uint32_t x) { return trailing_ones_(x); }
// 64-bit leading zero count; 64 when x == 0.
static inline uint32_t leading_zeroes64(uint64_t x) { return leading_zeroes64_(x); }
// 64-bit trailing zero count; 64 when x == 0.
static inline uint32_t trailing_zeroes64(uint64_t x) { return trailing_zeroes64_(x); }
// 64-bit trailing one count; 64 when x is all ones.
static inline uint32_t trailing_ones64(uint64_t x) { return trailing_ones64_(x); }
// Number of set bits in x.
static inline uint32_t popcount32(uint32_t x) { return popcount32_(x); }

#elif defined(_MSC_VER)
namespace Internal
{
Expand Down Expand Up @@ -81,13 +90,13 @@ static inline uint32_t ctz64(uint64_t x)
}
}

#define popcount32(x) ::Util::Internal::popcount32(x)
#define leading_zeroes(x) ::Util::Internal::clz(x)
#define trailing_zeroes(x) ::Util::Internal::ctz(x)
#define trailing_ones(x) ::Util::Internal::ctz(~uint32_t(x))
#define leading_zeroes64(x) ::Util::Internal::clz64(x)
#define trailing_zeroes64(x) ::Util::Internal::ctz64(x)
#define trailing_ones64(x) ::Util::Internal::ctz64(~uint64_t(x))
// MSVC entry points: thin typed forwarders onto the Internal:: helpers.

// Leading zero count of a 32-bit value.
static inline uint32_t leading_zeroes(uint32_t x)
{
	return Internal::clz(x);
}

// Trailing zero count of a 32-bit value.
static inline uint32_t trailing_zeroes(uint32_t x)
{
	return Internal::ctz(x);
}

// Trailing one count: trailing zeroes of the bitwise complement.
static inline uint32_t trailing_ones(uint32_t x)
{
	const uint32_t complement = ~x;
	return Internal::ctz(complement);
}

// Leading zero count of a 64-bit value.
static inline uint32_t leading_zeroes64(uint64_t x)
{
	return Internal::clz64(x);
}

// Trailing zero count of a 64-bit value.
static inline uint32_t trailing_zeroes64(uint64_t x)
{
	return Internal::ctz64(x);
}

// 64-bit trailing one count: trailing zeroes of the bitwise complement.
static inline uint32_t trailing_ones64(uint64_t x)
{
	const uint64_t complement = ~x;
	return Internal::ctz64(complement);
}

// Number of set bits in a 32-bit value.
static inline uint32_t popcount32(uint32_t x)
{
	return Internal::popcount32(x);
}
#else
#error "Implement me."
#endif
Expand Down
4 changes: 2 additions & 2 deletions ares/n64/vulkan/parallel-rdp/util/timer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ int64_t get_current_time_nsecs()
return int64_t(double(li.QuadPart) * static_qpc_freq.inv_freq);
#else
struct timespec ts = {};
#ifdef ANDROID
#if defined(ANDROID) || defined(__FreeBSD__)
constexpr auto timebase = CLOCK_MONOTONIC;
#else
constexpr auto timebase = CLOCK_MONOTONIC_RAW;
Expand All @@ -128,4 +128,4 @@ double Timer::end()
auto nt = get_current_time_nsecs();
return double(nt - t) * 1e-9;
}
}
}
Loading

0 comments on commit 7135646

Please sign in to comment.