From a780997f9674ef5b2085efc5b7df1507f36f8168 Mon Sep 17 00:00:00 2001 From: Konstantin Gindemit Date: Mon, 10 Jul 2023 21:54:38 +0200 Subject: [PATCH] ~ Fixes in pixman-arm-intrisics.cpp --- dependency/pixman/pixman-arm-intrisics.cpp | 20 ++++++++++---------- projects/CMake/CMakeLists.txt | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/dependency/pixman/pixman-arm-intrisics.cpp b/dependency/pixman/pixman-arm-intrisics.cpp index 44eee7b..02a496d 100644 --- a/dependency/pixman/pixman-arm-intrisics.cpp +++ b/dependency/pixman/pixman-arm-intrisics.cpp @@ -22,25 +22,25 @@ extern "C" void pixman_composite_over_n_8888_asm_neon(int32_t w, int32_t h, int32_t dst_stride, uint32_t src) { - // Extract the source alpha and replicate it to all 8 lanes of a NEON vector - uint8x8_t v_src_alpha = vdup_n_u8(src >> 24); - // Extract the source color and replicate it to all 8 lanes of a NEON vector - uint8x8_t v_src_color = vdup_n_u8(src); + // Extract the source alpha and replicate it to all 16 lanes of a NEON vector + uint8x16_t v_src_alpha = vdupq_n_u8(src >> 24); + // Replicate the low 8 bits of src (vdupq_n_u8 takes a uint8_t, so src is truncated) to all 16 lanes of a NEON vector + uint8x16_t v_src_color = vdupq_n_u8(src); for (int32_t y = 0; y < h; y++) { - for (int32_t x = 0; x < w; x += 8) + for (int32_t x = 0; x < w; x += 16) // Changed to 16 to match uint8x16_t { - // Load 8 destination pixels - uint8x8_t v_dst_color = vld1_u8((uint8_t *)(dst + x)); + // Load 16 bytes of destination data + uint8x16_t v_dst_color = vld1q_u8((uint8_t *)(dst + x)); // Calculate the result color = source color * source alpha + destination color * (1 - source alpha) // Note that we need to shift right by 8 because the alpha blending operation can result in values greater than 255 - uint8x8_t v_res_color = vshrq_n_u8(vmlaq_u8(vmlsq_u8(v_dst_color, v_dst_color, v_src_alpha), v_src_color, v_src_alpha), 8); + uint8x16_t v_res_color = vshrq_n_u8(vmlaq_u8(vmlsq_u8(v_dst_color, v_dst_color, v_src_alpha), v_src_color, 
v_src_alpha), 8); // Store the result to memory - vst1_u8((uint8_t *)(dst + x), v_res_color); + vst1q_u8((uint8_t *)(dst + x), v_res_color); } dst += dst_stride; } -} +} \ No newline at end of file diff --git a/projects/CMake/CMakeLists.txt b/projects/CMake/CMakeLists.txt index 5fd4f68..c2e855c 100644 --- a/projects/CMake/CMakeLists.txt +++ b/projects/CMake/CMakeLists.txt @@ -61,7 +61,7 @@ if (RLOTTIE_IOS) message("Compile asm source for iOS") target_compile_options(rlottie PUBLIC -fno-integrated-as) target_compile_definitions(rlottie PUBLIC USE_ARM_NEON __ARM64_NEON__ __ARM_NEON__) - target_sources(rlottie PRIVATE ${PIXMAN_ROOT}/pixman-arma64-neon-asm.S) + target_sources(rlottie PRIVATE ${PIXMAN_ROOT}/pixman-arm-intrisics.cpp) endif() if (WIN32)