diff --git a/CMakeLists.txt b/CMakeLists.txt index 8845f610ab..5f6c08bd4e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -117,7 +117,7 @@ if (ANDROID) set(ANDROID_PACKAGE_SOURCE_DIR ${CMAKE_BINARY_DIR}/android-template) endif() -find_package(Qt6 COMPONENTS Concurrent Core Qml Gui Xml Positioning Widgets Network Quick Svg Sql Sensors WebView Multimedia Bluetooth Nfc WebSockets REQUIRED) +find_package(Qt6 COMPONENTS Concurrent Core Qml Gui Xml Positioning Widgets Network Quick Svg Sql Sensors WebView Multimedia Bluetooth Nfc WebSockets Quick3D REQUIRED) if(NOT CMAKE_SYSTEM_NAME STREQUAL "iOS") find_package(Qt6 COMPONENTS PrintSupport REQUIRED) diff --git a/cmake/qgis-cmake-wrapper.cmake b/cmake/qgis-cmake-wrapper.cmake index 75f92c31e6..c28e619076 100644 --- a/cmake/qgis-cmake-wrapper.cmake +++ b/cmake/qgis-cmake-wrapper.cmake @@ -134,6 +134,8 @@ if(TRUE) # Should possibly have a "static only" check endif() find_package(poly2tri CONFIG) target_link_libraries(QGIS::Core INTERFACE poly2tri::poly2tri) + find_package(meshoptimizer CONFIG REQUIRED) + target_link_libraries(QGIS::Core INTERFACE meshoptimizer::meshoptimizer) pkg_check_modules(freexl REQUIRED IMPORTED_TARGET freexl) target_link_libraries(QGIS::Core INTERFACE PkgConfig::freexl) diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index da39d2766e..db9ae84ab7 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -354,6 +354,7 @@ target_link_libraries( Qt::WebView Qt::Multimedia Qt::WebSockets + Qt::Quick3D QGIS::Core QGIS::Analysis ZXing::ZXing diff --git a/src/qml/Dummy.qml b/src/qml/Dummy.qml new file mode 100644 index 0000000000..38ec82db14 --- /dev/null +++ b/src/qml/Dummy.qml @@ -0,0 +1,9 @@ +// This file is used to add extra imports for the qml import scanner to add additional modules +import QtQuick3D +import QtQuick3D.AssetUtils +import QtQuick3D.Helpers +import QtQuick3D.Particles3D +import QtWebSockets + +Item { +} diff --git a/src/qml/qgismobileapp.qml b/src/qml/qgismobileapp.qml index e046e3ec3a..ba87f5c057 100644 --- a/src/qml/qgismobileapp.qml +++ b/src/qml/qgismobileapp.qml @@ -14,6 +14,7 @@ * (at your option) any later version. 
* * * ***************************************************************************/ +import QtCore import QtQuick import QtQuick.Controls import QtQuick.Controls.Material @@ -21,8 +22,6 @@ import QtQuick.Effects import QtQuick.Window import QtQml import QtSensors -import QtCore -import QtWebSockets // Not used here but added so QML registers its dependencies for plugins to use import org.qgis import org.qfield import Theme diff --git a/src/qml/qml.qrc b/src/qml/qml.qrc index d753b191f4..1381aab6fc 100644 --- a/src/qml/qml.qrc +++ b/src/qml/qml.qrc @@ -13,6 +13,7 @@ CoordinateLocator.qml DashBoard.qml DigitizingToolbar.qml + Dummy.qml ElevationProfile.qml FeatureForm.qml FeatureListForm.qml diff --git a/vcpkg.json b/vcpkg.json index 94a161e1bd..b5b3425798 100644 --- a/vcpkg.json +++ b/vcpkg.json @@ -84,6 +84,7 @@ "qml" ] }, + "qtquick3d", { "name": "qtsensors", "features": [ diff --git a/vcpkg/ports/qgis/meshoptimizer.patch b/vcpkg/ports/qgis/meshoptimizer.patch new file mode 100644 index 0000000000..7401ce3213 --- /dev/null +++ b/vcpkg/ports/qgis/meshoptimizer.patch @@ -0,0 +1,60 @@ +diff --git a/CMakeLists.txt b/CMakeLists.txt +index e299cb1d617..ae6482b4e90 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -255,6 +255,7 @@ if(WITH_CORE) + + # try to configure and build POLY2TRI support + set (WITH_INTERNAL_POLY2TRI TRUE CACHE BOOL "Determines whether POLY2TRI should be built from internal copy") ++ set (WITH_INTERNAL_MESHOPTIMIZER TRUE CACHE BOOL "Determines whether MESHOPTIMIZER should be built from internal copy") + + # try to configure and build POSTGRESQL support + set (WITH_POSTGRESQL TRUE CACHE BOOL "Determines whether POSTGRESQL support should be built") +diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt +index 94c0e143001..777bd4bcb21 100644 +--- a/src/core/CMakeLists.txt ++++ b/src/core/CMakeLists.txt +@@ -19,6 +19,18 @@ if (WITH_PDF4QT) + SUBDIRS(${CMAKE_SOURCE_DIR}/external/PDF4QT) + endif() + ++if(WITH_INTERNAL_MESHOPTIMIZER) ++ add_library(STATIC meshoptimizer::meshoptimizer ++ ${CMAKE_SOURCE_DIR}/external/meshOptimizer/simplifier.cpp ++ ) ++ ++ target_include_directories(meshoptimizer::meshoptimizer ++ ${CMAKE_SOURCE_DIR}/external/meshOptimizer ++ ) ++else() ++ find_package(meshoptimizer CONFIG REQUIRED) ++endif() ++ + set(QGIS_CORE_SRCS + ${CMAKE_SOURCE_DIR}/external/kdbush/include/kdbush.hpp + +@@ -30,8 +42,6 @@ set(QGIS_CORE_SRCS + ${CMAKE_SOURCE_DIR}/external/nmea/time.c + ${CMAKE_SOURCE_DIR}/external/nmea/tok.c + +- ${CMAKE_SOURCE_DIR}/external/meshOptimizer/simplifier.cpp +- + ${FLEX_QgsExpressionLexer_OUTPUTS} + ${BISON_QgsExpressionParser_OUTPUTS} + ${FLEX_QgsSqlStatementLexer_OUTPUTS} +@@ -2375,7 +2385,6 @@ target_include_directories(qgis_core PUBLIC + ${CMAKE_SOURCE_DIR}/external/kdbush/include + ${CMAKE_SOURCE_DIR}/external/nmea + ${CMAKE_SOURCE_DIR}/external/rtree/include +- ${CMAKE_SOURCE_DIR}/external/meshOptimizer + ${CMAKE_SOURCE_DIR}/external/tinygltf + ) + +@@ -2485,6 +2494,7 @@ target_link_libraries(qgis_core + ${ZLIB_LIBRARIES} + ${EXIV2_LIBRARY} + PROJ::proj ++ meshoptimizer::meshoptimizer + ) + + if(BUILD_WITH_QT6) diff --git a/vcpkg/ports/qgis/portfile.cmake b/vcpkg/ports/qgis/portfile.cmake index 523f53e77e..af4cba1d26 100644 --- a/vcpkg/ports/qgis/portfile.cmake +++ b/vcpkg/ports/qgis/portfile.cmake @@ -18,6 +18,7 @@ vcpkg_from_github( include-qthread.patch processing.patch # Needed to avoid link issue with tinygltf (ATM embedded into QGIS) and _GEOSQueryCallback defined multiple times font_download.patch + meshoptimizer.patch # 
Unvendor meshoptimizer ) file(REMOVE ${SOURCE_PATH}/cmake/FindGDAL.cmake) @@ -38,6 +39,7 @@ list(APPEND QGIS_OPTIONS "-DWITH_QSPATIALITE:BOOL=OFF") list(APPEND QGIS_OPTIONS "-DWITH_PDAL:BOOL=OFF") list(APPEND QGIS_OPTIONS "-DWITH_DRACO:BOOL=ON") list(APPEND QGIS_OPTIONS "-DWITH_INTERNAL_POLY2TRI:BOOL=OFF") +list(APPEND QGIS_OPTIONS "-DWITH_INTERNAL_MESHOPTIMIZER:BOOL=OFF") list(APPEND QGIS_OPTIONS "-DBISON_EXECUTABLE=${BISON}") list(APPEND QGIS_OPTIONS "-DFLEX_EXECUTABLE=${FLEX}") diff --git a/vcpkg/ports/qgis/vcpkg.json b/vcpkg/ports/qgis/vcpkg.json index 3d71b5ddac..c597b938f8 100644 --- a/vcpkg/ports/qgis/vcpkg.json +++ b/vcpkg/ports/qgis/vcpkg.json @@ -30,6 +30,7 @@ }, "libxml2", "libzip", + "meshoptimizer", "proj", "protobuf", { diff --git a/vcpkg/ports/qtquick3d/0001-devendor-meshoptimizer.patch b/vcpkg/ports/qtquick3d/0001-devendor-meshoptimizer.patch new file mode 100644 index 0000000000..6cba201d21 --- /dev/null +++ b/vcpkg/ports/qtquick3d/0001-devendor-meshoptimizer.patch @@ -0,0 +1,8977 @@ +From b35dbb67688edf360342ff238fbc1b8e3a3f67d4 Mon Sep 17 00:00:00 2001 +From: Matthias Kuhn +Date: Mon, 21 Oct 2024 23:02:47 +0200 +Subject: [PATCH] devendor meshoptimizer + +--- + src/3rdparty/meshoptimizer/LICENSE.md | 21 - + .../meshoptimizer/qt_attribution.json | 14 - + src/3rdparty/meshoptimizer/src/allocator.cpp | 8 - + .../meshoptimizer/src/clusterizer.cpp | 856 --------- + src/3rdparty/meshoptimizer/src/indexcodec.cpp | 674 ------- + .../meshoptimizer/src/indexgenerator.cpp | 551 ------ + .../meshoptimizer/src/meshoptimizer.h | 1069 ----------- + .../meshoptimizer/src/overdrawanalyzer.cpp | 230 --- + .../meshoptimizer/src/overdrawoptimizer.cpp | 333 ---- + src/3rdparty/meshoptimizer/src/simplifier.cpp | 1677 ----------------- + .../meshoptimizer/src/spatialorder.cpp | 194 -- + src/3rdparty/meshoptimizer/src/stripifier.cpp | 295 --- + .../meshoptimizer/src/vcacheanalyzer.cpp | 73 - + .../meshoptimizer/src/vcacheoptimizer.cpp | 473 ----- + .../meshoptimizer/src/vertexcodec.cpp | 1195 ------------ + .../meshoptimizer/src/vertexfilter.cpp | 962 ---------- + .../meshoptimizer/src/vfetchanalyzer.cpp | 58 - + .../meshoptimizer/src/vfetchoptimizer.cpp | 74 - + src/utils/CMakeLists.txt | 36 +- + 19 files changed, 3 insertions(+), 8790 deletions(-) + delete mode 100644 src/3rdparty/meshoptimizer/LICENSE.md + delete mode 100644 src/3rdparty/meshoptimizer/qt_attribution.json + delete mode 100644 src/3rdparty/meshoptimizer/src/allocator.cpp + delete mode 100644 src/3rdparty/meshoptimizer/src/clusterizer.cpp + delete mode 100644 src/3rdparty/meshoptimizer/src/indexcodec.cpp + delete mode 100644 src/3rdparty/meshoptimizer/src/indexgenerator.cpp + delete mode 100644 src/3rdparty/meshoptimizer/src/meshoptimizer.h + delete mode 100644 src/3rdparty/meshoptimizer/src/overdrawanalyzer.cpp + delete mode 100644 src/3rdparty/meshoptimizer/src/overdrawoptimizer.cpp + delete mode 100644 src/3rdparty/meshoptimizer/src/simplifier.cpp + delete mode 100644 src/3rdparty/meshoptimizer/src/spatialorder.cpp + delete mode 100644 src/3rdparty/meshoptimizer/src/stripifier.cpp + delete mode 100644 src/3rdparty/meshoptimizer/src/vcacheanalyzer.cpp + delete mode 100644 src/3rdparty/meshoptimizer/src/vcacheoptimizer.cpp + delete mode 100644 src/3rdparty/meshoptimizer/src/vertexcodec.cpp + delete mode 100644 src/3rdparty/meshoptimizer/src/vertexfilter.cpp + delete mode 100644 src/3rdparty/meshoptimizer/src/vfetchanalyzer.cpp + delete mode 100644 src/3rdparty/meshoptimizer/src/vfetchoptimizer.cpp + +diff --git 
a/src/3rdparty/meshoptimizer/LICENSE.md b/src/3rdparty/meshoptimizer/LICENSE.md +deleted file mode 100644 +index b673c24..0000000 +--- a/src/3rdparty/meshoptimizer/LICENSE.md ++++ /dev/null +@@ -1,21 +0,0 @@ +-MIT License +- +-Copyright (c) 2016-2022 Arseny Kapoulkine +- +-Permission is hereby granted, free of charge, to any person obtaining a copy +-of this software and associated documentation files (the "Software"), to deal +-in the Software without restriction, including without limitation the rights +-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +-copies of the Software, and to permit persons to whom the Software is +-furnished to do so, subject to the following conditions: +- +-The above copyright notice and this permission notice shall be included in all +-copies or substantial portions of the Software. +- +-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +-SOFTWARE. +diff --git a/src/3rdparty/meshoptimizer/qt_attribution.json b/src/3rdparty/meshoptimizer/qt_attribution.json +deleted file mode 100644 +index c7ff110..0000000 +--- a/src/3rdparty/meshoptimizer/qt_attribution.json ++++ /dev/null +@@ -1,14 +0,0 @@ +-{ +- "Id": "meshoptimizer", +- "Name": "meshoptimizer", +- "QDocModule": "qtquick3d", +- "Description": "Provides algorithms to help optimize meshes for GPU render stages, reduce the mesh complexity and storage overhead.", +- "QtUsage": "Used to generate Level of Detail meshes for Models in Qt Quick 3D", +- +- "Homepage": "https://github.com/zeux/meshoptimizer", +- "Version": "v0.18", +- "License": "MIT License", +- "LicenseId": "MIT", +- "LicenseFile": "LICENSE.md", +- "Copyright": "Copyright (c) 2016-2022 Arseny Kapoulkine" +-} +diff --git a/src/3rdparty/meshoptimizer/src/allocator.cpp b/src/3rdparty/meshoptimizer/src/allocator.cpp +deleted file mode 100644 +index 072e8e5..0000000 +--- a/src/3rdparty/meshoptimizer/src/allocator.cpp ++++ /dev/null +@@ -1,8 +0,0 @@ +-// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +-#include "meshoptimizer.h" +- +-void meshopt_setAllocator(void* (MESHOPTIMIZER_ALLOC_CALLCONV *allocate)(size_t), void (MESHOPTIMIZER_ALLOC_CALLCONV *deallocate)(void*)) +-{ +- meshopt_Allocator::Storage::allocate = allocate; +- meshopt_Allocator::Storage::deallocate = deallocate; +-} +diff --git a/src/3rdparty/meshoptimizer/src/clusterizer.cpp b/src/3rdparty/meshoptimizer/src/clusterizer.cpp +deleted file mode 100644 +index b1f7b35..0000000 +--- a/src/3rdparty/meshoptimizer/src/clusterizer.cpp ++++ /dev/null +@@ -1,856 +0,0 @@ +-// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +-#include "meshoptimizer.h" +- +-#include +-#include +-#include +-#include +- +-// This work is based on: +-// Graham Wihlidal. Optimizing the Graphics Pipeline with Compute. 2016 +-// Matthaeus Chajdas. GeometryFX 1.2 - Cluster Culling. 2016 +-// Jack Ritter. An Efficient Bounding Sphere. 
1990 +-namespace meshopt +-{ +- +-// This must be <= 255 since index 0xff is used internally to indice a vertex that doesn't belong to a meshlet +-const size_t kMeshletMaxVertices = 255; +- +-// A reasonable limit is around 2*max_vertices or less +-const size_t kMeshletMaxTriangles = 512; +- +-struct TriangleAdjacency2 +-{ +- unsigned int* counts; +- unsigned int* offsets; +- unsigned int* data; +-}; +- +-static void buildTriangleAdjacency(TriangleAdjacency2& adjacency, const unsigned int* indices, size_t index_count, size_t vertex_count, meshopt_Allocator& allocator) +-{ +- size_t face_count = index_count / 3; +- +- // allocate arrays +- adjacency.counts = allocator.allocate(vertex_count); +- adjacency.offsets = allocator.allocate(vertex_count); +- adjacency.data = allocator.allocate(index_count); +- +- // fill triangle counts +- memset(adjacency.counts, 0, vertex_count * sizeof(unsigned int)); +- +- for (size_t i = 0; i < index_count; ++i) +- { +- assert(indices[i] < vertex_count); +- +- adjacency.counts[indices[i]]++; +- } +- +- // fill offset table +- unsigned int offset = 0; +- +- for (size_t i = 0; i < vertex_count; ++i) +- { +- adjacency.offsets[i] = offset; +- offset += adjacency.counts[i]; +- } +- +- assert(offset == index_count); +- +- // fill triangle data +- for (size_t i = 0; i < face_count; ++i) +- { +- unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2]; +- +- adjacency.data[adjacency.offsets[a]++] = unsigned(i); +- adjacency.data[adjacency.offsets[b]++] = unsigned(i); +- adjacency.data[adjacency.offsets[c]++] = unsigned(i); +- } +- +- // fix offsets that have been disturbed by the previous pass +- for (size_t i = 0; i < vertex_count; ++i) +- { +- assert(adjacency.offsets[i] >= adjacency.counts[i]); +- +- adjacency.offsets[i] -= adjacency.counts[i]; +- } +-} +- +-static void computeBoundingSphere(float result[4], const float points[][3], size_t count) +-{ +- assert(count > 0); +- +- // find extremum points along all 3 axes; for each axis we get a pair of points with min/max coordinates +- size_t pmin[3] = {0, 0, 0}; +- size_t pmax[3] = {0, 0, 0}; +- +- for (size_t i = 0; i < count; ++i) +- { +- const float* p = points[i]; +- +- for (int axis = 0; axis < 3; ++axis) +- { +- pmin[axis] = (p[axis] < points[pmin[axis]][axis]) ? i : pmin[axis]; +- pmax[axis] = (p[axis] > points[pmax[axis]][axis]) ? 
i : pmax[axis]; +- } +- } +- +- // find the pair of points with largest distance +- float paxisd2 = 0; +- int paxis = 0; +- +- for (int axis = 0; axis < 3; ++axis) +- { +- const float* p1 = points[pmin[axis]]; +- const float* p2 = points[pmax[axis]]; +- +- float d2 = (p2[0] - p1[0]) * (p2[0] - p1[0]) + (p2[1] - p1[1]) * (p2[1] - p1[1]) + (p2[2] - p1[2]) * (p2[2] - p1[2]); +- +- if (d2 > paxisd2) +- { +- paxisd2 = d2; +- paxis = axis; +- } +- } +- +- // use the longest segment as the initial sphere diameter +- const float* p1 = points[pmin[paxis]]; +- const float* p2 = points[pmax[paxis]]; +- +- float center[3] = {(p1[0] + p2[0]) / 2, (p1[1] + p2[1]) / 2, (p1[2] + p2[2]) / 2}; +- float radius = sqrtf(paxisd2) / 2; +- +- // iteratively adjust the sphere up until all points fit +- for (size_t i = 0; i < count; ++i) +- { +- const float* p = points[i]; +- float d2 = (p[0] - center[0]) * (p[0] - center[0]) + (p[1] - center[1]) * (p[1] - center[1]) + (p[2] - center[2]) * (p[2] - center[2]); +- +- if (d2 > radius * radius) +- { +- float d = sqrtf(d2); +- assert(d > 0); +- +- float k = 0.5f + (radius / d) / 2; +- +- center[0] = center[0] * k + p[0] * (1 - k); +- center[1] = center[1] * k + p[1] * (1 - k); +- center[2] = center[2] * k + p[2] * (1 - k); +- radius = (radius + d) / 2; +- } +- } +- +- result[0] = center[0]; +- result[1] = center[1]; +- result[2] = center[2]; +- result[3] = radius; +-} +- +-struct Cone +-{ +- float px, py, pz; +- float nx, ny, nz; +-}; +- +-static float getMeshletScore(float distance2, float spread, float cone_weight, float expected_radius) +-{ +- float cone = 1.f - spread * cone_weight; +- float cone_clamped = cone < 1e-3f ? 1e-3f : cone; +- +- return (1 + sqrtf(distance2) / expected_radius * (1 - cone_weight)) * cone_clamped; +-} +- +-static Cone getMeshletCone(const Cone& acc, unsigned int triangle_count) +-{ +- Cone result = acc; +- +- float center_scale = triangle_count == 0 ? 0.f : 1.f / float(triangle_count); +- +- result.px *= center_scale; +- result.py *= center_scale; +- result.pz *= center_scale; +- +- float axis_length = result.nx * result.nx + result.ny * result.ny + result.nz * result.nz; +- float axis_scale = axis_length == 0.f ? 0.f : 1.f / sqrtf(axis_length); +- +- result.nx *= axis_scale; +- result.ny *= axis_scale; +- result.nz *= axis_scale; +- +- return result; +-} +- +-static float computeTriangleCones(Cone* triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +-{ +- (void)vertex_count; +- +- size_t vertex_stride_float = vertex_positions_stride / sizeof(float); +- size_t face_count = index_count / 3; +- +- float mesh_area = 0; +- +- for (size_t i = 0; i < face_count; ++i) +- { +- unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2]; +- assert(a < vertex_count && b < vertex_count && c < vertex_count); +- +- const float* p0 = vertex_positions + vertex_stride_float * a; +- const float* p1 = vertex_positions + vertex_stride_float * b; +- const float* p2 = vertex_positions + vertex_stride_float * c; +- +- float p10[3] = {p1[0] - p0[0], p1[1] - p0[1], p1[2] - p0[2]}; +- float p20[3] = {p2[0] - p0[0], p2[1] - p0[1], p2[2] - p0[2]}; +- +- float normalx = p10[1] * p20[2] - p10[2] * p20[1]; +- float normaly = p10[2] * p20[0] - p10[0] * p20[2]; +- float normalz = p10[0] * p20[1] - p10[1] * p20[0]; +- +- float area = sqrtf(normalx * normalx + normaly * normaly + normalz * normalz); +- float invarea = (area == 0.f) ? 
0.f : 1.f / area; +- +- triangles[i].px = (p0[0] + p1[0] + p2[0]) / 3.f; +- triangles[i].py = (p0[1] + p1[1] + p2[1]) / 3.f; +- triangles[i].pz = (p0[2] + p1[2] + p2[2]) / 3.f; +- +- triangles[i].nx = normalx * invarea; +- triangles[i].ny = normaly * invarea; +- triangles[i].nz = normalz * invarea; +- +- mesh_area += area; +- } +- +- return mesh_area; +-} +- +-static void finishMeshlet(meshopt_Meshlet& meshlet, unsigned char* meshlet_triangles) +-{ +- size_t offset = meshlet.triangle_offset + meshlet.triangle_count * 3; +- +- // fill 4b padding with 0 +- while (offset & 3) +- meshlet_triangles[offset++] = 0; +-} +- +-static bool appendMeshlet(meshopt_Meshlet& meshlet, unsigned int a, unsigned int b, unsigned int c, unsigned char* used, meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, size_t meshlet_offset, size_t max_vertices, size_t max_triangles) +-{ +- unsigned char& av = used[a]; +- unsigned char& bv = used[b]; +- unsigned char& cv = used[c]; +- +- bool result = false; +- +- unsigned int used_extra = (av == 0xff) + (bv == 0xff) + (cv == 0xff); +- +- if (meshlet.vertex_count + used_extra > max_vertices || meshlet.triangle_count >= max_triangles) +- { +- meshlets[meshlet_offset] = meshlet; +- +- for (size_t j = 0; j < meshlet.vertex_count; ++j) +- used[meshlet_vertices[meshlet.vertex_offset + j]] = 0xff; +- +- finishMeshlet(meshlet, meshlet_triangles); +- +- meshlet.vertex_offset += meshlet.vertex_count; +- meshlet.triangle_offset += (meshlet.triangle_count * 3 + 3) & ~3; // 4b padding +- meshlet.vertex_count = 0; +- meshlet.triangle_count = 0; +- +- result = true; +- } +- +- if (av == 0xff) +- { +- av = (unsigned char)meshlet.vertex_count; +- meshlet_vertices[meshlet.vertex_offset + meshlet.vertex_count++] = a; +- } +- +- if (bv == 0xff) +- { +- bv = (unsigned char)meshlet.vertex_count; +- meshlet_vertices[meshlet.vertex_offset + meshlet.vertex_count++] = b; +- } +- +- if (cv == 0xff) +- { +- cv = (unsigned char)meshlet.vertex_count; +- meshlet_vertices[meshlet.vertex_offset + meshlet.vertex_count++] = c; +- } +- +- meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 0] = av; +- meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 1] = bv; +- meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 2] = cv; +- meshlet.triangle_count++; +- +- return result; +-} +- +-struct KDNode +-{ +- union +- { +- float split; +- unsigned int index; +- }; +- +- // leaves: axis = 3, children = number of extra points after this one (0 if 'index' is the only point) +- // branches: axis != 3, left subtree = skip 1, right subtree = skip 1+children +- unsigned int axis : 2; +- unsigned int children : 30; +-}; +- +-static size_t kdtreePartition(unsigned int* indices, size_t count, const float* points, size_t stride, unsigned int axis, float pivot) +-{ +- size_t m = 0; +- +- // invariant: elements in range [0, m) are < pivot, elements in range [m, i) are >= pivot +- for (size_t i = 0; i < count; ++i) +- { +- float v = points[indices[i] * stride + axis]; +- +- // swap(m, i) unconditionally +- unsigned int t = indices[m]; +- indices[m] = indices[i]; +- indices[i] = t; +- +- // when v >= pivot, we swap i with m without advancing it, preserving invariants +- m += v < pivot; +- } +- +- return m; +-} +- +-static size_t kdtreeBuildLeaf(size_t offset, KDNode* nodes, size_t node_count, unsigned int* indices, size_t count) +-{ +- assert(offset + count <= node_count); +- (void)node_count; +- +- KDNode& result = 
nodes[offset]; +- +- result.index = indices[0]; +- result.axis = 3; +- result.children = unsigned(count - 1); +- +- // all remaining points are stored in nodes immediately following the leaf +- for (size_t i = 1; i < count; ++i) +- { +- KDNode& tail = nodes[offset + i]; +- +- tail.index = indices[i]; +- tail.axis = 3; +- tail.children = ~0u >> 2; // bogus value to prevent misuse +- } +- +- return offset + count; +-} +- +-static size_t kdtreeBuild(size_t offset, KDNode* nodes, size_t node_count, const float* points, size_t stride, unsigned int* indices, size_t count, size_t leaf_size) +-{ +- assert(count > 0); +- assert(offset < node_count); +- +- if (count <= leaf_size) +- return kdtreeBuildLeaf(offset, nodes, node_count, indices, count); +- +- float mean[3] = {}; +- float vars[3] = {}; +- float runc = 1, runs = 1; +- +- // gather statistics on the points in the subtree using Welford's algorithm +- for (size_t i = 0; i < count; ++i, runc += 1.f, runs = 1.f / runc) +- { +- const float* point = points + indices[i] * stride; +- +- for (int k = 0; k < 3; ++k) +- { +- float delta = point[k] - mean[k]; +- mean[k] += delta * runs; +- vars[k] += delta * (point[k] - mean[k]); +- } +- } +- +- // split axis is one where the variance is largest +- unsigned int axis = vars[0] >= vars[1] && vars[0] >= vars[2] ? 0 : vars[1] >= vars[2] ? 1 : 2; +- +- float split = mean[axis]; +- size_t middle = kdtreePartition(indices, count, points, stride, axis, split); +- +- // when the partition is degenerate simply consolidate the points into a single node +- if (middle <= leaf_size / 2 || middle >= count - leaf_size / 2) +- return kdtreeBuildLeaf(offset, nodes, node_count, indices, count); +- +- KDNode& result = nodes[offset]; +- +- result.split = split; +- result.axis = axis; +- +- // left subtree is right after our node +- size_t next_offset = kdtreeBuild(offset + 1, nodes, node_count, points, stride, indices, middle, leaf_size); +- +- // distance to the right subtree is represented explicitly +- result.children = unsigned(next_offset - offset - 1); +- +- return kdtreeBuild(next_offset, nodes, node_count, points, stride, indices + middle, count - middle, leaf_size); +-} +- +-static void kdtreeNearest(KDNode* nodes, unsigned int root, const float* points, size_t stride, const unsigned char* emitted_flags, const float* position, unsigned int& result, float& limit) +-{ +- const KDNode& node = nodes[root]; +- +- if (node.axis == 3) +- { +- // leaf +- for (unsigned int i = 0; i <= node.children; ++i) +- { +- unsigned int index = nodes[root + i].index; +- +- if (emitted_flags[index]) +- continue; +- +- const float* point = points + index * stride; +- +- float distance2 = +- (point[0] - position[0]) * (point[0] - position[0]) + +- (point[1] - position[1]) * (point[1] - position[1]) + +- (point[2] - position[2]) * (point[2] - position[2]); +- float distance = sqrtf(distance2); +- +- if (distance < limit) +- { +- result = index; +- limit = distance; +- } +- } +- } +- else +- { +- // branch; we order recursion to process the node that search position is in first +- float delta = position[node.axis] - node.split; +- unsigned int first = (delta <= 0) ? 
0 : node.children; +- unsigned int second = first ^ node.children; +- +- kdtreeNearest(nodes, root + 1 + first, points, stride, emitted_flags, position, result, limit); +- +- // only process the other node if it can have a match based on closest distance so far +- if (fabsf(delta) <= limit) +- kdtreeNearest(nodes, root + 1 + second, points, stride, emitted_flags, position, result, limit); +- } +-} +- +-} // namespace meshopt +- +-size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_t max_triangles) +-{ +- using namespace meshopt; +- +- assert(index_count % 3 == 0); +- assert(max_vertices >= 3 && max_vertices <= kMeshletMaxVertices); +- assert(max_triangles >= 1 && max_triangles <= kMeshletMaxTriangles); +- assert(max_triangles % 4 == 0); // ensures the caller will compute output space properly as index data is 4b aligned +- +- (void)kMeshletMaxVertices; +- (void)kMeshletMaxTriangles; +- +- // meshlet construction is limited by max vertices and max triangles per meshlet +- // the worst case is that the input is an unindexed stream since this equally stresses both limits +- // note that we assume that in the worst case, we leave 2 vertices unpacked in each meshlet - if we have space for 3 we can pack any triangle +- size_t max_vertices_conservative = max_vertices - 2; +- size_t meshlet_limit_vertices = (index_count + max_vertices_conservative - 1) / max_vertices_conservative; +- size_t meshlet_limit_triangles = (index_count / 3 + max_triangles - 1) / max_triangles; +- +- return meshlet_limit_vertices > meshlet_limit_triangles ? meshlet_limit_vertices : meshlet_limit_triangles; +-} +- +-size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight) +-{ +- using namespace meshopt; +- +- assert(index_count % 3 == 0); +- assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); +- assert(vertex_positions_stride % sizeof(float) == 0); +- +- assert(max_vertices >= 3 && max_vertices <= kMeshletMaxVertices); +- assert(max_triangles >= 1 && max_triangles <= kMeshletMaxTriangles); +- assert(max_triangles % 4 == 0); // ensures the caller will compute output space properly as index data is 4b aligned +- +- meshopt_Allocator allocator; +- +- TriangleAdjacency2 adjacency = {}; +- buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator); +- +- unsigned int* live_triangles = allocator.allocate(vertex_count); +- memcpy(live_triangles, adjacency.counts, vertex_count * sizeof(unsigned int)); +- +- size_t face_count = index_count / 3; +- +- unsigned char* emitted_flags = allocator.allocate(face_count); +- memset(emitted_flags, 0, face_count); +- +- // for each triangle, precompute centroid & normal to use for scoring +- Cone* triangles = allocator.allocate(face_count); +- float mesh_area = computeTriangleCones(triangles, indices, index_count, vertex_positions, vertex_count, vertex_positions_stride); +- +- // assuming each meshlet is a square patch, expected radius is sqrt(expected area) +- float triangle_area_avg = face_count == 0 ? 
0.f : mesh_area / float(face_count) * 0.5f; +- float meshlet_expected_radius = sqrtf(triangle_area_avg * max_triangles) * 0.5f; +- +- // build a kd-tree for nearest neighbor lookup +- unsigned int* kdindices = allocator.allocate(face_count); +- for (size_t i = 0; i < face_count; ++i) +- kdindices[i] = unsigned(i); +- +- KDNode* nodes = allocator.allocate(face_count * 2); +- kdtreeBuild(0, nodes, face_count * 2, &triangles[0].px, sizeof(Cone) / sizeof(float), kdindices, face_count, /* leaf_size= */ 8); +- +- // index of the vertex in the meshlet, 0xff if the vertex isn't used +- unsigned char* used = allocator.allocate(vertex_count); +- memset(used, -1, vertex_count); +- +- meshopt_Meshlet meshlet = {}; +- size_t meshlet_offset = 0; +- +- Cone meshlet_cone_acc = {}; +- +- for (;;) +- { +- unsigned int best_triangle = ~0u; +- unsigned int best_extra = 5; +- float best_score = FLT_MAX; +- +- Cone meshlet_cone = getMeshletCone(meshlet_cone_acc, meshlet.triangle_count); +- +- for (size_t i = 0; i < meshlet.vertex_count; ++i) +- { +- unsigned int index = meshlet_vertices[meshlet.vertex_offset + i]; +- +- unsigned int* neighbours = &adjacency.data[0] + adjacency.offsets[index]; +- size_t neighbours_size = adjacency.counts[index]; +- +- for (size_t j = 0; j < neighbours_size; ++j) +- { +- unsigned int triangle = neighbours[j]; +- assert(!emitted_flags[triangle]); +- +- unsigned int a = indices[triangle * 3 + 0], b = indices[triangle * 3 + 1], c = indices[triangle * 3 + 2]; +- assert(a < vertex_count && b < vertex_count && c < vertex_count); +- +- unsigned int extra = (used[a] == 0xff) + (used[b] == 0xff) + (used[c] == 0xff); +- +- // triangles that don't add new vertices to meshlets are max. priority +- if (extra != 0) +- { +- // artificially increase the priority of dangling triangles as they're expensive to add to new meshlets +- if (live_triangles[a] == 1 || live_triangles[b] == 1 || live_triangles[c] == 1) +- extra = 0; +- +- extra++; +- } +- +- // since topology-based priority is always more important than the score, we can skip scoring in some cases +- if (extra > best_extra) +- continue; +- +- const Cone& tri_cone = triangles[triangle]; +- +- float distance2 = +- (tri_cone.px - meshlet_cone.px) * (tri_cone.px - meshlet_cone.px) + +- (tri_cone.py - meshlet_cone.py) * (tri_cone.py - meshlet_cone.py) + +- (tri_cone.pz - meshlet_cone.pz) * (tri_cone.pz - meshlet_cone.pz); +- +- float spread = tri_cone.nx * meshlet_cone.nx + tri_cone.ny * meshlet_cone.ny + tri_cone.nz * meshlet_cone.nz; +- +- float score = getMeshletScore(distance2, spread, cone_weight, meshlet_expected_radius); +- +- // note that topology-based priority is always more important than the score +- // this helps maintain reasonable effectiveness of meshlet data and reduces scoring cost +- if (extra < best_extra || score < best_score) +- { +- best_triangle = triangle; +- best_extra = extra; +- best_score = score; +- } +- } +- } +- +- if (best_triangle == ~0u) +- { +- float position[3] = {meshlet_cone.px, meshlet_cone.py, meshlet_cone.pz}; +- unsigned int index = ~0u; +- float limit = FLT_MAX; +- +- kdtreeNearest(nodes, 0, &triangles[0].px, sizeof(Cone) / sizeof(float), emitted_flags, position, index, limit); +- +- best_triangle = index; +- } +- +- if (best_triangle == ~0u) +- break; +- +- unsigned int a = indices[best_triangle * 3 + 0], b = indices[best_triangle * 3 + 1], c = indices[best_triangle * 3 + 2]; +- assert(a < vertex_count && b < vertex_count && c < vertex_count); +- +- // add meshlet to the output; when the current 
meshlet is full we reset the accumulated bounds +- if (appendMeshlet(meshlet, a, b, c, used, meshlets, meshlet_vertices, meshlet_triangles, meshlet_offset, max_vertices, max_triangles)) +- { +- meshlet_offset++; +- memset(&meshlet_cone_acc, 0, sizeof(meshlet_cone_acc)); +- } +- +- live_triangles[a]--; +- live_triangles[b]--; +- live_triangles[c]--; +- +- // remove emitted triangle from adjacency data +- // this makes sure that we spend less time traversing these lists on subsequent iterations +- for (size_t k = 0; k < 3; ++k) +- { +- unsigned int index = indices[best_triangle * 3 + k]; +- +- unsigned int* neighbours = &adjacency.data[0] + adjacency.offsets[index]; +- size_t neighbours_size = adjacency.counts[index]; +- +- for (size_t i = 0; i < neighbours_size; ++i) +- { +- unsigned int tri = neighbours[i]; +- +- if (tri == best_triangle) +- { +- neighbours[i] = neighbours[neighbours_size - 1]; +- adjacency.counts[index]--; +- break; +- } +- } +- } +- +- // update aggregated meshlet cone data for scoring subsequent triangles +- meshlet_cone_acc.px += triangles[best_triangle].px; +- meshlet_cone_acc.py += triangles[best_triangle].py; +- meshlet_cone_acc.pz += triangles[best_triangle].pz; +- meshlet_cone_acc.nx += triangles[best_triangle].nx; +- meshlet_cone_acc.ny += triangles[best_triangle].ny; +- meshlet_cone_acc.nz += triangles[best_triangle].nz; +- +- emitted_flags[best_triangle] = 1; +- } +- +- if (meshlet.triangle_count) +- { +- finishMeshlet(meshlet, meshlet_triangles); +- +- meshlets[meshlet_offset++] = meshlet; +- } +- +- assert(meshlet_offset <= meshopt_buildMeshletsBound(index_count, max_vertices, max_triangles)); +- return meshlet_offset; +-} +- +-size_t meshopt_buildMeshletsScan(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles) +-{ +- using namespace meshopt; +- +- assert(index_count % 3 == 0); +- +- assert(max_vertices >= 3 && max_vertices <= kMeshletMaxVertices); +- assert(max_triangles >= 1 && max_triangles <= kMeshletMaxTriangles); +- assert(max_triangles % 4 == 0); // ensures the caller will compute output space properly as index data is 4b aligned +- +- meshopt_Allocator allocator; +- +- // index of the vertex in the meshlet, 0xff if the vertex isn't used +- unsigned char* used = allocator.allocate(vertex_count); +- memset(used, -1, vertex_count); +- +- meshopt_Meshlet meshlet = {}; +- size_t meshlet_offset = 0; +- +- for (size_t i = 0; i < index_count; i += 3) +- { +- unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2]; +- assert(a < vertex_count && b < vertex_count && c < vertex_count); +- +- // appends triangle to the meshlet and writes previous meshlet to the output if full +- meshlet_offset += appendMeshlet(meshlet, a, b, c, used, meshlets, meshlet_vertices, meshlet_triangles, meshlet_offset, max_vertices, max_triangles); +- } +- +- if (meshlet.triangle_count) +- { +- finishMeshlet(meshlet, meshlet_triangles); +- +- meshlets[meshlet_offset++] = meshlet; +- } +- +- assert(meshlet_offset <= meshopt_buildMeshletsBound(index_count, max_vertices, max_triangles)); +- return meshlet_offset; +-} +- +-meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +-{ +- using namespace meshopt; +- +- assert(index_count % 3 == 0); +- assert(index_count / 3 <= kMeshletMaxTriangles); +- 
assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); +- assert(vertex_positions_stride % sizeof(float) == 0); +- +- (void)vertex_count; +- +- size_t vertex_stride_float = vertex_positions_stride / sizeof(float); +- +- // compute triangle normals and gather triangle corners +- float normals[kMeshletMaxTriangles][3]; +- float corners[kMeshletMaxTriangles][3][3]; +- size_t triangles = 0; +- +- for (size_t i = 0; i < index_count; i += 3) +- { +- unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2]; +- assert(a < vertex_count && b < vertex_count && c < vertex_count); +- +- const float* p0 = vertex_positions + vertex_stride_float * a; +- const float* p1 = vertex_positions + vertex_stride_float * b; +- const float* p2 = vertex_positions + vertex_stride_float * c; +- +- float p10[3] = {p1[0] - p0[0], p1[1] - p0[1], p1[2] - p0[2]}; +- float p20[3] = {p2[0] - p0[0], p2[1] - p0[1], p2[2] - p0[2]}; +- +- float normalx = p10[1] * p20[2] - p10[2] * p20[1]; +- float normaly = p10[2] * p20[0] - p10[0] * p20[2]; +- float normalz = p10[0] * p20[1] - p10[1] * p20[0]; +- +- float area = sqrtf(normalx * normalx + normaly * normaly + normalz * normalz); +- +- // no need to include degenerate triangles - they will be invisible anyway +- if (area == 0.f) +- continue; +- +- // record triangle normals & corners for future use; normal and corner 0 define a plane equation +- normals[triangles][0] = normalx / area; +- normals[triangles][1] = normaly / area; +- normals[triangles][2] = normalz / area; +- memcpy(corners[triangles][0], p0, 3 * sizeof(float)); +- memcpy(corners[triangles][1], p1, 3 * sizeof(float)); +- memcpy(corners[triangles][2], p2, 3 * sizeof(float)); +- triangles++; +- } +- +- meshopt_Bounds bounds = {}; +- +- // degenerate cluster, no valid triangles => trivial reject (cone data is 0) +- if (triangles == 0) +- return bounds; +- +- // compute cluster bounding sphere; we'll use the center to determine normal cone apex as well +- float psphere[4] = {}; +- computeBoundingSphere(psphere, corners[0], triangles * 3); +- +- float center[3] = {psphere[0], psphere[1], psphere[2]}; +- +- // treating triangle normals as points, find the bounding sphere - the sphere center determines the optimal cone axis +- float nsphere[4] = {}; +- computeBoundingSphere(nsphere, normals, triangles); +- +- float axis[3] = {nsphere[0], nsphere[1], nsphere[2]}; +- float axislength = sqrtf(axis[0] * axis[0] + axis[1] * axis[1] + axis[2] * axis[2]); +- float invaxislength = axislength == 0.f ? 0.f : 1.f / axislength; +- +- axis[0] *= invaxislength; +- axis[1] *= invaxislength; +- axis[2] *= invaxislength; +- +- // compute a tight cone around all normals, mindp = cos(angle/2) +- float mindp = 1.f; +- +- for (size_t i = 0; i < triangles; ++i) +- { +- float dp = normals[i][0] * axis[0] + normals[i][1] * axis[1] + normals[i][2] * axis[2]; +- +- mindp = (dp < mindp) ? 
dp : mindp; +- } +- +- // fill bounding sphere info; note that below we can return bounds without cone information for degenerate cones +- bounds.center[0] = center[0]; +- bounds.center[1] = center[1]; +- bounds.center[2] = center[2]; +- bounds.radius = psphere[3]; +- +- // degenerate cluster, normal cone is larger than a hemisphere => trivial accept +- // note that if mindp is positive but close to 0, the triangle intersection code below gets less stable +- // we arbitrarily decide that if a normal cone is ~168 degrees wide or more, the cone isn't useful +- if (mindp <= 0.1f) +- { +- bounds.cone_cutoff = 1; +- bounds.cone_cutoff_s8 = 127; +- return bounds; +- } +- +- float maxt = 0; +- +- // we need to find the point on center-t*axis ray that lies in negative half-space of all triangles +- for (size_t i = 0; i < triangles; ++i) +- { +- // dot(center-t*axis-corner, trinormal) = 0 +- // dot(center-corner, trinormal) - t * dot(axis, trinormal) = 0 +- float cx = center[0] - corners[i][0][0]; +- float cy = center[1] - corners[i][0][1]; +- float cz = center[2] - corners[i][0][2]; +- +- float dc = cx * normals[i][0] + cy * normals[i][1] + cz * normals[i][2]; +- float dn = axis[0] * normals[i][0] + axis[1] * normals[i][1] + axis[2] * normals[i][2]; +- +- // dn should be larger than mindp cutoff above +- assert(dn > 0.f); +- float t = dc / dn; +- +- maxt = (t > maxt) ? t : maxt; +- } +- +- // cone apex should be in the negative half-space of all cluster triangles by construction +- bounds.cone_apex[0] = center[0] - axis[0] * maxt; +- bounds.cone_apex[1] = center[1] - axis[1] * maxt; +- bounds.cone_apex[2] = center[2] - axis[2] * maxt; +- +- // note: this axis is the axis of the normal cone, but our test for perspective camera effectively negates the axis +- bounds.cone_axis[0] = axis[0]; +- bounds.cone_axis[1] = axis[1]; +- bounds.cone_axis[2] = axis[2]; +- +- // cos(a) for normal cone is mindp; we need to add 90 degrees on both sides and invert the cone +- // which gives us -cos(a+90) = -(-sin(a)) = sin(a) = sqrt(1 - cos^2(a)) +- bounds.cone_cutoff = sqrtf(1 - mindp * mindp); +- +- // quantize axis & cutoff to 8-bit SNORM format +- bounds.cone_axis_s8[0] = (signed char)(meshopt_quantizeSnorm(bounds.cone_axis[0], 8)); +- bounds.cone_axis_s8[1] = (signed char)(meshopt_quantizeSnorm(bounds.cone_axis[1], 8)); +- bounds.cone_axis_s8[2] = (signed char)(meshopt_quantizeSnorm(bounds.cone_axis[2], 8)); +- +- // for the 8-bit test to be conservative, we need to adjust the cutoff by measuring the max. error +- float cone_axis_s8_e0 = fabsf(bounds.cone_axis_s8[0] / 127.f - bounds.cone_axis[0]); +- float cone_axis_s8_e1 = fabsf(bounds.cone_axis_s8[1] / 127.f - bounds.cone_axis[1]); +- float cone_axis_s8_e2 = fabsf(bounds.cone_axis_s8[2] / 127.f - bounds.cone_axis[2]); +- +- // note that we need to round this up instead of rounding to nearest, hence +1 +- int cone_cutoff_s8 = int(127 * (bounds.cone_cutoff + cone_axis_s8_e0 + cone_axis_s8_e1 + cone_axis_s8_e2) + 1); +- +- bounds.cone_cutoff_s8 = (cone_cutoff_s8 > 127) ? 
127 : (signed char)(cone_cutoff_s8); +- +- return bounds; +-} +- +-meshopt_Bounds meshopt_computeMeshletBounds(const unsigned int* meshlet_vertices, const unsigned char* meshlet_triangles, size_t triangle_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +-{ +- using namespace meshopt; +- +- assert(triangle_count <= kMeshletMaxTriangles); +- assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); +- assert(vertex_positions_stride % sizeof(float) == 0); +- +- unsigned int indices[kMeshletMaxTriangles * 3]; +- +- for (size_t i = 0; i < triangle_count * 3; ++i) +- { +- unsigned int index = meshlet_vertices[meshlet_triangles[i]]; +- assert(index < vertex_count); +- +- indices[i] = index; +- } +- +- return meshopt_computeClusterBounds(indices, triangle_count * 3, vertex_positions, vertex_count, vertex_positions_stride); +-} +diff --git a/src/3rdparty/meshoptimizer/src/indexcodec.cpp b/src/3rdparty/meshoptimizer/src/indexcodec.cpp +deleted file mode 100644 +index e4495b8..0000000 +--- a/src/3rdparty/meshoptimizer/src/indexcodec.cpp ++++ /dev/null +@@ -1,674 +0,0 @@ +-// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +-#include "meshoptimizer.h" +- +-#include +-#include +- +-// This work is based on: +-// Fabian Giesen. Simple lossless index buffer compression & follow-up. 2013 +-// Conor Stokes. Vertex Cache Optimised Index Buffer Compression. 2014 +-namespace meshopt +-{ +- +-const unsigned char kIndexHeader = 0xe0; +-const unsigned char kSequenceHeader = 0xd0; +- +-static int gEncodeIndexVersion = 0; +- +-typedef unsigned int VertexFifo[16]; +-typedef unsigned int EdgeFifo[16][2]; +- +-static const unsigned int kTriangleIndexOrder[3][3] = { +- {0, 1, 2}, +- {1, 2, 0}, +- {2, 0, 1}, +-}; +- +-static const unsigned char kCodeAuxEncodingTable[16] = { +- 0x00, 0x76, 0x87, 0x56, 0x67, 0x78, 0xa9, 0x86, 0x65, 0x89, 0x68, 0x98, 0x01, 0x69, +- 0, 0, // last two entries aren't used for encoding +-}; +- +-static int rotateTriangle(unsigned int a, unsigned int b, unsigned int c, unsigned int next) +-{ +- (void)a; +- +- return (b == next) ? 1 : (c == next) ? 2 : 0; +-} +- +-static int getEdgeFifo(EdgeFifo fifo, unsigned int a, unsigned int b, unsigned int c, size_t offset) +-{ +- for (int i = 0; i < 16; ++i) +- { +- size_t index = (offset - 1 - i) & 15; +- +- unsigned int e0 = fifo[index][0]; +- unsigned int e1 = fifo[index][1]; +- +- if (e0 == a && e1 == b) +- return (i << 2) | 0; +- if (e0 == b && e1 == c) +- return (i << 2) | 1; +- if (e0 == c && e1 == a) +- return (i << 2) | 2; +- } +- +- return -1; +-} +- +-static void pushEdgeFifo(EdgeFifo fifo, unsigned int a, unsigned int b, size_t& offset) +-{ +- fifo[offset][0] = a; +- fifo[offset][1] = b; +- offset = (offset + 1) & 15; +-} +- +-static int getVertexFifo(VertexFifo fifo, unsigned int v, size_t offset) +-{ +- for (int i = 0; i < 16; ++i) +- { +- size_t index = (offset - 1 - i) & 15; +- +- if (fifo[index] == v) +- return i; +- } +- +- return -1; +-} +- +-static void pushVertexFifo(VertexFifo fifo, unsigned int v, size_t& offset, int cond = 1) +-{ +- fifo[offset] = v; +- offset = (offset + cond) & 15; +-} +- +-static void encodeVByte(unsigned char*& data, unsigned int v) +-{ +- // encode 32-bit value in up to 5 7-bit groups +- do +- { +- *data++ = (v & 127) | (v > 127 ? 
128 : 0); +- v >>= 7; +- } while (v); +-} +- +-static unsigned int decodeVByte(const unsigned char*& data) +-{ +- unsigned char lead = *data++; +- +- // fast path: single byte +- if (lead < 128) +- return lead; +- +- // slow path: up to 4 extra bytes +- // note that this loop always terminates, which is important for malformed data +- unsigned int result = lead & 127; +- unsigned int shift = 7; +- +- for (int i = 0; i < 4; ++i) +- { +- unsigned char group = *data++; +- result |= unsigned(group & 127) << shift; +- shift += 7; +- +- if (group < 128) +- break; +- } +- +- return result; +-} +- +-static void encodeIndex(unsigned char*& data, unsigned int index, unsigned int last) +-{ +- unsigned int d = index - last; +- unsigned int v = (d << 1) ^ (int(d) >> 31); +- +- encodeVByte(data, v); +-} +- +-static unsigned int decodeIndex(const unsigned char*& data, unsigned int last) +-{ +- unsigned int v = decodeVByte(data); +- unsigned int d = (v >> 1) ^ -int(v & 1); +- +- return last + d; +-} +- +-static int getCodeAuxIndex(unsigned char v, const unsigned char* table) +-{ +- for (int i = 0; i < 16; ++i) +- if (table[i] == v) +- return i; +- +- return -1; +-} +- +-static void writeTriangle(void* destination, size_t offset, size_t index_size, unsigned int a, unsigned int b, unsigned int c) +-{ +- if (index_size == 2) +- { +- static_cast(destination)[offset + 0] = (unsigned short)(a); +- static_cast(destination)[offset + 1] = (unsigned short)(b); +- static_cast(destination)[offset + 2] = (unsigned short)(c); +- } +- else +- { +- static_cast(destination)[offset + 0] = a; +- static_cast(destination)[offset + 1] = b; +- static_cast(destination)[offset + 2] = c; +- } +-} +- +-} // namespace meshopt +- +-size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count) +-{ +- using namespace meshopt; +- +- assert(index_count % 3 == 0); +- +- // the minimum valid encoding is header, 1 byte per triangle and a 16-byte codeaux table +- if (buffer_size < 1 + index_count / 3 + 16) +- return 0; +- +- int version = gEncodeIndexVersion; +- +- buffer[0] = (unsigned char)(kIndexHeader | version); +- +- EdgeFifo edgefifo; +- memset(edgefifo, -1, sizeof(edgefifo)); +- +- VertexFifo vertexfifo; +- memset(vertexfifo, -1, sizeof(vertexfifo)); +- +- size_t edgefifooffset = 0; +- size_t vertexfifooffset = 0; +- +- unsigned int next = 0; +- unsigned int last = 0; +- +- unsigned char* code = buffer + 1; +- unsigned char* data = code + index_count / 3; +- unsigned char* data_safe_end = buffer + buffer_size - 16; +- +- int fecmax = version >= 1 ? 
13 : 15; +- +- // use static encoding table; it's possible to pack the result and then build an optimal table and repack +- // for now we keep it simple and use the table that has been generated based on symbol frequency on a training mesh set +- const unsigned char* codeaux_table = kCodeAuxEncodingTable; +- +- for (size_t i = 0; i < index_count; i += 3) +- { +- // make sure we have enough space to write a triangle +- // each triangle writes at most 16 bytes: 1b for codeaux and 5b for each free index +- // after this we can be sure we can write without extra bounds checks +- if (data > data_safe_end) +- return 0; +- +- int fer = getEdgeFifo(edgefifo, indices[i + 0], indices[i + 1], indices[i + 2], edgefifooffset); +- +- if (fer >= 0 && (fer >> 2) < 15) +- { +- const unsigned int* order = kTriangleIndexOrder[fer & 3]; +- +- unsigned int a = indices[i + order[0]], b = indices[i + order[1]], c = indices[i + order[2]]; +- +- // encode edge index and vertex fifo index, next or free index +- int fe = fer >> 2; +- int fc = getVertexFifo(vertexfifo, c, vertexfifooffset); +- +- int fec = (fc >= 1 && fc < fecmax) ? fc : (c == next) ? (next++, 0) : 15; +- +- if (fec == 15 && version >= 1) +- { +- // encode last-1 and last+1 to optimize strip-like sequences +- if (c + 1 == last) +- fec = 13, last = c; +- if (c == last + 1) +- fec = 14, last = c; +- } +- +- *code++ = (unsigned char)((fe << 4) | fec); +- +- // note that we need to update the last index since free indices are delta-encoded +- if (fec == 15) +- encodeIndex(data, c, last), last = c; +- +- // we only need to push third vertex since first two are likely already in the vertex fifo +- if (fec == 0 || fec >= fecmax) +- pushVertexFifo(vertexfifo, c, vertexfifooffset); +- +- // we only need to push two new edges to edge fifo since the third one is already there +- pushEdgeFifo(edgefifo, c, b, edgefifooffset); +- pushEdgeFifo(edgefifo, a, c, edgefifooffset); +- } +- else +- { +- int rotation = rotateTriangle(indices[i + 0], indices[i + 1], indices[i + 2], next); +- const unsigned int* order = kTriangleIndexOrder[rotation]; +- +- unsigned int a = indices[i + order[0]], b = indices[i + order[1]], c = indices[i + order[2]]; +- +- // if a/b/c are 0/1/2, we emit a reset code +- bool reset = false; +- +- if (a == 0 && b == 1 && c == 2 && next > 0 && version >= 1) +- { +- reset = true; +- next = 0; +- +- // reset vertex fifo to make sure we don't accidentally reference vertices from that in the future +- // this makes sure next continues to get incremented instead of being stuck +- memset(vertexfifo, -1, sizeof(vertexfifo)); +- } +- +- int fb = getVertexFifo(vertexfifo, b, vertexfifooffset); +- int fc = getVertexFifo(vertexfifo, c, vertexfifooffset); +- +- // after rotation, a is almost always equal to next, so we don't waste bits on FIFO encoding for a +- int fea = (a == next) ? (next++, 0) : 15; +- int feb = (fb >= 0 && fb < 14) ? (fb + 1) : (b == next) ? (next++, 0) : 15; +- int fec = (fc >= 0 && fc < 14) ? (fc + 1) : (c == next) ? 
(next++, 0) : 15; +- +- // we encode feb & fec in 4 bits using a table if possible, and as a full byte otherwise +- unsigned char codeaux = (unsigned char)((feb << 4) | fec); +- int codeauxindex = getCodeAuxIndex(codeaux, codeaux_table); +- +- // <14 encodes an index into codeaux table, 14 encodes fea=0, 15 encodes fea=15 +- if (fea == 0 && codeauxindex >= 0 && codeauxindex < 14 && !reset) +- { +- *code++ = (unsigned char)((15 << 4) | codeauxindex); +- } +- else +- { +- *code++ = (unsigned char)((15 << 4) | 14 | fea); +- *data++ = codeaux; +- } +- +- // note that we need to update the last index since free indices are delta-encoded +- if (fea == 15) +- encodeIndex(data, a, last), last = a; +- +- if (feb == 15) +- encodeIndex(data, b, last), last = b; +- +- if (fec == 15) +- encodeIndex(data, c, last), last = c; +- +- // only push vertices that weren't already in fifo +- if (fea == 0 || fea == 15) +- pushVertexFifo(vertexfifo, a, vertexfifooffset); +- +- if (feb == 0 || feb == 15) +- pushVertexFifo(vertexfifo, b, vertexfifooffset); +- +- if (fec == 0 || fec == 15) +- pushVertexFifo(vertexfifo, c, vertexfifooffset); +- +- // all three edges aren't in the fifo; pushing all of them is important so that we can match them for later triangles +- pushEdgeFifo(edgefifo, b, a, edgefifooffset); +- pushEdgeFifo(edgefifo, c, b, edgefifooffset); +- pushEdgeFifo(edgefifo, a, c, edgefifooffset); +- } +- } +- +- // make sure we have enough space to write codeaux table +- if (data > data_safe_end) +- return 0; +- +- // add codeaux encoding table to the end of the stream; this is used for decoding codeaux *and* as padding +- // we need padding for decoding to be able to assume that each triangle is encoded as <= 16 bytes of extra data +- // this is enough space for aux byte + 5 bytes per varint index which is the absolute worst case for any input +- for (size_t i = 0; i < 16; ++i) +- { +- // decoder assumes that table entries never refer to separately encoded indices +- assert((codeaux_table[i] & 0xf) != 0xf && (codeaux_table[i] >> 4) != 0xf); +- +- *data++ = codeaux_table[i]; +- } +- +- // since we encode restarts as codeaux without a table reference, we need to make sure 00 is encoded as a table reference +- assert(codeaux_table[0] == 0); +- +- assert(data >= buffer + index_count / 3 + 16); +- assert(data <= buffer + buffer_size); +- +- return data - buffer; +-} +- +-size_t meshopt_encodeIndexBufferBound(size_t index_count, size_t vertex_count) +-{ +- assert(index_count % 3 == 0); +- +- // compute number of bits required for each index +- unsigned int vertex_bits = 1; +- +- while (vertex_bits < 32 && vertex_count > size_t(1) << vertex_bits) +- vertex_bits++; +- +- // worst-case encoding is 2 header bytes + 3 varint-7 encoded index deltas +- unsigned int vertex_groups = (vertex_bits + 1 + 6) / 7; +- +- return 1 + (index_count / 3) * (2 + 3 * vertex_groups) + 16; +-} +- +-void meshopt_encodeIndexVersion(int version) +-{ +- assert(unsigned(version) <= 1); +- +- meshopt::gEncodeIndexVersion = version; +-} +- +-int meshopt_decodeIndexBuffer(void* destination, size_t index_count, size_t index_size, const unsigned char* buffer, size_t buffer_size) +-{ +- using namespace meshopt; +- +- assert(index_count % 3 == 0); +- assert(index_size == 2 || index_size == 4); +- +- // the minimum valid encoding is header, 1 byte per triangle and a 16-byte codeaux table +- if (buffer_size < 1 + index_count / 3 + 16) +- return -2; +- +- if ((buffer[0] & 0xf0) != kIndexHeader) +- return -1; +- +- int version = buffer[0] & 0x0f; +- 
if (version > 1) +- return -1; +- +- EdgeFifo edgefifo; +- memset(edgefifo, -1, sizeof(edgefifo)); +- +- VertexFifo vertexfifo; +- memset(vertexfifo, -1, sizeof(vertexfifo)); +- +- size_t edgefifooffset = 0; +- size_t vertexfifooffset = 0; +- +- unsigned int next = 0; +- unsigned int last = 0; +- +- int fecmax = version >= 1 ? 13 : 15; +- +- // since we store 16-byte codeaux table at the end, triangle data has to begin before data_safe_end +- const unsigned char* code = buffer + 1; +- const unsigned char* data = code + index_count / 3; +- const unsigned char* data_safe_end = buffer + buffer_size - 16; +- +- const unsigned char* codeaux_table = data_safe_end; +- +- for (size_t i = 0; i < index_count; i += 3) +- { +- // make sure we have enough data to read for a triangle +- // each triangle reads at most 16 bytes of data: 1b for codeaux and 5b for each free index +- // after this we can be sure we can read without extra bounds checks +- if (data > data_safe_end) +- return -2; +- +- unsigned char codetri = *code++; +- +- if (codetri < 0xf0) +- { +- int fe = codetri >> 4; +- +- // fifo reads are wrapped around 16 entry buffer +- unsigned int a = edgefifo[(edgefifooffset - 1 - fe) & 15][0]; +- unsigned int b = edgefifo[(edgefifooffset - 1 - fe) & 15][1]; +- +- int fec = codetri & 15; +- +- // note: this is the most common path in the entire decoder +- // inside this if we try to stay branchless (by using cmov/etc.) since these aren't predictable +- if (fec < fecmax) +- { +- // fifo reads are wrapped around 16 entry buffer +- unsigned int cf = vertexfifo[(vertexfifooffset - 1 - fec) & 15]; +- unsigned int c = (fec == 0) ? next : cf; +- +- int fec0 = fec == 0; +- next += fec0; +- +- // output triangle +- writeTriangle(destination, i, index_size, a, b, c); +- +- // push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly +- pushVertexFifo(vertexfifo, c, vertexfifooffset, fec0); +- +- pushEdgeFifo(edgefifo, c, b, edgefifooffset); +- pushEdgeFifo(edgefifo, a, c, edgefifooffset); +- } +- else +- { +- unsigned int c = 0; +- +- // fec - (fec ^ 3) decodes 13, 14 into -1, 1 +- // note that we need to update the last index since free indices are delta-encoded +- last = c = (fec != 15) ? last + (fec - (fec ^ 3)) : decodeIndex(data, last); +- +- // output triangle +- writeTriangle(destination, i, index_size, a, b, c); +- +- // push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly +- pushVertexFifo(vertexfifo, c, vertexfifooffset); +- +- pushEdgeFifo(edgefifo, c, b, edgefifooffset); +- pushEdgeFifo(edgefifo, a, c, edgefifooffset); +- } +- } +- else +- { +- // fast path: read codeaux from the table +- if (codetri < 0xfe) +- { +- unsigned char codeaux = codeaux_table[codetri & 15]; +- +- // note: table can't contain feb/fec=15 +- int feb = codeaux >> 4; +- int fec = codeaux & 15; +- +- // fifo reads are wrapped around 16 entry buffer +- // also note that we increment next for all three vertices before decoding indices - this matches encoder behavior +- unsigned int a = next++; +- +- unsigned int bf = vertexfifo[(vertexfifooffset - feb) & 15]; +- unsigned int b = (feb == 0) ? next : bf; +- +- int feb0 = feb == 0; +- next += feb0; +- +- unsigned int cf = vertexfifo[(vertexfifooffset - fec) & 15]; +- unsigned int c = (fec == 0) ? 
next : cf; +- +- int fec0 = fec == 0; +- next += fec0; +- +- // output triangle +- writeTriangle(destination, i, index_size, a, b, c); +- +- // push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly +- pushVertexFifo(vertexfifo, a, vertexfifooffset); +- pushVertexFifo(vertexfifo, b, vertexfifooffset, feb0); +- pushVertexFifo(vertexfifo, c, vertexfifooffset, fec0); +- +- pushEdgeFifo(edgefifo, b, a, edgefifooffset); +- pushEdgeFifo(edgefifo, c, b, edgefifooffset); +- pushEdgeFifo(edgefifo, a, c, edgefifooffset); +- } +- else +- { +- // slow path: read a full byte for codeaux instead of using a table lookup +- unsigned char codeaux = *data++; +- +- int fea = codetri == 0xfe ? 0 : 15; +- int feb = codeaux >> 4; +- int fec = codeaux & 15; +- +- // reset: codeaux is 0 but encoded as not-a-table +- if (codeaux == 0) +- next = 0; +- +- // fifo reads are wrapped around 16 entry buffer +- // also note that we increment next for all three vertices before decoding indices - this matches encoder behavior +- unsigned int a = (fea == 0) ? next++ : 0; +- unsigned int b = (feb == 0) ? next++ : vertexfifo[(vertexfifooffset - feb) & 15]; +- unsigned int c = (fec == 0) ? next++ : vertexfifo[(vertexfifooffset - fec) & 15]; +- +- // note that we need to update the last index since free indices are delta-encoded +- if (fea == 15) +- last = a = decodeIndex(data, last); +- +- if (feb == 15) +- last = b = decodeIndex(data, last); +- +- if (fec == 15) +- last = c = decodeIndex(data, last); +- +- // output triangle +- writeTriangle(destination, i, index_size, a, b, c); +- +- // push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly +- pushVertexFifo(vertexfifo, a, vertexfifooffset); +- pushVertexFifo(vertexfifo, b, vertexfifooffset, (feb == 0) | (feb == 15)); +- pushVertexFifo(vertexfifo, c, vertexfifooffset, (fec == 0) | (fec == 15)); +- +- pushEdgeFifo(edgefifo, b, a, edgefifooffset); +- pushEdgeFifo(edgefifo, c, b, edgefifooffset); +- pushEdgeFifo(edgefifo, a, c, edgefifooffset); +- } +- } +- } +- +- // we should've read all data bytes and stopped at the boundary between data and codeaux table +- if (data != data_safe_end) +- return -3; +- +- return 0; +-} +- +-size_t meshopt_encodeIndexSequence(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count) +-{ +- using namespace meshopt; +- +- // the minimum valid encoding is header, 1 byte per index and a 4-byte tail +- if (buffer_size < 1 + index_count + 4) +- return 0; +- +- int version = gEncodeIndexVersion; +- +- buffer[0] = (unsigned char)(kSequenceHeader | version); +- +- unsigned int last[2] = {}; +- unsigned int current = 0; +- +- unsigned char* data = buffer + 1; +- unsigned char* data_safe_end = buffer + buffer_size - 4; +- +- for (size_t i = 0; i < index_count; ++i) +- { +- // make sure we have enough data to write +- // each index writes at most 5 bytes of data; there's a 4 byte tail after data_safe_end +- // after this we can be sure we can write without extra bounds checks +- if (data >= data_safe_end) +- return 0; +- +- unsigned int index = indices[i]; +- +- // this is a heuristic that switches between baselines when the delta grows too large +- // we want the encoded delta to fit into one byte (7 bits), but 2 bits are used for sign and baseline index +- // for now we immediately switch the baseline when delta grows too large - this can be adjusted arbitrarily +- int cd = int(index - last[current]); +- current 
^= ((cd < 0 ? -cd : cd) >= 30); +- +- // encode delta from the last index +- unsigned int d = index - last[current]; +- unsigned int v = (d << 1) ^ (int(d) >> 31); +- +- // note: low bit encodes the index of the last baseline which will be used for reconstruction +- encodeVByte(data, (v << 1) | current); +- +- // update last for the next iteration that uses it +- last[current] = index; +- } +- +- // make sure we have enough space to write tail +- if (data > data_safe_end) +- return 0; +- +- for (int k = 0; k < 4; ++k) +- *data++ = 0; +- +- return data - buffer; +-} +- +-size_t meshopt_encodeIndexSequenceBound(size_t index_count, size_t vertex_count) +-{ +- // compute number of bits required for each index +- unsigned int vertex_bits = 1; +- +- while (vertex_bits < 32 && vertex_count > size_t(1) << vertex_bits) +- vertex_bits++; +- +- // worst-case encoding is 1 varint-7 encoded index delta for a K bit value and an extra bit +- unsigned int vertex_groups = (vertex_bits + 1 + 1 + 6) / 7; +- +- return 1 + index_count * vertex_groups + 4; +-} +- +-int meshopt_decodeIndexSequence(void* destination, size_t index_count, size_t index_size, const unsigned char* buffer, size_t buffer_size) +-{ +- using namespace meshopt; +- +- // the minimum valid encoding is header, 1 byte per index and a 4-byte tail +- if (buffer_size < 1 + index_count + 4) +- return -2; +- +- if ((buffer[0] & 0xf0) != kSequenceHeader) +- return -1; +- +- int version = buffer[0] & 0x0f; +- if (version > 1) +- return -1; +- +- const unsigned char* data = buffer + 1; +- const unsigned char* data_safe_end = buffer + buffer_size - 4; +- +- unsigned int last[2] = {}; +- +- for (size_t i = 0; i < index_count; ++i) +- { +- // make sure we have enough data to read +- // each index reads at most 5 bytes of data; there's a 4 byte tail after data_safe_end +- // after this we can be sure we can read without extra bounds checks +- if (data >= data_safe_end) +- return -2; +- +- unsigned int v = decodeVByte(data); +- +- // decode the index of the last baseline +- unsigned int current = v & 1; +- v >>= 1; +- +- // reconstruct index as a delta +- unsigned int d = (v >> 1) ^ -int(v & 1); +- unsigned int index = last[current] + d; +- +- // update last for the next iteration that uses it +- last[current] = index; +- +- if (index_size == 2) +- { +- static_cast<unsigned short*>(destination)[i] = (unsigned short)(index); +- } +- else +- { +- static_cast<unsigned int*>(destination)[i] = index; +- } +- } +- +- // we should've read all data bytes and stopped at the boundary between data and tail +- if (data != data_safe_end) +- return -3; +- +- return 0; +-} +diff --git a/src/3rdparty/meshoptimizer/src/indexgenerator.cpp b/src/3rdparty/meshoptimizer/src/indexgenerator.cpp +deleted file mode 100644 +index 9a25c21..0000000 +--- a/src/3rdparty/meshoptimizer/src/indexgenerator.cpp ++++ /dev/null +@@ -1,551 +0,0 @@ +-// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +-#include "meshoptimizer.h" +- +-#include <assert.h> +-#include <string.h> +- +-// This work is based on: +-// John McDonald, Mark Kilgard. Crack-Free Point-Normal Triangles using Adjacent Edge Normals. 2010
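For context, the index sequence codec deleted above stores each index as a delta from one of two running baselines, mapped through a zigzag transform so that small negative and small positive deltas both become short varint payloads. A minimal sketch of that mapping (the helper names are illustrative, not part of the library API):

```cpp
// Zigzag mapping as used by the encoder ((d << 1) ^ (int(d) >> 31)) and
// the decoder ((v >> 1) ^ -int(v & 1)) in the deleted code above.
#include <cassert>
#include <cstdint>

static uint32_t zigzagEncode(int32_t d)
{
    return (uint32_t(d) << 1) ^ uint32_t(d >> 31); // sign bit moves to bit 0
}

static int32_t zigzagDecode(uint32_t v)
{
    return int32_t(v >> 1) ^ -int32_t(v & 1); // undo the sign interleaving
}

int main()
{
    for (int32_t d : {0, 1, -1, 2, -2, 1000, -1000})
        assert(zigzagDecode(zigzagEncode(d)) == d);
}
```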
+-namespace meshopt +-{ +- +-static unsigned int hashUpdate4(unsigned int h, const unsigned char* key, size_t len) +-{ +- // MurmurHash2 +- const unsigned int m = 0x5bd1e995; +- const int r = 24; +- +- while (len >= 4) +- { +- unsigned int k = *reinterpret_cast<const unsigned int*>(key); +- +- k *= m; +- k ^= k >> r; +- k *= m; +- +- h *= m; +- h ^= k; +- +- key += 4; +- len -= 4; +- } +- +- return h; +-} +- +-struct VertexHasher +-{ +- const unsigned char* vertices; +- size_t vertex_size; +- size_t vertex_stride; +- +- size_t hash(unsigned int index) const +- { +- return hashUpdate4(0, vertices + index * vertex_stride, vertex_size); +- } +- +- bool equal(unsigned int lhs, unsigned int rhs) const +- { +- return memcmp(vertices + lhs * vertex_stride, vertices + rhs * vertex_stride, vertex_size) == 0; +- } +-}; +- +-struct VertexStreamHasher +-{ +- const meshopt_Stream* streams; +- size_t stream_count; +- +- size_t hash(unsigned int index) const +- { +- unsigned int h = 0; +- +- for (size_t i = 0; i < stream_count; ++i) +- { +- const meshopt_Stream& s = streams[i]; +- const unsigned char* data = static_cast<const unsigned char*>(s.data); +- +- h = hashUpdate4(h, data + index * s.stride, s.size); +- } +- +- return h; +- } +- +- bool equal(unsigned int lhs, unsigned int rhs) const +- { +- for (size_t i = 0; i < stream_count; ++i) +- { +- const meshopt_Stream& s = streams[i]; +- const unsigned char* data = static_cast<const unsigned char*>(s.data); +- +- if (memcmp(data + lhs * s.stride, data + rhs * s.stride, s.size) != 0) +- return false; +- } +- +- return true; +- } +-}; +- +-struct EdgeHasher +-{ +- const unsigned int* remap; +- +- size_t hash(unsigned long long edge) const +- { +- unsigned int e0 = unsigned(edge >> 32); +- unsigned int e1 = unsigned(edge); +- +- unsigned int h1 = remap[e0]; +- unsigned int h2 = remap[e1]; +- +- const unsigned int m = 0x5bd1e995; +- +- // MurmurHash64B finalizer +- h1 ^= h2 >> 18; +- h1 *= m; +- h2 ^= h1 >> 22; +- h2 *= m; +- h1 ^= h2 >> 17; +- h1 *= m; +- h2 ^= h1 >> 19; +- h2 *= m; +- +- return h2; +- } +- +- bool equal(unsigned long long lhs, unsigned long long rhs) const +- { +- unsigned int l0 = unsigned(lhs >> 32); +- unsigned int l1 = unsigned(lhs); +- +- unsigned int r0 = unsigned(rhs >> 32); +- unsigned int r1 = unsigned(rhs); +- +- return remap[l0] == remap[r0] && remap[l1] == remap[r1]; +- } +-}; +- +-static size_t hashBuckets(size_t count) +-{ +- size_t buckets = 1; +- while (buckets < count + count / 4) +- buckets *= 2; +- +- return buckets; +-} +- +-template <typename T, typename Hash> +-static T* hashLookup(T* table, size_t buckets, const Hash& hash, const T& key, const T& empty) +-{ +- assert(buckets > 0); +- assert((buckets & (buckets - 1)) == 0); +- +- size_t hashmod = buckets - 1; +- size_t bucket = hash.hash(key) & hashmod; +- +- for (size_t probe = 0; probe <= hashmod; ++probe) +- { +- T& item = table[bucket]; +- +- if (item == empty) +- return &item; +- +- if (hash.equal(item, key)) +- return &item; +- +- // hash collision, quadratic probing +- bucket = (bucket + probe + 1) & hashmod; +- } +- +- assert(false && "Hash table is full"); // unreachable +- return 0; +-} +- +-static void buildPositionRemap(unsigned int* remap, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, meshopt_Allocator& allocator) +-{ +- VertexHasher vertex_hasher = {reinterpret_cast<const unsigned char*>(vertex_positions), 3 * sizeof(float), vertex_positions_stride}; +- +- size_t vertex_table_size = hashBuckets(vertex_count); +- unsigned int* vertex_table = allocator.allocate<unsigned int>(vertex_table_size); +- memset(vertex_table, -1, vertex_table_size * sizeof(unsigned int)); +- +- for (size_t i = 0; i < vertex_count; ++i) +- { +- unsigned int index = unsigned(i); +- unsigned int* entry = hashLookup(vertex_table, vertex_table_size, vertex_hasher, index, ~0u); +- +- if (*entry == ~0u) +- *entry = index; +- +- remap[index] = *entry; +- } +-} +- +-} // namespace meshopt +- +-size_t meshopt_generateVertexRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size) +-{ +- using namespace meshopt; +- +- assert(indices || index_count == vertex_count); +- assert(!indices || index_count % 3 == 0); +- assert(vertex_size > 0 && vertex_size <= 256); +- +- meshopt_Allocator allocator; +- +- memset(destination, -1, vertex_count * sizeof(unsigned int)); +- +- VertexHasher hasher = {static_cast<const unsigned char*>(vertices), vertex_size, vertex_size}; +- +- size_t table_size = hashBuckets(vertex_count); +- unsigned int* table = allocator.allocate<unsigned int>(table_size); +- memset(table, -1, table_size * sizeof(unsigned int)); +- +- unsigned int next_vertex = 0; +- +- for (size_t i = 0; i < index_count; ++i) +- { +- unsigned int index = indices ? indices[i] : unsigned(i); +- assert(index < vertex_count); +- +- if (destination[index] == ~0u) +- { +- unsigned int* entry = hashLookup(table, table_size, hasher, index, ~0u); +- +- if (*entry == ~0u) +- { +- *entry = index; +- +- destination[index] = next_vertex++; +- } +- else +- { +- assert(destination[*entry] != ~0u); +- +- destination[index] = destination[*entry]; +- } +- } +- } +- +- assert(next_vertex <= vertex_count); +- +- return next_vertex; +-} +- +-size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count) +-{ +- using namespace meshopt; +- +- assert(indices || index_count == vertex_count); +- assert(index_count % 3 == 0); +- assert(stream_count > 0 && stream_count <= 16); +- +- for (size_t i = 0; i < stream_count; ++i) +- { +- assert(streams[i].size > 0 && streams[i].size <= 256); +- assert(streams[i].size <= streams[i].stride); +- } +- +- meshopt_Allocator allocator; +- +- memset(destination, -1, vertex_count * sizeof(unsigned int)); +- +- VertexStreamHasher hasher = {streams, stream_count}; +- +- size_t table_size = hashBuckets(vertex_count); +- unsigned int* table = allocator.allocate<unsigned int>(table_size); +- memset(table, -1, table_size * sizeof(unsigned int)); +- +- unsigned int next_vertex = 0; +- +- for (size_t i = 0; i < index_count; ++i) +- { +- unsigned int index = indices ?
indices[i] : unsigned(i); +- assert(index < vertex_count); +- +- if (destination[index] == ~0u) +- { +- unsigned int* entry = hashLookup(table, table_size, hasher, index, ~0u); +- +- if (*entry == ~0u) +- { +- *entry = index; +- +- destination[index] = next_vertex++; +- } +- else +- { +- assert(destination[*entry] != ~0u); +- +- destination[index] = destination[*entry]; +- } +- } +- } +- +- assert(next_vertex <= vertex_count); +- +- return next_vertex; +-} +- +-void meshopt_remapVertexBuffer(void* destination, const void* vertices, size_t vertex_count, size_t vertex_size, const unsigned int* remap) +-{ +- assert(vertex_size > 0 && vertex_size <= 256); +- +- meshopt_Allocator allocator; +- +- // support in-place remap +- if (destination == vertices) +- { +- unsigned char* vertices_copy = allocator.allocate<unsigned char>(vertex_count * vertex_size); +- memcpy(vertices_copy, vertices, vertex_count * vertex_size); +- vertices = vertices_copy; +- } +- +- for (size_t i = 0; i < vertex_count; ++i) +- { +- if (remap[i] != ~0u) +- { +- assert(remap[i] < vertex_count); +- +- memcpy(static_cast<unsigned char*>(destination) + remap[i] * vertex_size, static_cast<const unsigned char*>(vertices) + i * vertex_size, vertex_size); +- } +- } +-} +- +-void meshopt_remapIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const unsigned int* remap) +-{ +- assert(index_count % 3 == 0); +- +- for (size_t i = 0; i < index_count; ++i) +- { +- unsigned int index = indices ? indices[i] : unsigned(i); +- assert(remap[index] != ~0u); +- +- destination[i] = remap[index]; +- } +-} +- +-void meshopt_generateShadowIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size, size_t vertex_stride) +-{ +- using namespace meshopt; +- +- assert(indices); +- assert(index_count % 3 == 0); +- assert(vertex_size > 0 && vertex_size <= 256); +- assert(vertex_size <= vertex_stride); +- +- meshopt_Allocator allocator; +- +- unsigned int* remap = allocator.allocate<unsigned int>(vertex_count); +- memset(remap, -1, vertex_count * sizeof(unsigned int)); +- +- VertexHasher hasher = {static_cast<const unsigned char*>(vertices), vertex_size, vertex_stride}; +- +- size_t table_size = hashBuckets(vertex_count); +- unsigned int* table = allocator.allocate<unsigned int>(table_size); +- memset(table, -1, table_size * sizeof(unsigned int)); +- +- for (size_t i = 0; i < index_count; ++i) +- { +- unsigned int index = indices[i]; +- assert(index < vertex_count); +- +- if (remap[index] == ~0u) +- { +- unsigned int* entry = hashLookup(table, table_size, hasher, index, ~0u); +- +- if (*entry == ~0u) +- *entry = index; +- +- remap[index] = *entry; +- } +- +- destination[i] = remap[index]; +- } +-} +- +-void meshopt_generateShadowIndexBufferMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count) +-{ +- using namespace meshopt; +- +- assert(indices); +- assert(index_count % 3 == 0); +- assert(stream_count > 0 && stream_count <= 16); +- +- for (size_t i = 0; i < stream_count; ++i) +- { +- assert(streams[i].size > 0 && streams[i].size <= 256); +- assert(streams[i].size <= streams[i].stride); +- } +- +- meshopt_Allocator allocator; +- +- unsigned int* remap = allocator.allocate<unsigned int>(vertex_count); +- memset(remap, -1, vertex_count * sizeof(unsigned int)); +- +- VertexStreamHasher hasher = {streams, stream_count}; +- +- size_t table_size = hashBuckets(vertex_count); +- unsigned int* table = allocator.allocate<unsigned int>(table_size); +-
memset(table, -1, table_size * sizeof(unsigned int)); +- +- for (size_t i = 0; i < index_count; ++i) +- { +- unsigned int index = indices[i]; +- assert(index < vertex_count); +- +- if (remap[index] == ~0u) +- { +- unsigned int* entry = hashLookup(table, table_size, hasher, index, ~0u); +- +- if (*entry == ~0u) +- *entry = index; +- +- remap[index] = *entry; +- } +- +- destination[i] = remap[index]; +- } +-} +- +-void meshopt_generateAdjacencyIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +-{ +- using namespace meshopt; +- +- assert(index_count % 3 == 0); +- assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); +- assert(vertex_positions_stride % sizeof(float) == 0); +- +- meshopt_Allocator allocator; +- +- static const int next[4] = {1, 2, 0, 1}; +- +- // build position remap: for each vertex, which other (canonical) vertex does it map to? +- unsigned int* remap = allocator.allocate<unsigned int>(vertex_count); +- buildPositionRemap(remap, vertex_positions, vertex_count, vertex_positions_stride, allocator); +- +- // build edge set; this stores all triangle edges but we can look these up by any other wedge +- EdgeHasher edge_hasher = {remap}; +- +- size_t edge_table_size = hashBuckets(index_count); +- unsigned long long* edge_table = allocator.allocate<unsigned long long>(edge_table_size); +- unsigned int* edge_vertex_table = allocator.allocate<unsigned int>(edge_table_size); +- +- memset(edge_table, -1, edge_table_size * sizeof(unsigned long long)); +- memset(edge_vertex_table, -1, edge_table_size * sizeof(unsigned int)); +- +- for (size_t i = 0; i < index_count; i += 3) +- { +- for (int e = 0; e < 3; ++e) +- { +- unsigned int i0 = indices[i + e]; +- unsigned int i1 = indices[i + next[e]]; +- unsigned int i2 = indices[i + next[e + 1]]; +- assert(i0 < vertex_count && i1 < vertex_count && i2 < vertex_count); +- +- unsigned long long edge = ((unsigned long long)i0 << 32) | i1; +- unsigned long long* entry = hashLookup(edge_table, edge_table_size, edge_hasher, edge, ~0ull); +- +- if (*entry == ~0ull) +- { +- *entry = edge; +- +- // store vertex opposite to the edge +- edge_vertex_table[entry - edge_table] = i2; +- } +- } +- } +- +- // build resulting index buffer: 6 indices for each input triangle +- for (size_t i = 0; i < index_count; i += 3) +- { +- unsigned int patch[6]; +- +- for (int e = 0; e < 3; ++e) +- { +- unsigned int i0 = indices[i + e]; +- unsigned int i1 = indices[i + next[e]]; +- assert(i0 < vertex_count && i1 < vertex_count); +- +- // note: this refers to the opposite edge! +- unsigned long long edge = ((unsigned long long)i1 << 32) | i0; +- unsigned long long* oppe = hashLookup(edge_table, edge_table_size, edge_hasher, edge, ~0ull); +- +- patch[e * 2 + 0] = i0; +- patch[e * 2 + 1] = (*oppe == ~0ull) ? i0 : edge_vertex_table[oppe - edge_table]; +- } +- +- memcpy(destination + i * 2, patch, sizeof(patch)); +- } +-} +- +-void meshopt_generateTessellationIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +-{ +- using namespace meshopt; +- +- assert(index_count % 3 == 0); +- assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); +- assert(vertex_positions_stride % sizeof(float) == 0); +- +- meshopt_Allocator allocator; +- +- static const int next[3] = {1, 2, 0}; +- +- // build position remap: for each vertex, which other (canonical) vertex does it map to?
+- unsigned int* remap = allocator.allocate<unsigned int>(vertex_count); +- buildPositionRemap(remap, vertex_positions, vertex_count, vertex_positions_stride, allocator); +- +- // build edge set; this stores all triangle edges but we can look these up by any other wedge +- EdgeHasher edge_hasher = {remap}; +- +- size_t edge_table_size = hashBuckets(index_count); +- unsigned long long* edge_table = allocator.allocate<unsigned long long>(edge_table_size); +- memset(edge_table, -1, edge_table_size * sizeof(unsigned long long)); +- +- for (size_t i = 0; i < index_count; i += 3) +- { +- for (int e = 0; e < 3; ++e) +- { +- unsigned int i0 = indices[i + e]; +- unsigned int i1 = indices[i + next[e]]; +- assert(i0 < vertex_count && i1 < vertex_count); +- +- unsigned long long edge = ((unsigned long long)i0 << 32) | i1; +- unsigned long long* entry = hashLookup(edge_table, edge_table_size, edge_hasher, edge, ~0ull); +- +- if (*entry == ~0ull) +- *entry = edge; +- } +- } +- +- // build resulting index buffer: 12 indices for each input triangle +- for (size_t i = 0; i < index_count; i += 3) +- { +- unsigned int patch[12]; +- +- for (int e = 0; e < 3; ++e) +- { +- unsigned int i0 = indices[i + e]; +- unsigned int i1 = indices[i + next[e]]; +- assert(i0 < vertex_count && i1 < vertex_count); +- +- // note: this refers to the opposite edge! +- unsigned long long edge = ((unsigned long long)i1 << 32) | i0; +- unsigned long long oppe = *hashLookup(edge_table, edge_table_size, edge_hasher, edge, ~0ull); +- +- // use the same edge if opposite edge doesn't exist (border) +- oppe = (oppe == ~0ull) ? edge : oppe; +- +- // triangle index (0, 1, 2) +- patch[e] = i0; +- +- // opposite edge (3, 4; 5, 6; 7, 8) +- patch[3 + e * 2 + 0] = unsigned(oppe); +- patch[3 + e * 2 + 1] = unsigned(oppe >> 32); +- +- // dominant vertex (9, 10, 11) +- patch[9 + e] = remap[i0]; +- } +- +- memcpy(destination + i * 4, patch, sizeof(patch)); +- } +-} +diff --git a/src/3rdparty/meshoptimizer/src/meshoptimizer.h b/src/3rdparty/meshoptimizer/src/meshoptimizer.h +deleted file mode 100644 +index f94dbaf..0000000 +--- a/src/3rdparty/meshoptimizer/src/meshoptimizer.h ++++ /dev/null +@@ -1,1069 +0,0 @@ +-/** +- * meshoptimizer - version 0.18 +- * +- * Copyright (C) 2016-2022, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) +- * Report bugs and download new versions at https://github.com/zeux/meshoptimizer +- * +- * This library is distributed under the MIT License. See notice at the end of this file. +- */ +-#pragma once +- +-#include <assert.h> +-#include <stddef.h> +- +-/* Version macro; major * 1000 + minor * 10 + patch */ +-#define MESHOPTIMIZER_VERSION 180 /* 0.18 */ +- +-/* If no API is defined, assume default */ +-#ifndef MESHOPTIMIZER_API +-#define MESHOPTIMIZER_API +-#endif +- +-/* Set the calling-convention for alloc/dealloc function pointers */ +-#ifndef MESHOPTIMIZER_ALLOC_CALLCONV +-#ifdef _MSC_VER +-#define MESHOPTIMIZER_ALLOC_CALLCONV __cdecl +-#else +-#define MESHOPTIMIZER_ALLOC_CALLCONV +-#endif +-#endif +- +-/* Experimental APIs have unstable interface and might have implementation that's not fully tested or optimized */ +-#define MESHOPTIMIZER_EXPERIMENTAL MESHOPTIMIZER_API +- +-/* C interface */ +-#ifdef __cplusplus +-extern "C" { +-#endif +- +-/** +- * Vertex attribute stream, similar to glVertexPointer +- * Each element takes size bytes, with stride controlling the spacing between successive elements.
+- */ +-struct meshopt_Stream +-{ +- const void* data; +- size_t size; +- size_t stride; +-}; +- +-/** +- * Generates a vertex remap table from the vertex buffer and an optional index buffer and returns number of unique vertices +- * As a result, all vertices that are binary equivalent map to the same (new) location, with no gaps in the resulting sequence. +- * Resulting remap table maps old vertices to new vertices and can be used in meshopt_remapVertexBuffer/meshopt_remapIndexBuffer. +- * Note that binary equivalence considers all vertex_size bytes, including padding which should be zero-initialized. +- * +- * destination must contain enough space for the resulting remap table (vertex_count elements) +- * indices can be NULL if the input is unindexed +- */ +-MESHOPTIMIZER_API size_t meshopt_generateVertexRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size); +- +-/** +- * Generates a vertex remap table from multiple vertex streams and an optional index buffer and returns number of unique vertices +- * As a result, all vertices that are binary equivalent map to the same (new) location, with no gaps in the resulting sequence. +- * Resulting remap table maps old vertices to new vertices and can be used in meshopt_remapVertexBuffer/meshopt_remapIndexBuffer. +- * To remap vertex buffers, you will need to call meshopt_remapVertexBuffer for each vertex stream. +- * Note that binary equivalence considers all size bytes in each stream, including padding which should be zero-initialized. +- * +- * destination must contain enough space for the resulting remap table (vertex_count elements) +- * indices can be NULL if the input is unindexed +- */ +-MESHOPTIMIZER_API size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count); +- +-/** +- * Generates vertex buffer from the source vertex buffer and remap table generated by meshopt_generateVertexRemap +- * +- * destination must contain enough space for the resulting vertex buffer (unique_vertex_count elements, returned by meshopt_generateVertexRemap) +- * vertex_count should be the initial vertex count and not the value returned by meshopt_generateVertexRemap +- */ +-MESHOPTIMIZER_API void meshopt_remapVertexBuffer(void* destination, const void* vertices, size_t vertex_count, size_t vertex_size, const unsigned int* remap); +- +-/** +- * Generate index buffer from the source index buffer and remap table generated by meshopt_generateVertexRemap +- * +- * destination must contain enough space for the resulting index buffer (index_count elements) +- * indices can be NULL if the input is unindexed +- */ +-MESHOPTIMIZER_API void meshopt_remapIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const unsigned int* remap); +- +-/** +- * Generate index buffer that can be used for more efficient rendering when only a subset of the vertex attributes is necessary +- * All vertices that are binary equivalent (wrt first vertex_size bytes) map to the first vertex in the original vertex buffer. +- * This makes it possible to use the index buffer for Z pre-pass or shadowmap rendering, while using the original index buffer for regular rendering. +- * Note that binary equivalence considers all vertex_size bytes, including padding which should be zero-initialized. 
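As a reader aid: the remap functions documented above are normally used as a three-step pipeline. A minimal sketch, assuming an interleaved vertex struct (the `Vertex` layout is illustrative, not prescribed by the library):

```cpp
// Indexing a raw triangle soup with the remap API documented above:
// generateVertexRemap deduplicates, then the two remap calls rebuild buffers.
#include <vector>
#include "meshoptimizer.h"

struct Vertex { float px, py, pz, nx, ny, nz, tu, tv; }; // assumed layout

void indexMesh(const std::vector<Vertex>& unindexed)
{
    size_t index_count = unindexed.size(); // 3 unindexed vertices per triangle

    std::vector<unsigned int> remap(index_count);
    size_t vertex_count = meshopt_generateVertexRemap(remap.data(), nullptr /* unindexed input */,
                                                      index_count, unindexed.data(), index_count, sizeof(Vertex));

    std::vector<Vertex> vertices(vertex_count);
    std::vector<unsigned int> indices(index_count);

    meshopt_remapVertexBuffer(vertices.data(), unindexed.data(), index_count, sizeof(Vertex), remap.data());
    meshopt_remapIndexBuffer(indices.data(), nullptr, index_count, remap.data());
}
```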
+- * +- * destination must contain enough space for the resulting index buffer (index_count elements) +- */ +-MESHOPTIMIZER_API void meshopt_generateShadowIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size, size_t vertex_stride); +- +-/** +- * Generate index buffer that can be used for more efficient rendering when only a subset of the vertex attributes is necessary +- * All vertices that are binary equivalent (wrt specified streams) map to the first vertex in the original vertex buffer. +- * This makes it possible to use the index buffer for Z pre-pass or shadowmap rendering, while using the original index buffer for regular rendering. +- * Note that binary equivalence considers all size bytes in each stream, including padding which should be zero-initialized. +- * +- * destination must contain enough space for the resulting index buffer (index_count elements) +- */ +-MESHOPTIMIZER_API void meshopt_generateShadowIndexBufferMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count); +- +-/** +- * Generate index buffer that can be used as a geometry shader input with triangle adjacency topology +- * Each triangle is converted into a 6-vertex patch with the following layout: +- * - 0, 2, 4: original triangle vertices +- * - 1, 3, 5: vertices adjacent to edges 02, 24 and 40 +- * The resulting patch can be rendered with geometry shaders using e.g. VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY. +- * This can be used to implement algorithms like silhouette detection/expansion and other forms of GS-driven rendering. +- * +- * destination must contain enough space for the resulting index buffer (index_count*2 elements) +- * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer +- */ +-MESHOPTIMIZER_API void meshopt_generateAdjacencyIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); +- +-/** +- * Generate index buffer that can be used for PN-AEN tessellation with crack-free displacement +- * Each triangle is converted into a 12-vertex patch with the following layout: +- * - 0, 1, 2: original triangle vertices +- * - 3, 4: opposing edge for edge 0, 1 +- * - 5, 6: opposing edge for edge 1, 2 +- * - 7, 8: opposing edge for edge 2, 0 +- * - 9, 10, 11: dominant vertices for corners 0, 1, 2 +- * The resulting patch can be rendered with hardware tessellation using PN-AEN and displacement mapping. +- * See "Tessellation on Any Budget" (John McDonald, GDC 2011) for implementation details. +- * +- * destination must contain enough space for the resulting index buffer (index_count*4 elements) +- * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer +- */ +-MESHOPTIMIZER_API void meshopt_generateTessellationIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); +- +-/** +- * Vertex transform cache optimizer +- * Reorders indices to reduce the number of GPU vertex shader invocations +- * If index buffer contains multiple ranges for multiple draw calls, this functions needs to be called on each range individually. 
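The shadow-index functions above pay off in depth-only passes; a sketch that collapses vertices by position alone (vertex layout assumed, as before):

```cpp
// Position-only shadow indices: vertices that differ only in normal/UV
// share an index, reducing vertex shading work in shadow/Z-prepass draws.
#include <vector>
#include "meshoptimizer.h"

struct Vertex { float px, py, pz, nx, ny, nz, tu, tv; }; // assumed layout

std::vector<unsigned int> makeShadowIndices(const std::vector<unsigned int>& indices,
                                            const std::vector<Vertex>& vertices)
{
    std::vector<unsigned int> shadow(indices.size());
    meshopt_generateShadowIndexBuffer(shadow.data(), indices.data(), indices.size(),
                                      vertices.data(), vertices.size(),
                                      3 * sizeof(float), // compare the position prefix only
                                      sizeof(Vertex));
    return shadow;
}
```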
+- * +- * destination must contain enough space for the resulting index buffer (index_count elements) +- */ +-MESHOPTIMIZER_API void meshopt_optimizeVertexCache(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count); +- +-/** +- * Vertex transform cache optimizer for strip-like caches +- * Produces inferior results to meshopt_optimizeVertexCache from the GPU vertex cache perspective +- * However, the resulting index order is more optimal if the goal is to reduce the triangle strip length or improve compression efficiency +- * +- * destination must contain enough space for the resulting index buffer (index_count elements) +- */ +-MESHOPTIMIZER_API void meshopt_optimizeVertexCacheStrip(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count); +- +-/** +- * Vertex transform cache optimizer for FIFO caches +- * Reorders indices to reduce the number of GPU vertex shader invocations +- * Generally takes ~3x less time to optimize meshes but produces inferior results compared to meshopt_optimizeVertexCache +- * If index buffer contains multiple ranges for multiple draw calls, this functions needs to be called on each range individually. +- * +- * destination must contain enough space for the resulting index buffer (index_count elements) +- * cache_size should be less than the actual GPU cache size to avoid cache thrashing +- */ +-MESHOPTIMIZER_API void meshopt_optimizeVertexCacheFifo(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size); +- +-/** +- * Overdraw optimizer +- * Reorders indices to reduce the number of GPU vertex shader invocations and the pixel overdraw +- * If index buffer contains multiple ranges for multiple draw calls, this functions needs to be called on each range individually. +- * +- * destination must contain enough space for the resulting index buffer (index_count elements) +- * indices must contain index data that is the result of meshopt_optimizeVertexCache (*not* the original mesh indices!) +- * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer +- * threshold indicates how much the overdraw optimizer can degrade vertex cache efficiency (1.05 = up to 5%) to reduce overdraw more efficiently +- */ +-MESHOPTIMIZER_API void meshopt_optimizeOverdraw(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float threshold); +- +-/** +- * Vertex fetch cache optimizer +- * Reorders vertices and changes indices to reduce the amount of GPU memory fetches during vertex processing +- * Returns the number of unique vertices, which is the same as input vertex count unless some vertices are unused +- * This functions works for a single vertex stream; for multiple vertex streams, use meshopt_optimizeVertexFetchRemap + meshopt_remapVertexBuffer for each stream. 
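The optimizers above are designed to be chained in a fixed order, since meshopt_optimizeOverdraw expects cache-optimized indices and meshopt_optimizeVertexFetch reorders vertices last. A sketch of that order; the 1.05 threshold mirrors the documentation's own example:

```cpp
// In-place mesh optimization in the documented order:
// vertex cache, then overdraw, then vertex fetch.
#include <vector>
#include "meshoptimizer.h"

struct Vertex { float px, py, pz, nx, ny, nz, tu, tv; }; // assumed layout

void optimizeMesh(std::vector<unsigned int>& indices, std::vector<Vertex>& vertices)
{
    meshopt_optimizeVertexCache(indices.data(), indices.data(), indices.size(), vertices.size());

    // allow up to 5% vertex cache degradation in exchange for less overdraw
    meshopt_optimizeOverdraw(indices.data(), indices.data(), indices.size(),
                             &vertices[0].px, vertices.size(), sizeof(Vertex), 1.05f);

    meshopt_optimizeVertexFetch(vertices.data(), indices.data(), indices.size(),
                                vertices.data(), vertices.size(), sizeof(Vertex));
}
```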
+- * +- * destination must contain enough space for the resulting vertex buffer (vertex_count elements) +- * indices is used both as an input and as an output index buffer +- */ +-MESHOPTIMIZER_API size_t meshopt_optimizeVertexFetch(void* destination, unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size); +- +-/** +- * Vertex fetch cache optimizer +- * Generates vertex remap to reduce the amount of GPU memory fetches during vertex processing +- * Returns the number of unique vertices, which is the same as input vertex count unless some vertices are unused +- * The resulting remap table should be used to reorder vertex/index buffers using meshopt_remapVertexBuffer/meshopt_remapIndexBuffer +- * +- * destination must contain enough space for the resulting remap table (vertex_count elements) +- */ +-MESHOPTIMIZER_API size_t meshopt_optimizeVertexFetchRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count); +- +-/** +- * Index buffer encoder +- * Encodes index data into an array of bytes that is generally much smaller (<1.5 bytes/triangle) and compresses better (<1 bytes/triangle) compared to original. +- * Input index buffer must represent a triangle list. +- * Returns encoded data size on success, 0 on error; the only error condition is if buffer doesn't have enough space +- * For maximum efficiency the index buffer being encoded has to be optimized for vertex cache and vertex fetch first. +- * +- * buffer must contain enough space for the encoded index buffer (use meshopt_encodeIndexBufferBound to compute worst case size) +- */ +-MESHOPTIMIZER_API size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count); +-MESHOPTIMIZER_API size_t meshopt_encodeIndexBufferBound(size_t index_count, size_t vertex_count); +- +-/** +- * Set index encoder format version +- * version must specify the data format version to encode; valid values are 0 (decodable by all library versions) and 1 (decodable by 0.14+) +- */ +-MESHOPTIMIZER_API void meshopt_encodeIndexVersion(int version); +- +-/** +- * Index buffer decoder +- * Decodes index data from an array of bytes generated by meshopt_encodeIndexBuffer +- * Returns 0 if decoding was successful, and an error code otherwise +- * The decoder is safe to use for untrusted input, but it may produce garbage data (e.g. out of range indices). +- * +- * destination must contain enough space for the resulting index buffer (index_count elements) +- */ +-MESHOPTIMIZER_API int meshopt_decodeIndexBuffer(void* destination, size_t index_count, size_t index_size, const unsigned char* buffer, size_t buffer_size); +- +-/** +- * Index sequence encoder +- * Encodes index sequence into an array of bytes that is generally smaller and compresses better compared to original. +- * Input index sequence can represent arbitrary topology; for triangle lists meshopt_encodeIndexBuffer is likely to be better. 
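A round trip through the index codec declared above, sizing the buffer with the documented bound and shrinking it to the actual encoded size:

```cpp
// Encode/decode round trip for the triangle index codec.
#include <cassert>
#include <vector>
#include "meshoptimizer.h"

std::vector<unsigned char> encodeIndices(const std::vector<unsigned int>& indices, size_t vertex_count)
{
    std::vector<unsigned char> buffer(meshopt_encodeIndexBufferBound(indices.size(), vertex_count));
    size_t size = meshopt_encodeIndexBuffer(buffer.data(), buffer.size(), indices.data(), indices.size());
    buffer.resize(size); // 0 only if the buffer was too small, which the bound rules out
    return buffer;
}

std::vector<unsigned int> decodeIndices(const std::vector<unsigned char>& buffer, size_t index_count)
{
    std::vector<unsigned int> indices(index_count);
    int rc = meshopt_decodeIndexBuffer(indices.data(), index_count, sizeof(unsigned int),
                                       buffer.data(), buffer.size());
    assert(rc == 0); // nonzero signals malformed input
    (void)rc;
    return indices;
}
```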
+- * Returns encoded data size on success, 0 on error; the only error condition is if buffer doesn't have enough space +- * +- * buffer must contain enough space for the encoded index sequence (use meshopt_encodeIndexSequenceBound to compute worst case size) +- */ +-MESHOPTIMIZER_API size_t meshopt_encodeIndexSequence(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count); +-MESHOPTIMIZER_API size_t meshopt_encodeIndexSequenceBound(size_t index_count, size_t vertex_count); +- +-/** +- * Index sequence decoder +- * Decodes index data from an array of bytes generated by meshopt_encodeIndexSequence +- * Returns 0 if decoding was successful, and an error code otherwise +- * The decoder is safe to use for untrusted input, but it may produce garbage data (e.g. out of range indices). +- * +- * destination must contain enough space for the resulting index sequence (index_count elements) +- */ +-MESHOPTIMIZER_API int meshopt_decodeIndexSequence(void* destination, size_t index_count, size_t index_size, const unsigned char* buffer, size_t buffer_size); +- +-/** +- * Vertex buffer encoder +- * Encodes vertex data into an array of bytes that is generally smaller and compresses better compared to original. +- * Returns encoded data size on success, 0 on error; the only error condition is if buffer doesn't have enough space +- * This function works for a single vertex stream; for multiple vertex streams, call meshopt_encodeVertexBuffer for each stream. +- * Note that all vertex_size bytes of each vertex are encoded verbatim, including padding which should be zero-initialized. +- * +- * buffer must contain enough space for the encoded vertex buffer (use meshopt_encodeVertexBufferBound to compute worst case size) +- */ +-MESHOPTIMIZER_API size_t meshopt_encodeVertexBuffer(unsigned char* buffer, size_t buffer_size, const void* vertices, size_t vertex_count, size_t vertex_size); +-MESHOPTIMIZER_API size_t meshopt_encodeVertexBufferBound(size_t vertex_count, size_t vertex_size); +- +-/** +- * Set vertex encoder format version +- * version must specify the data format version to encode; valid values are 0 (decodable by all library versions) +- */ +-MESHOPTIMIZER_API void meshopt_encodeVertexVersion(int version); +- +-/** +- * Vertex buffer decoder +- * Decodes vertex data from an array of bytes generated by meshopt_encodeVertexBuffer +- * Returns 0 if decoding was successful, and an error code otherwise +- * The decoder is safe to use for untrusted input, but it may produce garbage data. +- * +- * destination must contain enough space for the resulting vertex buffer (vertex_count * vertex_size bytes) +- */ +-MESHOPTIMIZER_API int meshopt_decodeVertexBuffer(void* destination, size_t vertex_count, size_t vertex_size, const unsigned char* buffer, size_t buffer_size); +- +-/** +- * Vertex buffer filters +- * These functions can be used to filter output of meshopt_decodeVertexBuffer in-place. +- * +- * meshopt_decodeFilterOct decodes octahedral encoding of a unit vector with K-bit (K <= 16) signed X/Y as an input; Z must store 1.0f. +- * Each component is stored as an 8-bit or 16-bit normalized integer; stride must be equal to 4 or 8. W is preserved as is. +- * +- * meshopt_decodeFilterQuat decodes 3-component quaternion encoding with K-bit (4 <= K <= 16) component encoding and a 2-bit component index indicating which component to reconstruct. +- * Each component is stored as an 16-bit integer; stride must be equal to 8. 
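The vertex codec documented above follows the same bound-then-shrink pattern, one stream per call:

```cpp
// Vertex buffer encode with the documented worst-case bound.
#include <vector>
#include "meshoptimizer.h"

struct Vertex { float px, py, pz, nx, ny, nz, tu, tv; }; // assumed layout

std::vector<unsigned char> encodeVertices(const std::vector<Vertex>& vertices)
{
    std::vector<unsigned char> buffer(meshopt_encodeVertexBufferBound(vertices.size(), sizeof(Vertex)));
    buffer.resize(meshopt_encodeVertexBuffer(buffer.data(), buffer.size(),
                                             vertices.data(), vertices.size(), sizeof(Vertex)));
    return buffer;
}
```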
+- * +- * meshopt_decodeFilterExp decodes exponential encoding of floating-point data with 8-bit exponent and 24-bit integer mantissa as 2^E*M. +- * Each 32-bit component is decoded in isolation; stride must be divisible by 4. +- */ +-MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterOct(void* buffer, size_t count, size_t stride); +-MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterQuat(void* buffer, size_t count, size_t stride); +-MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterExp(void* buffer, size_t count, size_t stride); +- +-/** +- * Vertex buffer filter encoders +- * These functions can be used to encode data in a format that meshopt_decodeFilter can decode +- * +- * meshopt_encodeFilterOct encodes unit vectors with K-bit (K <= 16) signed X/Y as an output. +- * Each component is stored as an 8-bit or 16-bit normalized integer; stride must be equal to 4 or 8. W is preserved as is. +- * Input data must contain 4 floats for every vector (count*4 total). +- * +- * meshopt_encodeFilterQuat encodes unit quaternions with K-bit (4 <= K <= 16) component encoding. +- * Each component is stored as an 16-bit integer; stride must be equal to 8. +- * Input data must contain 4 floats for every quaternion (count*4 total). +- * +- * meshopt_encodeFilterExp encodes arbitrary (finite) floating-point data with 8-bit exponent and K-bit integer mantissa (1 <= K <= 24). +- * Mantissa is shared between all components of a given vector as defined by stride; stride must be divisible by 4. +- * Input data must contain stride/4 floats for every vector (count*stride/4 total). +- * When individual (scalar) encoding is desired, simply pass stride=4 and adjust count accordingly. +- */ +-MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeFilterOct(void* destination, size_t count, size_t stride, int bits, const float* data); +-MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeFilterQuat(void* destination, size_t count, size_t stride, int bits, const float* data); +-MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeFilterExp(void* destination, size_t count, size_t stride, int bits, const float* data); +- +-/** +- * Simplification options +- */ +-enum +-{ +- /* Do not move vertices that are located on the topological border (vertices on triangle edges that don't have a paired triangle). Useful for simplifying portions of the larger mesh. */ +- meshopt_SimplifyLockBorder = 1 << 0, +-}; +- +-/** +- * Mesh simplifier +- * Reduces the number of triangles in the mesh, attempting to preserve mesh appearance as much as possible +- * The algorithm tries to preserve mesh topology and can stop short of the target goal based on topology constraints or target error. +- * If not all attributes from the input mesh are required, it's recommended to reindex the mesh using meshopt_generateShadowIndexBuffer prior to simplification. +- * Returns the number of indices after simplification, with destination containing new index data +- * The resulting index buffer references vertices from the original vertex buffer. +- * If the original vertex data isn't required, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended. +- * +- * destination must contain enough space for the target index buffer, worst case is index_count elements (*not* target_index_count)! +- * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer +- * target_error represents the error relative to mesh extents that can be tolerated, e.g. 
0.01 = 1% deformation +- * options must be a bitmask composed of meshopt_SimplifyX options; 0 is a safe default +- * result_error can be NULL; when it's not NULL, it will contain the resulting (relative) error after simplification +- */ +-MESHOPTIMIZER_API size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options, float* result_error); +- +-/** +- * Experimental: Mesh simplifier (sloppy) +- * Reduces the number of triangles in the mesh, sacrificing mesh appearance for simplification performance +- * The algorithm doesn't preserve mesh topology but can stop short of the target goal based on target error. +- * Returns the number of indices after simplification, with destination containing new index data +- * The resulting index buffer references vertices from the original vertex buffer. +- * If the original vertex data isn't required, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended. +- * +- * destination must contain enough space for the target index buffer, worst case is index_count elements (*not* target_index_count)! +- * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer +- * target_error represents the error relative to mesh extents that can be tolerated, e.g. 0.01 = 1% deformation +- * result_error can be NULL; when it's not NULL, it will contain the resulting (relative) error after simplification +- */ +-MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error); +- +-/** +- * Experimental: Point cloud simplifier +- * Reduces the number of points in the cloud to reach the given target +- * Returns the number of points after simplification, with destination containing new index data +- * The resulting index buffer references vertices from the original vertex buffer. +- * If the original vertex data isn't required, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended. +- * +- * destination must contain enough space for the target index buffer (target_vertex_count elements) +- * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer +- */ +-MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyPoints(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_vertex_count); +- +-/** +- * Returns the error scaling factor used by the simplifier to convert between absolute and relative extents +- * +- * Absolute error must be *divided* by the scaling factor before passing it to meshopt_simplify as target_error +- * Relative error returned by meshopt_simplify via result_error must be *multiplied* by the scaling factor to get absolute error. 
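Since target_error is relative to mesh extents, an absolute error budget has to go through meshopt_simplifyScale as described above. A sketch, assuming tightly packed float3 positions:

```cpp
// LOD generation: convert an absolute error budget into relative target_error.
#include <vector>
#include "meshoptimizer.h"

std::vector<unsigned int> buildLod(const std::vector<unsigned int>& indices,
                                   const std::vector<float>& positions, // packed x,y,z per vertex
                                   size_t target_index_count, float absolute_error)
{
    size_t vertex_count = positions.size() / 3;
    float scale = meshopt_simplifyScale(positions.data(), vertex_count, 3 * sizeof(float));

    std::vector<unsigned int> lod(indices.size()); // worst case is index_count, not target_index_count
    float result_error = 0.f;
    lod.resize(meshopt_simplify(lod.data(), indices.data(), indices.size(),
                                positions.data(), vertex_count, 3 * sizeof(float),
                                target_index_count, absolute_error / scale,
                                /* options */ 0, &result_error));
    // result_error * scale is the absolute error actually reached
    return lod;
}
```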
+- */ +-MESHOPTIMIZER_API float meshopt_simplifyScale(const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); +- +-/** +- * Mesh stripifier +- * Converts a previously vertex cache optimized triangle list to triangle strip, stitching strips using restart index or degenerate triangles +- * Returns the number of indices in the resulting strip, with destination containing new index data +- * For maximum efficiency the index buffer being converted has to be optimized for vertex cache first. +- * Using restart indices can result in ~10% smaller index buffers, but on some GPUs restart indices may result in decreased performance. +- * +- * destination must contain enough space for the target index buffer, worst case can be computed with meshopt_stripifyBound +- * restart_index should be 0xffff or 0xffffffff depending on index size, or 0 to use degenerate triangles +- */ +-MESHOPTIMIZER_API size_t meshopt_stripify(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int restart_index); +-MESHOPTIMIZER_API size_t meshopt_stripifyBound(size_t index_count); +- +-/** +- * Mesh unstripifier +- * Converts a triangle strip to a triangle list +- * Returns the number of indices in the resulting list, with destination containing new index data +- * +- * destination must contain enough space for the target index buffer, worst case can be computed with meshopt_unstripifyBound +- */ +-MESHOPTIMIZER_API size_t meshopt_unstripify(unsigned int* destination, const unsigned int* indices, size_t index_count, unsigned int restart_index); +-MESHOPTIMIZER_API size_t meshopt_unstripifyBound(size_t index_count); +- +-struct meshopt_VertexCacheStatistics +-{ +- unsigned int vertices_transformed; +- unsigned int warps_executed; +- float acmr; /* transformed vertices / triangle count; best case 0.5, worst case 3.0, optimum depends on topology */ +- float atvr; /* transformed vertices / vertex count; best case 1.0, worst case 6.0, optimum is 1.0 (each vertex is transformed once) */ +-}; +- +-/** +- * Vertex transform cache analyzer +- * Returns cache hit statistics using a simplified FIFO model +- * Results may not match actual GPU performance +- */ +-MESHOPTIMIZER_API struct meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int primgroup_size); +- +-struct meshopt_OverdrawStatistics +-{ +- unsigned int pixels_covered; +- unsigned int pixels_shaded; +- float overdraw; /* shaded pixels / covered pixels; best case 1.0 */ +-}; +- +-/** +- * Overdraw analyzer +- * Returns overdraw statistics using a software rasterizer +- * Results may not match actual GPU performance +- * +- * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer +- */ +-MESHOPTIMIZER_API struct meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); +- +-struct meshopt_VertexFetchStatistics +-{ +- unsigned int bytes_fetched; +- float overfetch; /* fetched bytes / vertex buffer size; best case 1.0 (each byte is fetched once) */ +-}; +- +-/** +- * Vertex fetch cache analyzer +- * Returns cache hit statistics using a simplified direct mapped model +- * Results may not match actual GPU performance +- */ +-MESHOPTIMIZER_API struct meshopt_VertexFetchStatistics 
meshopt_analyzeVertexFetch(const unsigned int* indices, size_t index_count, size_t vertex_count, size_t vertex_size); +- +-struct meshopt_Meshlet +-{ +- /* offsets within meshlet_vertices and meshlet_triangles arrays with meshlet data */ +- unsigned int vertex_offset; +- unsigned int triangle_offset; +- +- /* number of vertices and triangles used in the meshlet; data is stored in consecutive range defined by offset and count */ +- unsigned int vertex_count; +- unsigned int triangle_count; +-}; +- +-/** +- * Meshlet builder +- * Splits the mesh into a set of meshlets where each meshlet has a micro index buffer indexing into meshlet vertices that refer to the original vertex buffer +- * The resulting data can be used to render meshes using NVidia programmable mesh shading pipeline, or in other cluster-based renderers. +- * When using buildMeshlets, vertex positions need to be provided to minimize the size of the resulting clusters. +- * When using buildMeshletsScan, for maximum efficiency the index buffer being converted has to be optimized for vertex cache first. +- * +- * meshlets must contain enough space for all meshlets, worst case size can be computed with meshopt_buildMeshletsBound +- * meshlet_vertices must contain enough space for all meshlets, worst case size is equal to max_meshlets * max_vertices +- * meshlet_triangles must contain enough space for all meshlets, worst case size is equal to max_meshlets * max_triangles * 3 +- * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer +- * max_vertices and max_triangles must not exceed implementation limits (max_vertices <= 255 - not 256!, max_triangles <= 512) +- * cone_weight should be set to 0 when cone culling is not used, and a value between 0 and 1 otherwise to balance between cluster size and cone culling efficiency +- */ +-MESHOPTIMIZER_API size_t meshopt_buildMeshlets(struct meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight); +-MESHOPTIMIZER_API size_t meshopt_buildMeshletsScan(struct meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles); +-MESHOPTIMIZER_API size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_t max_triangles); +- +-struct meshopt_Bounds +-{ +- /* bounding sphere, useful for frustum and occlusion culling */ +- float center[3]; +- float radius; +- +- /* normal cone, useful for backface culling */ +- float cone_apex[3]; +- float cone_axis[3]; +- float cone_cutoff; /* = cos(angle/2) */ +- +- /* normal cone axis and cutoff, stored in 8-bit SNORM format; decode using x/127.0 */ +- signed char cone_axis_s8[3]; +- signed char cone_cutoff_s8; +-}; +- +-/** +- * Cluster bounds generator +- * Creates bounding volumes that can be used for frustum, backface and occlusion culling. 
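Allocation for the meshlet builder follows directly from the documented worst cases; a sketch with commonly used limits (64/124 is a conventional choice, not a library requirement):

```cpp
// Splitting a mesh into meshlets within the documented limits.
#include <vector>
#include "meshoptimizer.h"

std::vector<meshopt_Meshlet> buildMeshlets(const std::vector<unsigned int>& indices,
                                           const std::vector<float>& positions) // packed x,y,z
{
    const size_t max_vertices = 64, max_triangles = 124; // <= 255 / <= 512 per the docs
    const float cone_weight = 0.f; // no cone culling requested

    size_t max_meshlets = meshopt_buildMeshletsBound(indices.size(), max_vertices, max_triangles);
    std::vector<meshopt_Meshlet> meshlets(max_meshlets);
    std::vector<unsigned int> meshlet_vertices(max_meshlets * max_vertices);
    std::vector<unsigned char> meshlet_triangles(max_meshlets * max_triangles * 3);

    meshlets.resize(meshopt_buildMeshlets(meshlets.data(), meshlet_vertices.data(), meshlet_triangles.data(),
                                          indices.data(), indices.size(),
                                          positions.data(), positions.size() / 3, 3 * sizeof(float),
                                          max_vertices, max_triangles, cone_weight));
    return meshlets;
}
```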
+- * +- * For backface culling with orthographic projection, use the following formula to reject backfacing clusters: +- * dot(view, cone_axis) >= cone_cutoff +- * +- * For perspective projection, you can use the formula that needs cone apex in addition to axis & cutoff: +- * dot(normalize(cone_apex - camera_position), cone_axis) >= cone_cutoff +- * +- * Alternatively, you can use the formula that doesn't need cone apex and uses bounding sphere instead: +- * dot(normalize(center - camera_position), cone_axis) >= cone_cutoff + radius / length(center - camera_position) +- * or an equivalent formula that doesn't have a singularity at center = camera_position: +- * dot(center - camera_position, cone_axis) >= cone_cutoff * length(center - camera_position) + radius +- * +- * The formula that uses the apex is slightly more accurate but needs the apex; if you are already using bounding sphere +- * to do frustum/occlusion culling, the formula that doesn't use the apex may be preferable. +- * +- * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer +- * index_count/3 should be less than or equal to 512 (the function assumes clusters of limited size) +- */ +-MESHOPTIMIZER_API struct meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); +-MESHOPTIMIZER_API struct meshopt_Bounds meshopt_computeMeshletBounds(const unsigned int* meshlet_vertices, const unsigned char* meshlet_triangles, size_t triangle_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); +- +-/** +- * Experimental: Spatial sorter +- * Generates a remap table that can be used to reorder points for spatial locality. +- * Resulting remap table maps old vertices to new vertices and can be used in meshopt_remapVertexBuffer. +- * +- * destination must contain enough space for the resulting remap table (vertex_count elements) +- */ +-MESHOPTIMIZER_EXPERIMENTAL void meshopt_spatialSortRemap(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); +- +-/** +- * Experimental: Spatial sorter +- * Reorders triangles for spatial locality, and generates a new index buffer. The resulting index buffer can be used with other functions like optimizeVertexCache. +- * +- * destination must contain enough space for the resulting index buffer (index_count elements) +- * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer +- */ +-MESHOPTIMIZER_EXPERIMENTAL void meshopt_spatialSortTriangles(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); +- +-/** +- * Set allocation callbacks +- * These callbacks will be used instead of the default operator new/operator delete for all temporary allocations in the library. +- * Note that all algorithms only allocate memory for temporary use. +- * allocate/deallocate are always called in a stack-like order - last pointer to be allocated is deallocated first.
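The cluster-culling formulas quoted above translate directly into a per-cluster rejection test; here is the bounding-sphere variant, which avoids the singularity at the camera position:

```cpp
// Perspective backface rejection using meshopt_Bounds, per the formula
// dot(center - camera, cone_axis) >= cone_cutoff * length(center - camera) + radius.
#include <cmath>
#include "meshoptimizer.h"

struct Vec3 { float x, y, z; };

static float dot(Vec3 a, Vec3 b) { return a.x * b.x + a.y * b.y + a.z * b.z; }

bool clusterIsBackfacing(const meshopt_Bounds& b, Vec3 camera)
{
    Vec3 axis = {b.cone_axis[0], b.cone_axis[1], b.cone_axis[2]};
    Vec3 dir = {b.center[0] - camera.x, b.center[1] - camera.y, b.center[2] - camera.z};

    return dot(dir, axis) >= b.cone_cutoff * std::sqrt(dot(dir, dir)) + b.radius;
}
```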
+- */ +-MESHOPTIMIZER_API void meshopt_setAllocator(void* (MESHOPTIMIZER_ALLOC_CALLCONV *allocate)(size_t), void (MESHOPTIMIZER_ALLOC_CALLCONV *deallocate)(void*)); +- +-#ifdef __cplusplus +-} /* extern "C" */ +-#endif +- +-/* Quantization into commonly supported data formats */ +-#ifdef __cplusplus +-/** +- * Quantize a float in [0..1] range into an N-bit fixed point unorm value +- * Assumes reconstruction function (q / (2^N-1)), which is the case for fixed-function normalized fixed point conversion +- * Maximum reconstruction error: 1/2^(N+1) +- */ +-inline int meshopt_quantizeUnorm(float v, int N); +- +-/** +- * Quantize a float in [-1..1] range into an N-bit fixed point snorm value +- * Assumes reconstruction function (q / (2^(N-1)-1)), which is the case for fixed-function normalized fixed point conversion (except early OpenGL versions) +- * Maximum reconstruction error: 1/2^N +- */ +-inline int meshopt_quantizeSnorm(float v, int N); +- +-/** +- * Quantize a float into half-precision floating point value +- * Generates +-inf for overflow, preserves NaN, flushes denormals to zero, rounds to nearest +- * Representable magnitude range: [6e-5; 65504] +- * Maximum relative reconstruction error: 5e-4 +- */ +-inline unsigned short meshopt_quantizeHalf(float v); +- +-/** +- * Quantize a float into a floating point value with a limited number of significant mantissa bits +- * Generates +-inf for overflow, preserves NaN, flushes denormals to zero, rounds to nearest +- * Assumes N is in a valid mantissa precision range, which is 1..23 +- */ +-inline float meshopt_quantizeFloat(float v, int N); +-#endif +- +-/** +- * C++ template interface +- * +- * These functions mirror the C interface the library provides, providing template-based overloads so that +- * the caller can use an arbitrary type for the index data, both for input and output. +- * When the supplied type is the same size as that of unsigned int, the wrappers are zero-cost; when it's not, +- * the wrappers end up allocating memory and copying index data to convert from one type to another. 
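The quantization helpers declared above are meant for shrinking vertex formats; a sketch packing position, normal and UV into a compact layout (the packed struct is illustrative):

```cpp
// Packing a vertex with the inline quantization helpers.
#include "meshoptimizer.h"

struct PackedVertex
{
    unsigned short px, py, pz; // half-float position
    signed char nx, ny, nz;    // 8-bit snorm normal
    unsigned short tu, tv;     // 16-bit unorm texcoords, assuming UVs in [0..1]
};

PackedVertex pack(const float p[3], const float n[3], const float t[2])
{
    PackedVertex v = {};
    v.px = meshopt_quantizeHalf(p[0]);
    v.py = meshopt_quantizeHalf(p[1]);
    v.pz = meshopt_quantizeHalf(p[2]);
    v.nx = (signed char)meshopt_quantizeSnorm(n[0], 8);
    v.ny = (signed char)meshopt_quantizeSnorm(n[1], 8);
    v.nz = (signed char)meshopt_quantizeSnorm(n[2], 8);
    v.tu = (unsigned short)meshopt_quantizeUnorm(t[0], 16);
    v.tv = (unsigned short)meshopt_quantizeUnorm(t[1], 16);
    return v;
}
```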
+- */ +-#if defined(__cplusplus) && !defined(MESHOPTIMIZER_NO_WRAPPERS) +-template +-inline size_t meshopt_generateVertexRemap(unsigned int* destination, const T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size); +-template +-inline size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const T* indices, size_t index_count, size_t vertex_count, const meshopt_Stream* streams, size_t stream_count); +-template +-inline void meshopt_remapIndexBuffer(T* destination, const T* indices, size_t index_count, const unsigned int* remap); +-template +-inline void meshopt_generateShadowIndexBuffer(T* destination, const T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size, size_t vertex_stride); +-template +-inline void meshopt_generateShadowIndexBufferMulti(T* destination, const T* indices, size_t index_count, size_t vertex_count, const meshopt_Stream* streams, size_t stream_count); +-template +-inline void meshopt_generateAdjacencyIndexBuffer(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); +-template +-inline void meshopt_generateTessellationIndexBuffer(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); +-template +-inline void meshopt_optimizeVertexCache(T* destination, const T* indices, size_t index_count, size_t vertex_count); +-template +-inline void meshopt_optimizeVertexCacheStrip(T* destination, const T* indices, size_t index_count, size_t vertex_count); +-template +-inline void meshopt_optimizeVertexCacheFifo(T* destination, const T* indices, size_t index_count, size_t vertex_count, unsigned int cache_size); +-template +-inline void meshopt_optimizeOverdraw(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float threshold); +-template +-inline size_t meshopt_optimizeVertexFetchRemap(unsigned int* destination, const T* indices, size_t index_count, size_t vertex_count); +-template +-inline size_t meshopt_optimizeVertexFetch(void* destination, T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size); +-template +-inline size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const T* indices, size_t index_count); +-template +-inline int meshopt_decodeIndexBuffer(T* destination, size_t index_count, const unsigned char* buffer, size_t buffer_size); +-template +-inline size_t meshopt_encodeIndexSequence(unsigned char* buffer, size_t buffer_size, const T* indices, size_t index_count); +-template +-inline int meshopt_decodeIndexSequence(T* destination, size_t index_count, const unsigned char* buffer, size_t buffer_size); +-template +-inline size_t meshopt_simplify(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options = 0, float* result_error = 0); +-template +-inline size_t meshopt_simplifySloppy(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error = 0); +-template +-inline size_t meshopt_stripify(T* destination, const T* indices, size_t index_count, size_t vertex_count, T restart_index); 
+-template +-inline size_t meshopt_unstripify(T* destination, const T* indices, size_t index_count, T restart_index); +-template +-inline meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const T* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int buffer_size); +-template +-inline meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); +-template +-inline meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const T* indices, size_t index_count, size_t vertex_count, size_t vertex_size); +-template +-inline size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight); +-template +-inline size_t meshopt_buildMeshletsScan(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles); +-template +-inline meshopt_Bounds meshopt_computeClusterBounds(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); +-template +-inline void meshopt_spatialSortTriangles(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); +-#endif +- +-/* Inline implementation */ +-#ifdef __cplusplus +-inline int meshopt_quantizeUnorm(float v, int N) +-{ +- const float scale = float((1 << N) - 1); +- +- v = (v >= 0) ? v : 0; +- v = (v <= 1) ? v : 1; +- +- return int(v * scale + 0.5f); +-} +- +-inline int meshopt_quantizeSnorm(float v, int N) +-{ +- const float scale = float((1 << (N - 1)) - 1); +- +- float round = (v >= 0 ? 0.5f : -0.5f); +- +- v = (v >= -1) ? v : -1; +- v = (v <= +1) ? v : +1; +- +- return int(v * scale + round); +-} +- +-inline unsigned short meshopt_quantizeHalf(float v) +-{ +- union { float f; unsigned int ui; } u = {v}; +- unsigned int ui = u.ui; +- +- int s = (ui >> 16) & 0x8000; +- int em = ui & 0x7fffffff; +- +- /* bias exponent and round to nearest; 112 is relative exponent bias (127-15) */ +- int h = (em - (112 << 23) + (1 << 12)) >> 13; +- +- /* underflow: flush to zero; 113 encodes exponent -14 */ +- h = (em < (113 << 23)) ? 0 : h; +- +- /* overflow: infinity; 143 encodes exponent 16 */ +- h = (em >= (143 << 23)) ? 0x7c00 : h; +- +- /* NaN; note that we convert all types of NaN to qNaN */ +- h = (em > (255 << 23)) ? 0x7e00 : h; +- +- return (unsigned short)(s | h); +-} +- +-inline float meshopt_quantizeFloat(float v, int N) +-{ +- union { float f; unsigned int ui; } u = {v}; +- unsigned int ui = u.ui; +- +- const int mask = (1 << (23 - N)) - 1; +- const int round = (1 << (23 - N)) >> 1; +- +- int e = ui & 0x7f800000; +- unsigned int rui = (ui + round) & ~mask; +- +- /* round all numbers except inf/nan; this is important to make sure nan doesn't overflow into -0 */ +- ui = e == 0x7f800000 ? ui : rui; +- +- /* flush denormals to zero */ +- ui = e == 0 ? 
0 : ui; +- +- u.ui = ui; +- return u.f; +-} +-#endif +- +-/* Internal implementation helpers */ +-#ifdef __cplusplus +-class meshopt_Allocator +-{ +-public: +- template +- struct StorageT +- { +- static void* (MESHOPTIMIZER_ALLOC_CALLCONV *allocate)(size_t); +- static void (MESHOPTIMIZER_ALLOC_CALLCONV *deallocate)(void*); +- }; +- +- typedef StorageT Storage; +- +- meshopt_Allocator() +- : blocks() +- , count(0) +- { +- } +- +- ~meshopt_Allocator() +- { +- for (size_t i = count; i > 0; --i) +- Storage::deallocate(blocks[i - 1]); +- } +- +- template T* allocate(size_t size) +- { +- assert(count < sizeof(blocks) / sizeof(blocks[0])); +- T* result = static_cast(Storage::allocate(size > size_t(-1) / sizeof(T) ? size_t(-1) : size * sizeof(T))); +- blocks[count++] = result; +- return result; +- } +- +-private: +- void* blocks[24]; +- size_t count; +-}; +- +-// This makes sure that allocate/deallocate are lazily generated in translation units that need them and are deduplicated by the linker +-template void* (MESHOPTIMIZER_ALLOC_CALLCONV *meshopt_Allocator::StorageT::allocate)(size_t) = operator new; +-template void (MESHOPTIMIZER_ALLOC_CALLCONV *meshopt_Allocator::StorageT::deallocate)(void*) = operator delete; +-#endif +- +-/* Inline implementation for C++ templated wrappers */ +-#if defined(__cplusplus) && !defined(MESHOPTIMIZER_NO_WRAPPERS) +-template +-struct meshopt_IndexAdapter; +- +-template +-struct meshopt_IndexAdapter +-{ +- T* result; +- unsigned int* data; +- size_t count; +- +- meshopt_IndexAdapter(T* result_, const T* input, size_t count_) +- : result(result_) +- , data(0) +- , count(count_) +- { +- size_t size = count > size_t(-1) / sizeof(unsigned int) ? size_t(-1) : count * sizeof(unsigned int); +- +- data = static_cast(meshopt_Allocator::Storage::allocate(size)); +- +- if (input) +- { +- for (size_t i = 0; i < count; ++i) +- data[i] = input[i]; +- } +- } +- +- ~meshopt_IndexAdapter() +- { +- if (result) +- { +- for (size_t i = 0; i < count; ++i) +- result[i] = T(data[i]); +- } +- +- meshopt_Allocator::Storage::deallocate(data); +- } +-}; +- +-template +-struct meshopt_IndexAdapter +-{ +- unsigned int* data; +- +- meshopt_IndexAdapter(T* result, const T* input, size_t) +- : data(reinterpret_cast(result ? result : const_cast(input))) +- { +- } +-}; +- +-template +-inline size_t meshopt_generateVertexRemap(unsigned int* destination, const T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size) +-{ +- meshopt_IndexAdapter in(0, indices, indices ? index_count : 0); +- +- return meshopt_generateVertexRemap(destination, indices ? in.data : 0, index_count, vertices, vertex_count, vertex_size); +-} +- +-template +-inline size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const T* indices, size_t index_count, size_t vertex_count, const meshopt_Stream* streams, size_t stream_count) +-{ +- meshopt_IndexAdapter in(0, indices, indices ? index_count : 0); +- +- return meshopt_generateVertexRemapMulti(destination, indices ? in.data : 0, index_count, vertex_count, streams, stream_count); +-} +- +-template +-inline void meshopt_remapIndexBuffer(T* destination, const T* indices, size_t index_count, const unsigned int* remap) +-{ +- meshopt_IndexAdapter in(0, indices, indices ? index_count : 0); +- meshopt_IndexAdapter out(destination, 0, index_count); +- +- meshopt_remapIndexBuffer(out.data, indices ? 
in.data : 0, index_count, remap); +-} +- +-template +-inline void meshopt_generateShadowIndexBuffer(T* destination, const T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size, size_t vertex_stride) +-{ +- meshopt_IndexAdapter in(0, indices, index_count); +- meshopt_IndexAdapter out(destination, 0, index_count); +- +- meshopt_generateShadowIndexBuffer(out.data, in.data, index_count, vertices, vertex_count, vertex_size, vertex_stride); +-} +- +-template +-inline void meshopt_generateShadowIndexBufferMulti(T* destination, const T* indices, size_t index_count, size_t vertex_count, const meshopt_Stream* streams, size_t stream_count) +-{ +- meshopt_IndexAdapter in(0, indices, index_count); +- meshopt_IndexAdapter out(destination, 0, index_count); +- +- meshopt_generateShadowIndexBufferMulti(out.data, in.data, index_count, vertex_count, streams, stream_count); +-} +- +-template +-inline void meshopt_generateAdjacencyIndexBuffer(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +-{ +- meshopt_IndexAdapter in(0, indices, index_count); +- meshopt_IndexAdapter out(destination, 0, index_count * 2); +- +- meshopt_generateAdjacencyIndexBuffer(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride); +-} +- +-template +-inline void meshopt_generateTessellationIndexBuffer(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +-{ +- meshopt_IndexAdapter in(0, indices, index_count); +- meshopt_IndexAdapter out(destination, 0, index_count * 4); +- +- meshopt_generateTessellationIndexBuffer(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride); +-} +- +-template +-inline void meshopt_optimizeVertexCache(T* destination, const T* indices, size_t index_count, size_t vertex_count) +-{ +- meshopt_IndexAdapter in(0, indices, index_count); +- meshopt_IndexAdapter out(destination, 0, index_count); +- +- meshopt_optimizeVertexCache(out.data, in.data, index_count, vertex_count); +-} +- +-template +-inline void meshopt_optimizeVertexCacheStrip(T* destination, const T* indices, size_t index_count, size_t vertex_count) +-{ +- meshopt_IndexAdapter in(0, indices, index_count); +- meshopt_IndexAdapter out(destination, 0, index_count); +- +- meshopt_optimizeVertexCacheStrip(out.data, in.data, index_count, vertex_count); +-} +- +-template +-inline void meshopt_optimizeVertexCacheFifo(T* destination, const T* indices, size_t index_count, size_t vertex_count, unsigned int cache_size) +-{ +- meshopt_IndexAdapter in(0, indices, index_count); +- meshopt_IndexAdapter out(destination, 0, index_count); +- +- meshopt_optimizeVertexCacheFifo(out.data, in.data, index_count, vertex_count, cache_size); +-} +- +-template +-inline void meshopt_optimizeOverdraw(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float threshold) +-{ +- meshopt_IndexAdapter in(0, indices, index_count); +- meshopt_IndexAdapter out(destination, 0, index_count); +- +- meshopt_optimizeOverdraw(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, threshold); +-} +- +-template +-inline size_t meshopt_optimizeVertexFetchRemap(unsigned int* destination, const T* indices, size_t index_count, size_t vertex_count) +-{ +- meshopt_IndexAdapter in(0, indices, index_count); +- +- 
return meshopt_optimizeVertexFetchRemap(destination, in.data, index_count, vertex_count); +-} +- +-template +-inline size_t meshopt_optimizeVertexFetch(void* destination, T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size) +-{ +- meshopt_IndexAdapter inout(indices, indices, index_count); +- +- return meshopt_optimizeVertexFetch(destination, inout.data, index_count, vertices, vertex_count, vertex_size); +-} +- +-template +-inline size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const T* indices, size_t index_count) +-{ +- meshopt_IndexAdapter in(0, indices, index_count); +- +- return meshopt_encodeIndexBuffer(buffer, buffer_size, in.data, index_count); +-} +- +-template +-inline int meshopt_decodeIndexBuffer(T* destination, size_t index_count, const unsigned char* buffer, size_t buffer_size) +-{ +- char index_size_valid[sizeof(T) == 2 || sizeof(T) == 4 ? 1 : -1]; +- (void)index_size_valid; +- +- return meshopt_decodeIndexBuffer(destination, index_count, sizeof(T), buffer, buffer_size); +-} +- +-template +-inline size_t meshopt_encodeIndexSequence(unsigned char* buffer, size_t buffer_size, const T* indices, size_t index_count) +-{ +- meshopt_IndexAdapter in(0, indices, index_count); +- +- return meshopt_encodeIndexSequence(buffer, buffer_size, in.data, index_count); +-} +- +-template +-inline int meshopt_decodeIndexSequence(T* destination, size_t index_count, const unsigned char* buffer, size_t buffer_size) +-{ +- char index_size_valid[sizeof(T) == 2 || sizeof(T) == 4 ? 1 : -1]; +- (void)index_size_valid; +- +- return meshopt_decodeIndexSequence(destination, index_count, sizeof(T), buffer, buffer_size); +-} +- +-template +-inline size_t meshopt_simplify(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options, float* result_error) +-{ +- meshopt_IndexAdapter in(0, indices, index_count); +- meshopt_IndexAdapter out(destination, 0, index_count); +- +- return meshopt_simplify(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, target_index_count, target_error, options, result_error); +-} +- +-template +-inline size_t meshopt_simplifySloppy(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error) +-{ +- meshopt_IndexAdapter in(0, indices, index_count); +- meshopt_IndexAdapter out(destination, 0, index_count); +- +- return meshopt_simplifySloppy(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, target_index_count, target_error, result_error); +-} +- +-template +-inline size_t meshopt_stripify(T* destination, const T* indices, size_t index_count, size_t vertex_count, T restart_index) +-{ +- meshopt_IndexAdapter in(0, indices, index_count); +- meshopt_IndexAdapter out(destination, 0, (index_count / 3) * 5); +- +- return meshopt_stripify(out.data, in.data, index_count, vertex_count, unsigned(restart_index)); +-} +- +-template +-inline size_t meshopt_unstripify(T* destination, const T* indices, size_t index_count, T restart_index) +-{ +- meshopt_IndexAdapter in(0, indices, index_count); +- meshopt_IndexAdapter out(destination, 0, (index_count - 2) * 3); +- +- return meshopt_unstripify(out.data, in.data, index_count, unsigned(restart_index)); +-} +- +-template 
+-inline meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const T* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int buffer_size) +-{ +- meshopt_IndexAdapter in(0, indices, index_count); +- +- return meshopt_analyzeVertexCache(in.data, index_count, vertex_count, cache_size, warp_size, buffer_size); +-} +- +-template +-inline meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +-{ +- meshopt_IndexAdapter in(0, indices, index_count); +- +- return meshopt_analyzeOverdraw(in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride); +-} +- +-template +-inline meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const T* indices, size_t index_count, size_t vertex_count, size_t vertex_size) +-{ +- meshopt_IndexAdapter in(0, indices, index_count); +- +- return meshopt_analyzeVertexFetch(in.data, index_count, vertex_count, vertex_size); +-} +- +-template +-inline size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight) +-{ +- meshopt_IndexAdapter in(0, indices, index_count); +- +- return meshopt_buildMeshlets(meshlets, meshlet_vertices, meshlet_triangles, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, max_vertices, max_triangles, cone_weight); +-} +- +-template +-inline size_t meshopt_buildMeshletsScan(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles) +-{ +- meshopt_IndexAdapter in(0, indices, index_count); +- +- return meshopt_buildMeshletsScan(meshlets, meshlet_vertices, meshlet_triangles, in.data, index_count, vertex_count, max_vertices, max_triangles); +-} +- +-template +-inline meshopt_Bounds meshopt_computeClusterBounds(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +-{ +- meshopt_IndexAdapter in(0, indices, index_count); +- +- return meshopt_computeClusterBounds(in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride); +-} +- +-template +-inline void meshopt_spatialSortTriangles(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +-{ +- meshopt_IndexAdapter in(0, indices, index_count); +- meshopt_IndexAdapter out(destination, 0, index_count); +- +- meshopt_spatialSortTriangles(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride); +-} +-#endif +- +-/** +- * Copyright (c) 2016-2022 Arseny Kapoulkine +- * +- * Permission is hereby granted, free of charge, to any person +- * obtaining a copy of this software and associated documentation +- * files (the "Software"), to deal in the Software without +- * restriction, including without limitation the rights to use, +- * copy, modify, merge, publish, distribute, sublicense, and/or sell +- * copies of the Software, and to permit persons to whom the +- * Software is furnished to do so, subject to the following +- * conditions: +- * +- * The above copyright notice and this permission notice shall be +- * 
included in all copies or substantial portions of the Software. +- * +- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +- * OTHER DEALINGS IN THE SOFTWARE. +- */ +diff --git a/src/3rdparty/meshoptimizer/src/overdrawanalyzer.cpp b/src/3rdparty/meshoptimizer/src/overdrawanalyzer.cpp +deleted file mode 100644 +index 8d5859b..0000000 +--- a/src/3rdparty/meshoptimizer/src/overdrawanalyzer.cpp ++++ /dev/null +@@ -1,230 +0,0 @@ +-// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +-#include "meshoptimizer.h" +- +-#include +-#include +-#include +- +-// This work is based on: +-// Nicolas Capens. Advanced Rasterization. 2004 +-namespace meshopt +-{ +- +-const int kViewport = 256; +- +-struct OverdrawBuffer +-{ +- float z[kViewport][kViewport][2]; +- unsigned int overdraw[kViewport][kViewport][2]; +-}; +- +-#ifndef min +-#define min(a, b) ((a) < (b) ? (a) : (b)) +-#endif +- +-#ifndef max +-#define max(a, b) ((a) > (b) ? (a) : (b)) +-#endif +- +-static float computeDepthGradients(float& dzdx, float& dzdy, float x1, float y1, float z1, float x2, float y2, float z2, float x3, float y3, float z3) +-{ +- // z2 = z1 + dzdx * (x2 - x1) + dzdy * (y2 - y1) +- // z3 = z1 + dzdx * (x3 - x1) + dzdy * (y3 - y1) +- // (x2-x1 y2-y1)(dzdx) = (z2-z1) +- // (x3-x1 y3-y1)(dzdy) (z3-z1) +- // we'll solve it with Cramer's rule +- float det = (x2 - x1) * (y3 - y1) - (y2 - y1) * (x3 - x1); +- float invdet = (det == 0) ? 
0 : 1 / det; +- +- dzdx = (z2 - z1) * (y3 - y1) - (y2 - y1) * (z3 - z1) * invdet; +- dzdy = (x2 - x1) * (z3 - z1) - (z2 - z1) * (x3 - x1) * invdet; +- +- return det; +-} +- +-// half-space fixed point triangle rasterizer +-static void rasterize(OverdrawBuffer* buffer, float v1x, float v1y, float v1z, float v2x, float v2y, float v2z, float v3x, float v3y, float v3z) +-{ +- // compute depth gradients +- float DZx, DZy; +- float det = computeDepthGradients(DZx, DZy, v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z); +- int sign = det > 0; +- +- // flip backfacing triangles to simplify rasterization logic +- if (sign) +- { +- // flipping v2 & v3 preserves depth gradients since they're based on v1 +- float t; +- t = v2x, v2x = v3x, v3x = t; +- t = v2y, v2y = v3y, v3y = t; +- t = v2z, v2z = v3z, v3z = t; +- +- // flip depth since we rasterize backfacing triangles to second buffer with reverse Z; only v1z is used below +- v1z = kViewport - v1z; +- DZx = -DZx; +- DZy = -DZy; +- } +- +- // coordinates, 28.4 fixed point +- int X1 = int(16.0f * v1x + 0.5f); +- int X2 = int(16.0f * v2x + 0.5f); +- int X3 = int(16.0f * v3x + 0.5f); +- +- int Y1 = int(16.0f * v1y + 0.5f); +- int Y2 = int(16.0f * v2y + 0.5f); +- int Y3 = int(16.0f * v3y + 0.5f); +- +- // bounding rectangle, clipped against viewport +- // since we rasterize pixels with covered centers, min >0.5 should round up +- // as for max, due to top-left filling convention we will never rasterize right/bottom edges +- // so max >= 0.5 should round down +- int minx = max((min(X1, min(X2, X3)) + 7) >> 4, 0); +- int maxx = min((max(X1, max(X2, X3)) + 7) >> 4, kViewport); +- int miny = max((min(Y1, min(Y2, Y3)) + 7) >> 4, 0); +- int maxy = min((max(Y1, max(Y2, Y3)) + 7) >> 4, kViewport); +- +- // deltas, 28.4 fixed point +- int DX12 = X1 - X2; +- int DX23 = X2 - X3; +- int DX31 = X3 - X1; +- +- int DY12 = Y1 - Y2; +- int DY23 = Y2 - Y3; +- int DY31 = Y3 - Y1; +- +- // fill convention correction +- int TL1 = DY12 < 0 || (DY12 == 0 && DX12 > 0); +- int TL2 = DY23 < 0 || (DY23 == 0 && DX23 > 0); +- int TL3 = DY31 < 0 || (DY31 == 0 && DX31 > 0); +- +- // half edge equations, 24.8 fixed point +- // note that we offset minx/miny by half pixel since we want to rasterize pixels with covered centers +- int FX = (minx << 4) + 8; +- int FY = (miny << 4) + 8; +- int CY1 = DX12 * (FY - Y1) - DY12 * (FX - X1) + TL1 - 1; +- int CY2 = DX23 * (FY - Y2) - DY23 * (FX - X2) + TL2 - 1; +- int CY3 = DX31 * (FY - Y3) - DY31 * (FX - X3) + TL3 - 1; +- float ZY = v1z + (DZx * float(FX - X1) + DZy * float(FY - Y1)) * (1 / 16.f); +- +- for (int y = miny; y < maxy; y++) +- { +- int CX1 = CY1; +- int CX2 = CY2; +- int CX3 = CY3; +- float ZX = ZY; +- +- for (int x = minx; x < maxx; x++) +- { +- // check if all CXn are non-negative +- if ((CX1 | CX2 | CX3) >= 0) +- { +- if (ZX >= buffer->z[y][x][sign]) +- { +- buffer->z[y][x][sign] = ZX; +- buffer->overdraw[y][x][sign]++; +- } +- } +- +- // signed left shift is UB for negative numbers so use unsigned-signed casts +- CX1 -= int(unsigned(DY12) << 4); +- CX2 -= int(unsigned(DY23) << 4); +- CX3 -= int(unsigned(DY31) << 4); +- ZX += DZx; +- } +- +- // signed left shift is UB for negative numbers so use unsigned-signed casts +- CY1 += int(unsigned(DX12) << 4); +- CY2 += int(unsigned(DX23) << 4); +- CY3 += int(unsigned(DX31) << 4); +- ZY += DZy; +- } +-} +- +-} // namespace meshopt +- +-meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t 
vertex_positions_stride) +-{ +- using namespace meshopt; +- +- assert(index_count % 3 == 0); +- assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); +- assert(vertex_positions_stride % sizeof(float) == 0); +- +- meshopt_Allocator allocator; +- +- size_t vertex_stride_float = vertex_positions_stride / sizeof(float); +- +- meshopt_OverdrawStatistics result = {}; +- +- float minv[3] = {FLT_MAX, FLT_MAX, FLT_MAX}; +- float maxv[3] = {-FLT_MAX, -FLT_MAX, -FLT_MAX}; +- +- for (size_t i = 0; i < vertex_count; ++i) +- { +- const float* v = vertex_positions + i * vertex_stride_float; +- +- for (int j = 0; j < 3; ++j) +- { +- minv[j] = min(minv[j], v[j]); +- maxv[j] = max(maxv[j], v[j]); +- } +- } +- +- float extent = max(maxv[0] - minv[0], max(maxv[1] - minv[1], maxv[2] - minv[2])); +- float scale = kViewport / extent; +- +- float* triangles = allocator.allocate(index_count * 3); +- +- for (size_t i = 0; i < index_count; ++i) +- { +- unsigned int index = indices[i]; +- assert(index < vertex_count); +- +- const float* v = vertex_positions + index * vertex_stride_float; +- +- triangles[i * 3 + 0] = (v[0] - minv[0]) * scale; +- triangles[i * 3 + 1] = (v[1] - minv[1]) * scale; +- triangles[i * 3 + 2] = (v[2] - minv[2]) * scale; +- } +- +- OverdrawBuffer* buffer = allocator.allocate(1); +- +- for (int axis = 0; axis < 3; ++axis) +- { +- memset(buffer, 0, sizeof(OverdrawBuffer)); +- +- for (size_t i = 0; i < index_count; i += 3) +- { +- const float* vn0 = &triangles[3 * (i + 0)]; +- const float* vn1 = &triangles[3 * (i + 1)]; +- const float* vn2 = &triangles[3 * (i + 2)]; +- +- switch (axis) +- { +- case 0: +- rasterize(buffer, vn0[2], vn0[1], vn0[0], vn1[2], vn1[1], vn1[0], vn2[2], vn2[1], vn2[0]); +- break; +- case 1: +- rasterize(buffer, vn0[0], vn0[2], vn0[1], vn1[0], vn1[2], vn1[1], vn2[0], vn2[2], vn2[1]); +- break; +- case 2: +- rasterize(buffer, vn0[1], vn0[0], vn0[2], vn1[1], vn1[0], vn1[2], vn2[1], vn2[0], vn2[2]); +- break; +- } +- } +- +- for (int y = 0; y < kViewport; ++y) +- for (int x = 0; x < kViewport; ++x) +- for (int s = 0; s < 2; ++s) +- { +- unsigned int overdraw = buffer->overdraw[y][x][s]; +- +- result.pixels_covered += overdraw > 0; +- result.pixels_shaded += overdraw; +- } +- } +- +- result.overdraw = result.pixels_covered ? float(result.pixels_shaded) / float(result.pixels_covered) : 0.f; +- +- return result; +-} +diff --git a/src/3rdparty/meshoptimizer/src/overdrawoptimizer.cpp b/src/3rdparty/meshoptimizer/src/overdrawoptimizer.cpp +deleted file mode 100644 +index 143656e..0000000 +--- a/src/3rdparty/meshoptimizer/src/overdrawoptimizer.cpp ++++ /dev/null +@@ -1,333 +0,0 @@ +-// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +-#include "meshoptimizer.h" +- +-#include +-#include +-#include +- +-// This work is based on: +-// Pedro Sander, Diego Nehab and Joshua Barczak. Fast Triangle Reordering for Vertex Locality and Reduced Overdraw. 
2007 +-namespace meshopt +-{ +- +-static void calculateSortData(float* sort_data, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_positions_stride, const unsigned int* clusters, size_t cluster_count) +-{ +- size_t vertex_stride_float = vertex_positions_stride / sizeof(float); +- +- float mesh_centroid[3] = {}; +- +- for (size_t i = 0; i < index_count; ++i) +- { +- const float* p = vertex_positions + vertex_stride_float * indices[i]; +- +- mesh_centroid[0] += p[0]; +- mesh_centroid[1] += p[1]; +- mesh_centroid[2] += p[2]; +- } +- +- mesh_centroid[0] /= index_count; +- mesh_centroid[1] /= index_count; +- mesh_centroid[2] /= index_count; +- +- for (size_t cluster = 0; cluster < cluster_count; ++cluster) +- { +- size_t cluster_begin = clusters[cluster] * 3; +- size_t cluster_end = (cluster + 1 < cluster_count) ? clusters[cluster + 1] * 3 : index_count; +- assert(cluster_begin < cluster_end); +- +- float cluster_area = 0; +- float cluster_centroid[3] = {}; +- float cluster_normal[3] = {}; +- +- for (size_t i = cluster_begin; i < cluster_end; i += 3) +- { +- const float* p0 = vertex_positions + vertex_stride_float * indices[i + 0]; +- const float* p1 = vertex_positions + vertex_stride_float * indices[i + 1]; +- const float* p2 = vertex_positions + vertex_stride_float * indices[i + 2]; +- +- float p10[3] = {p1[0] - p0[0], p1[1] - p0[1], p1[2] - p0[2]}; +- float p20[3] = {p2[0] - p0[0], p2[1] - p0[1], p2[2] - p0[2]}; +- +- float normalx = p10[1] * p20[2] - p10[2] * p20[1]; +- float normaly = p10[2] * p20[0] - p10[0] * p20[2]; +- float normalz = p10[0] * p20[1] - p10[1] * p20[0]; +- +- float area = sqrtf(normalx * normalx + normaly * normaly + normalz * normalz); +- +- cluster_centroid[0] += (p0[0] + p1[0] + p2[0]) * (area / 3); +- cluster_centroid[1] += (p0[1] + p1[1] + p2[1]) * (area / 3); +- cluster_centroid[2] += (p0[2] + p1[2] + p2[2]) * (area / 3); +- cluster_normal[0] += normalx; +- cluster_normal[1] += normaly; +- cluster_normal[2] += normalz; +- cluster_area += area; +- } +- +- float inv_cluster_area = cluster_area == 0 ? 0 : 1 / cluster_area; +- +- cluster_centroid[0] *= inv_cluster_area; +- cluster_centroid[1] *= inv_cluster_area; +- cluster_centroid[2] *= inv_cluster_area; +- +- float cluster_normal_length = sqrtf(cluster_normal[0] * cluster_normal[0] + cluster_normal[1] * cluster_normal[1] + cluster_normal[2] * cluster_normal[2]); +- float inv_cluster_normal_length = cluster_normal_length == 0 ? 0 : 1 / cluster_normal_length; +- +- cluster_normal[0] *= inv_cluster_normal_length; +- cluster_normal[1] *= inv_cluster_normal_length; +- cluster_normal[2] *= inv_cluster_normal_length; +- +- float centroid_vector[3] = {cluster_centroid[0] - mesh_centroid[0], cluster_centroid[1] - mesh_centroid[1], cluster_centroid[2] - mesh_centroid[2]}; +- +- sort_data[cluster] = centroid_vector[0] * cluster_normal[0] + centroid_vector[1] * cluster_normal[1] + centroid_vector[2] * cluster_normal[2]; +- } +-} +- +-static void calculateSortOrderRadix(unsigned int* sort_order, const float* sort_data, unsigned short* sort_keys, size_t cluster_count) +-{ +- // compute sort data bounds and renormalize, using fixed point snorm +- float sort_data_max = 1e-3f; +- +- for (size_t i = 0; i < cluster_count; ++i) +- { +- float dpa = fabsf(sort_data[i]); +- +- sort_data_max = (sort_data_max < dpa) ? 
dpa : sort_data_max; +- } +- +- const int sort_bits = 11; +- +- for (size_t i = 0; i < cluster_count; ++i) +- { +- // note that we flip distribution since high dot product should come first +- float sort_key = 0.5f - 0.5f * (sort_data[i] / sort_data_max); +- +- sort_keys[i] = meshopt_quantizeUnorm(sort_key, sort_bits) & ((1 << sort_bits) - 1); +- } +- +- // fill histogram for counting sort +- unsigned int histogram[1 << sort_bits]; +- memset(histogram, 0, sizeof(histogram)); +- +- for (size_t i = 0; i < cluster_count; ++i) +- { +- histogram[sort_keys[i]]++; +- } +- +- // compute offsets based on histogram data +- size_t histogram_sum = 0; +- +- for (size_t i = 0; i < 1 << sort_bits; ++i) +- { +- size_t count = histogram[i]; +- histogram[i] = unsigned(histogram_sum); +- histogram_sum += count; +- } +- +- assert(histogram_sum == cluster_count); +- +- // compute sort order based on offsets +- for (size_t i = 0; i < cluster_count; ++i) +- { +- sort_order[histogram[sort_keys[i]]++] = unsigned(i); +- } +-} +- +-static unsigned int updateCache(unsigned int a, unsigned int b, unsigned int c, unsigned int cache_size, unsigned int* cache_timestamps, unsigned int& timestamp) +-{ +- unsigned int cache_misses = 0; +- +- // if vertex is not in cache, put it in cache +- if (timestamp - cache_timestamps[a] > cache_size) +- { +- cache_timestamps[a] = timestamp++; +- cache_misses++; +- } +- +- if (timestamp - cache_timestamps[b] > cache_size) +- { +- cache_timestamps[b] = timestamp++; +- cache_misses++; +- } +- +- if (timestamp - cache_timestamps[c] > cache_size) +- { +- cache_timestamps[c] = timestamp++; +- cache_misses++; +- } +- +- return cache_misses; +-} +- +-static size_t generateHardBoundaries(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int* cache_timestamps) +-{ +- memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int)); +- +- unsigned int timestamp = cache_size + 1; +- +- size_t face_count = index_count / 3; +- +- size_t result = 0; +- +- for (size_t i = 0; i < face_count; ++i) +- { +- unsigned int m = updateCache(indices[i * 3 + 0], indices[i * 3 + 1], indices[i * 3 + 2], cache_size, &cache_timestamps[0], timestamp); +- +- // when all three vertices are not in the cache it's usually relatively safe to assume that this is a new patch in the mesh +- // that is disjoint from previous vertices; sometimes it might come back to reference existing vertices but that frequently +- // suggests an inefficiency in the vertex cache optimization algorithm +- // usually the first triangle has 3 misses unless it's degenerate - thus we make sure the first cluster always starts with 0 +- if (i == 0 || m == 3) +- { +- destination[result++] = unsigned(i); +- } +- } +- +- assert(result <= index_count / 3); +- +- return result; +-} +- +-static size_t generateSoftBoundaries(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const unsigned int* clusters, size_t cluster_count, unsigned int cache_size, float threshold, unsigned int* cache_timestamps) +-{ +- memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int)); +- +- unsigned int timestamp = 0; +- +- size_t result = 0; +- +- for (size_t it = 0; it < cluster_count; ++it) +- { +- size_t start = clusters[it]; +- size_t end = (it + 1 < cluster_count) ? 
clusters[it + 1] : index_count / 3; +- assert(start < end); +- +- // reset cache +- timestamp += cache_size + 1; +- +- // measure cluster ACMR +- unsigned int cluster_misses = 0; +- +- for (size_t i = start; i < end; ++i) +- { +- unsigned int m = updateCache(indices[i * 3 + 0], indices[i * 3 + 1], indices[i * 3 + 2], cache_size, &cache_timestamps[0], timestamp); +- +- cluster_misses += m; +- } +- +- float cluster_threshold = threshold * (float(cluster_misses) / float(end - start)); +- +- // first cluster always starts from the hard cluster boundary +- destination[result++] = unsigned(start); +- +- // reset cache +- timestamp += cache_size + 1; +- +- unsigned int running_misses = 0; +- unsigned int running_faces = 0; +- +- for (size_t i = start; i < end; ++i) +- { +- unsigned int m = updateCache(indices[i * 3 + 0], indices[i * 3 + 1], indices[i * 3 + 2], cache_size, &cache_timestamps[0], timestamp); +- +- running_misses += m; +- running_faces += 1; +- +- if (float(running_misses) / float(running_faces) <= cluster_threshold) +- { +- // we have reached the target ACMR with the current triangle so we need to start a new cluster on the next one +- // note that this may mean that we add 'end` to destination for the last triangle, which will imply that the last +- // cluster is empty; however, the 'pop_back' after the loop will clean it up +- destination[result++] = unsigned(i + 1); +- +- // reset cache +- timestamp += cache_size + 1; +- +- running_misses = 0; +- running_faces = 0; +- } +- } +- +- // each time we reach the target ACMR we flush the cluster +- // this means that the last cluster is by definition not very good - there are frequent cases where we are left with a few triangles +- // in the last cluster, producing a very bad ACMR and significantly penalizing the overall results +- // thus we remove the last cluster boundary, merging the last complete cluster with the last incomplete one +- // there are sometimes cases when the last cluster is actually good enough - in which case the code above would have added 'end' +- // to the cluster boundary array which we need to remove anyway - this code will do that automatically +- if (destination[result - 1] != start) +- { +- result--; +- } +- } +- +- assert(result >= cluster_count); +- assert(result <= index_count / 3); +- +- return result; +-} +- +-} // namespace meshopt +- +-void meshopt_optimizeOverdraw(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float threshold) +-{ +- using namespace meshopt; +- +- assert(index_count % 3 == 0); +- assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); +- assert(vertex_positions_stride % sizeof(float) == 0); +- +- meshopt_Allocator allocator; +- +- // guard for empty meshes +- if (index_count == 0 || vertex_count == 0) +- return; +- +- // support in-place optimization +- if (destination == indices) +- { +- unsigned int* indices_copy = allocator.allocate(index_count); +- memcpy(indices_copy, indices, index_count * sizeof(unsigned int)); +- indices = indices_copy; +- } +- +- unsigned int cache_size = 16; +- +- unsigned int* cache_timestamps = allocator.allocate(vertex_count); +- +- // generate hard boundaries from full-triangle cache misses +- unsigned int* hard_clusters = allocator.allocate(index_count / 3); +- size_t hard_cluster_count = generateHardBoundaries(hard_clusters, indices, index_count, vertex_count, cache_size, cache_timestamps); +- +- // generate soft boundaries 
+- unsigned int* soft_clusters = allocator.allocate(index_count / 3 + 1); +- size_t soft_cluster_count = generateSoftBoundaries(soft_clusters, indices, index_count, vertex_count, hard_clusters, hard_cluster_count, cache_size, threshold, cache_timestamps); +- +- const unsigned int* clusters = soft_clusters; +- size_t cluster_count = soft_cluster_count; +- +- // fill sort data +- float* sort_data = allocator.allocate(cluster_count); +- calculateSortData(sort_data, indices, index_count, vertex_positions, vertex_positions_stride, clusters, cluster_count); +- +- // sort clusters using sort data +- unsigned short* sort_keys = allocator.allocate(cluster_count); +- unsigned int* sort_order = allocator.allocate(cluster_count); +- calculateSortOrderRadix(sort_order, sort_data, sort_keys, cluster_count); +- +- // fill output buffer +- size_t offset = 0; +- +- for (size_t it = 0; it < cluster_count; ++it) +- { +- unsigned int cluster = sort_order[it]; +- assert(cluster < cluster_count); +- +- size_t cluster_begin = clusters[cluster] * 3; +- size_t cluster_end = (cluster + 1 < cluster_count) ? clusters[cluster + 1] * 3 : index_count; +- assert(cluster_begin < cluster_end); +- +- memcpy(destination + offset, indices + cluster_begin, (cluster_end - cluster_begin) * sizeof(unsigned int)); +- offset += cluster_end - cluster_begin; +- } +- +- assert(offset == index_count); +-} +diff --git a/src/3rdparty/meshoptimizer/src/simplifier.cpp b/src/3rdparty/meshoptimizer/src/simplifier.cpp +deleted file mode 100644 +index 72704c1..0000000 +--- a/src/3rdparty/meshoptimizer/src/simplifier.cpp ++++ /dev/null +@@ -1,1677 +0,0 @@ +-// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +-#include "meshoptimizer.h" +- +-#include +-#include +-#include +-#include +- +-#ifndef TRACE +-#define TRACE 0 +-#endif +- +-#if TRACE +-#include +-#endif +- +-#if TRACE +-#define TRACESTATS(i) stats[i]++; +-#else +-#define TRACESTATS(i) (void)0 +-#endif +- +-// This work is based on: +-// Michael Garland and Paul S. Heckbert. Surface simplification using quadric error metrics. 1997 +-// Michael Garland. Quadric-based polygonal surface simplification. 1999 +-// Peter Lindstrom. Out-of-Core Simplification of Large Polygonal Models. 2000 +-// Matthias Teschner, Bruno Heidelberger, Matthias Mueller, Danat Pomeranets, Markus Gross. Optimized Spatial Hashing for Collision Detection of Deformable Objects. 2003 +-// Peter Van Sandt, Yannis Chronis, Jignesh M. Patel. Efficiently Searching In-Memory Sorted Arrays: Revenge of the Interpolation Search? 2019 +-namespace meshopt +-{ +- +-struct EdgeAdjacency +-{ +- struct Edge +- { +- unsigned int next; +- unsigned int prev; +- }; +- +- unsigned int* counts; +- unsigned int* offsets; +- Edge* data; +-}; +- +-static void prepareEdgeAdjacency(EdgeAdjacency& adjacency, size_t index_count, size_t vertex_count, meshopt_Allocator& allocator) +-{ +- adjacency.counts = allocator.allocate(vertex_count); +- adjacency.offsets = allocator.allocate(vertex_count); +- adjacency.data = allocator.allocate(index_count); +-} +- +-static void updateEdgeAdjacency(EdgeAdjacency& adjacency, const unsigned int* indices, size_t index_count, size_t vertex_count, const unsigned int* remap) +-{ +- size_t face_count = index_count / 3; +- +- // fill edge counts +- memset(adjacency.counts, 0, vertex_count * sizeof(unsigned int)); +- +- for (size_t i = 0; i < index_count; ++i) +- { +- unsigned int v = remap ? 
remap[indices[i]] : indices[i]; +- assert(v < vertex_count); +- +- adjacency.counts[v]++; +- } +- +- // fill offset table +- unsigned int offset = 0; +- +- for (size_t i = 0; i < vertex_count; ++i) +- { +- adjacency.offsets[i] = offset; +- offset += adjacency.counts[i]; +- } +- +- assert(offset == index_count); +- +- // fill edge data +- for (size_t i = 0; i < face_count; ++i) +- { +- unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2]; +- +- if (remap) +- { +- a = remap[a]; +- b = remap[b]; +- c = remap[c]; +- } +- +- adjacency.data[adjacency.offsets[a]].next = b; +- adjacency.data[adjacency.offsets[a]].prev = c; +- adjacency.offsets[a]++; +- +- adjacency.data[adjacency.offsets[b]].next = c; +- adjacency.data[adjacency.offsets[b]].prev = a; +- adjacency.offsets[b]++; +- +- adjacency.data[adjacency.offsets[c]].next = a; +- adjacency.data[adjacency.offsets[c]].prev = b; +- adjacency.offsets[c]++; +- } +- +- // fix offsets that have been disturbed by the previous pass +- for (size_t i = 0; i < vertex_count; ++i) +- { +- assert(adjacency.offsets[i] >= adjacency.counts[i]); +- +- adjacency.offsets[i] -= adjacency.counts[i]; +- } +-} +- +-struct PositionHasher +-{ +- const float* vertex_positions; +- size_t vertex_stride_float; +- +- size_t hash(unsigned int index) const +- { +- const unsigned int* key = reinterpret_cast(vertex_positions + index * vertex_stride_float); +- +- // scramble bits to make sure that integer coordinates have entropy in lower bits +- unsigned int x = key[0] ^ (key[0] >> 17); +- unsigned int y = key[1] ^ (key[1] >> 17); +- unsigned int z = key[2] ^ (key[2] >> 17); +- +- // Optimized Spatial Hashing for Collision Detection of Deformable Objects +- return (x * 73856093) ^ (y * 19349663) ^ (z * 83492791); +- } +- +- bool equal(unsigned int lhs, unsigned int rhs) const +- { +- return memcmp(vertex_positions + lhs * vertex_stride_float, vertex_positions + rhs * vertex_stride_float, sizeof(float) * 3) == 0; +- } +-}; +- +-static size_t hashBuckets2(size_t count) +-{ +- size_t buckets = 1; +- while (buckets < count + count / 4) +- buckets *= 2; +- +- return buckets; +-} +- +-template +-static T* hashLookup2(T* table, size_t buckets, const Hash& hash, const T& key, const T& empty) +-{ +- assert(buckets > 0); +- assert((buckets & (buckets - 1)) == 0); +- +- size_t hashmod = buckets - 1; +- size_t bucket = hash.hash(key) & hashmod; +- +- for (size_t probe = 0; probe <= hashmod; ++probe) +- { +- T& item = table[bucket]; +- +- if (item == empty) +- return &item; +- +- if (hash.equal(item, key)) +- return &item; +- +- // hash collision, quadratic probing +- bucket = (bucket + probe + 1) & hashmod; +- } +- +- assert(false && "Hash table is full"); // unreachable +- return 0; +-} +- +-static void buildPositionRemap(unsigned int* remap, unsigned int* wedge, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, meshopt_Allocator& allocator) +-{ +- PositionHasher hasher = {vertex_positions_data, vertex_positions_stride / sizeof(float)}; +- +- size_t table_size = hashBuckets2(vertex_count); +- unsigned int* table = allocator.allocate(table_size); +- memset(table, -1, table_size * sizeof(unsigned int)); +- +- // build forward remap: for each vertex, which other (canonical) vertex does it map to? 
+- // we use position equivalence for this, and remap vertices to other existing vertices +- for (size_t i = 0; i < vertex_count; ++i) +- { +- unsigned int index = unsigned(i); +- unsigned int* entry = hashLookup2(table, table_size, hasher, index, ~0u); +- +- if (*entry == ~0u) +- *entry = index; +- +- remap[index] = *entry; +- } +- +- // build wedge table: for each vertex, which other vertex is the next wedge that also maps to the same vertex? +- // entries in table form a (cyclic) wedge loop per vertex; for manifold vertices, wedge[i] == remap[i] == i +- for (size_t i = 0; i < vertex_count; ++i) +- wedge[i] = unsigned(i); +- +- for (size_t i = 0; i < vertex_count; ++i) +- if (remap[i] != i) +- { +- unsigned int r = remap[i]; +- +- wedge[i] = wedge[r]; +- wedge[r] = unsigned(i); +- } +-} +- +-enum VertexKind +-{ +- Kind_Manifold, // not on an attribute seam, not on any boundary +- Kind_Border, // not on an attribute seam, has exactly two open edges +- Kind_Seam, // on an attribute seam with exactly two attribute seam edges +- Kind_Complex, // none of the above; these vertices can move as long as all wedges move to the target vertex +- Kind_Locked, // none of the above; these vertices can't move +- +- Kind_Count +-}; +- +-// manifold vertices can collapse onto anything +-// border/seam vertices can only be collapsed onto border/seam respectively +-// complex vertices can collapse onto complex/locked +-// a rule of thumb is that collapsing kind A into kind B preserves the kind B in the target vertex +-// for example, while we could collapse Complex into Manifold, this would mean the target vertex isn't Manifold anymore +-const unsigned char kCanCollapse[Kind_Count][Kind_Count] = { +- {1, 1, 1, 1, 1}, +- {0, 1, 0, 0, 0}, +- {0, 0, 1, 0, 0}, +- {0, 0, 0, 1, 1}, +- {0, 0, 0, 0, 0}, +-}; +- +-// if a vertex is manifold or seam, adjoining edges are guaranteed to have an opposite edge +-// note that for seam edges, the opposite edge isn't present in the attribute-based topology +-// but is present if you consider a position-only mesh variant +-const unsigned char kHasOpposite[Kind_Count][Kind_Count] = { +- {1, 1, 1, 0, 1}, +- {1, 0, 1, 0, 0}, +- {1, 1, 1, 0, 1}, +- {0, 0, 0, 0, 0}, +- {1, 0, 1, 0, 0}, +-}; +- +-static bool hasEdge(const EdgeAdjacency& adjacency, unsigned int a, unsigned int b) +-{ +- unsigned int count = adjacency.counts[a]; +- const EdgeAdjacency::Edge* edges = adjacency.data + adjacency.offsets[a]; +- +- for (size_t i = 0; i < count; ++i) +- if (edges[i].next == b) +- return true; +- +- return false; +-} +- +-static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned int* loopback, size_t vertex_count, const EdgeAdjacency& adjacency, const unsigned int* remap, const unsigned int* wedge, unsigned int options) +-{ +- memset(loop, -1, vertex_count * sizeof(unsigned int)); +- memset(loopback, -1, vertex_count * sizeof(unsigned int)); +- +- // incoming & outgoing open edges: ~0u if no open edges, i if there are more than 1 +- // note that this is the same data as required in loop[] arrays; loop[] data is only valid for border/seam +- // but here it's okay to fill the data out for other types of vertices as well +- unsigned int* openinc = loopback; +- unsigned int* openout = loop; +- +- for (size_t i = 0; i < vertex_count; ++i) +- { +- unsigned int vertex = unsigned(i); +- +- unsigned int count = adjacency.counts[vertex]; +- const EdgeAdjacency::Edge* edges = adjacency.data + adjacency.offsets[vertex]; +- +- for (size_t j = 0; j < count; ++j) +- { +- unsigned 
int target = edges[j].next; +- +- if (target == vertex) +- { +- // degenerate triangles have two distinct edges instead of three, and the self edge +- // is bi-directional by definition; this can break border/seam classification by "closing" +- // the open edge from another triangle and falsely marking the vertex as manifold +- // instead we mark the vertex as having >1 open edges which turns it into locked/complex +- openinc[vertex] = openout[vertex] = vertex; +- } +- else if (!hasEdge(adjacency, target, vertex)) +- { +- openinc[target] = (openinc[target] == ~0u) ? vertex : target; +- openout[vertex] = (openout[vertex] == ~0u) ? target : vertex; +- } +- } +- } +- +-#if TRACE +- size_t stats[4] = {}; +-#endif +- +- for (size_t i = 0; i < vertex_count; ++i) +- { +- if (remap[i] == i) +- { +- if (wedge[i] == i) +- { +- // no attribute seam, need to check if it's manifold +- unsigned int openi = openinc[i], openo = openout[i]; +- +- // note: we classify any vertices with no open edges as manifold +- // this is technically incorrect - if 4 triangles share an edge, we'll classify vertices as manifold +- // it's unclear if this is a problem in practice +- if (openi == ~0u && openo == ~0u) +- { +- result[i] = Kind_Manifold; +- } +- else if (openi != i && openo != i) +- { +- result[i] = Kind_Border; +- } +- else +- { +- result[i] = Kind_Locked; +- TRACESTATS(0); +- } +- } +- else if (wedge[wedge[i]] == i) +- { +- // attribute seam; need to distinguish between Seam and Locked +- unsigned int w = wedge[i]; +- unsigned int openiv = openinc[i], openov = openout[i]; +- unsigned int openiw = openinc[w], openow = openout[w]; +- +- // seam should have one open half-edge for each vertex, and the edges need to "connect" - point to the same vertex post-remap +- if (openiv != ~0u && openiv != i && openov != ~0u && openov != i && +- openiw != ~0u && openiw != w && openow != ~0u && openow != w) +- { +- if (remap[openiv] == remap[openow] && remap[openov] == remap[openiw]) +- { +- result[i] = Kind_Seam; +- } +- else +- { +- result[i] = Kind_Locked; +- TRACESTATS(1); +- } +- } +- else +- { +- result[i] = Kind_Locked; +- TRACESTATS(2); +- } +- } +- else +- { +- // more than one vertex maps to this one; we don't have classification available +- result[i] = Kind_Locked; +- TRACESTATS(3); +- } +- } +- else +- { +- assert(remap[i] < i); +- +- result[i] = result[remap[i]]; +- } +- } +- +- if (options & meshopt_SimplifyLockBorder) +- for (size_t i = 0; i < vertex_count; ++i) +- if (result[i] == Kind_Border) +- result[i] = Kind_Locked; +- +-#if TRACE +- printf("locked: many open edges %d, disconnected seam %d, many seam edges %d, many wedges %d\n", +- int(stats[0]), int(stats[1]), int(stats[2]), int(stats[3])); +-#endif +-} +- +-struct Vector3 +-{ +- float x, y, z; +-}; +- +-static float rescalePositions(Vector3* result, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride) +-{ +- size_t vertex_stride_float = vertex_positions_stride / sizeof(float); +- +- float minv[3] = {FLT_MAX, FLT_MAX, FLT_MAX}; +- float maxv[3] = {-FLT_MAX, -FLT_MAX, -FLT_MAX}; +- +- for (size_t i = 0; i < vertex_count; ++i) +- { +- const float* v = vertex_positions_data + i * vertex_stride_float; +- +- if (result) +- { +- result[i].x = v[0]; +- result[i].y = v[1]; +- result[i].z = v[2]; +- } +- +- for (int j = 0; j < 3; ++j) +- { +- float vj = v[j]; +- +- minv[j] = minv[j] > vj ? vj : minv[j]; +- maxv[j] = maxv[j] < vj ? vj : maxv[j]; +- } +- } +- +- float extent = 0.f; +- +- extent = (maxv[0] - minv[0]) < extent ? 
extent : (maxv[0] - minv[0]); +- extent = (maxv[1] - minv[1]) < extent ? extent : (maxv[1] - minv[1]); +- extent = (maxv[2] - minv[2]) < extent ? extent : (maxv[2] - minv[2]); +- +- if (result) +- { +- float scale = extent == 0 ? 0.f : 1.f / extent; +- +- for (size_t i = 0; i < vertex_count; ++i) +- { +- result[i].x = (result[i].x - minv[0]) * scale; +- result[i].y = (result[i].y - minv[1]) * scale; +- result[i].z = (result[i].z - minv[2]) * scale; +- } +- } +- +- return extent; +-} +- +-struct Quadric +-{ +- float a00, a11, a22; +- float a10, a20, a21; +- float b0, b1, b2, c; +- float w; +-}; +- +-struct Collapse +-{ +- unsigned int v0; +- unsigned int v1; +- +- union +- { +- unsigned int bidi; +- float error; +- unsigned int errorui; +- }; +-}; +- +-static float normalize(Vector3& v) +-{ +- float length = sqrtf(v.x * v.x + v.y * v.y + v.z * v.z); +- +- if (length > 0) +- { +- v.x /= length; +- v.y /= length; +- v.z /= length; +- } +- +- return length; +-} +- +-static void quadricAdd(Quadric& Q, const Quadric& R) +-{ +- Q.a00 += R.a00; +- Q.a11 += R.a11; +- Q.a22 += R.a22; +- Q.a10 += R.a10; +- Q.a20 += R.a20; +- Q.a21 += R.a21; +- Q.b0 += R.b0; +- Q.b1 += R.b1; +- Q.b2 += R.b2; +- Q.c += R.c; +- Q.w += R.w; +-} +- +-static float quadricError(const Quadric& Q, const Vector3& v) +-{ +- float rx = Q.b0; +- float ry = Q.b1; +- float rz = Q.b2; +- +- rx += Q.a10 * v.y; +- ry += Q.a21 * v.z; +- rz += Q.a20 * v.x; +- +- rx *= 2; +- ry *= 2; +- rz *= 2; +- +- rx += Q.a00 * v.x; +- ry += Q.a11 * v.y; +- rz += Q.a22 * v.z; +- +- float r = Q.c; +- r += rx * v.x; +- r += ry * v.y; +- r += rz * v.z; +- +- float s = Q.w == 0.f ? 0.f : 1.f / Q.w; +- +- return fabsf(r) * s; +-} +- +-static void quadricFromPlane(Quadric& Q, float a, float b, float c, float d, float w) +-{ +- float aw = a * w; +- float bw = b * w; +- float cw = c * w; +- float dw = d * w; +- +- Q.a00 = a * aw; +- Q.a11 = b * bw; +- Q.a22 = c * cw; +- Q.a10 = a * bw; +- Q.a20 = a * cw; +- Q.a21 = b * cw; +- Q.b0 = a * dw; +- Q.b1 = b * dw; +- Q.b2 = c * dw; +- Q.c = d * dw; +- Q.w = w; +-} +- +-static void quadricFromPoint(Quadric& Q, float x, float y, float z, float w) +-{ +- // we need to encode (x - X) ^ 2 + (y - Y)^2 + (z - Z)^2 into the quadric +- Q.a00 = w; +- Q.a11 = w; +- Q.a22 = w; +- Q.a10 = 0.f; +- Q.a20 = 0.f; +- Q.a21 = 0.f; +- Q.b0 = -2.f * x * w; +- Q.b1 = -2.f * y * w; +- Q.b2 = -2.f * z * w; +- Q.c = (x * x + y * y + z * z) * w; +- Q.w = w; +-} +- +-static void quadricFromTriangle(Quadric& Q, const Vector3& p0, const Vector3& p1, const Vector3& p2, float weight) +-{ +- Vector3 p10 = {p1.x - p0.x, p1.y - p0.y, p1.z - p0.z}; +- Vector3 p20 = {p2.x - p0.x, p2.y - p0.y, p2.z - p0.z}; +- +- // normal = cross(p1 - p0, p2 - p0) +- Vector3 normal = {p10.y * p20.z - p10.z * p20.y, p10.z * p20.x - p10.x * p20.z, p10.x * p20.y - p10.y * p20.x}; +- float area = normalize(normal); +- +- float distance = normal.x * p0.x + normal.y * p0.y + normal.z * p0.z; +- +- // we use sqrtf(area) so that the error is scaled linearly; this tends to improve silhouettes +- quadricFromPlane(Q, normal.x, normal.y, normal.z, -distance, sqrtf(area) * weight); +-} +- +-static void quadricFromTriangleEdge(Quadric& Q, const Vector3& p0, const Vector3& p1, const Vector3& p2, float weight) +-{ +- Vector3 p10 = {p1.x - p0.x, p1.y - p0.y, p1.z - p0.z}; +- float length = normalize(p10); +- +- // p20p = length of projection of p2-p0 onto normalize(p1 - p0) +- Vector3 p20 = {p2.x - p0.x, p2.y - p0.y, p2.z - p0.z}; +- float p20p = p20.x * p10.x + p20.y * p10.y + 
p20.z * p10.z; +- +- // normal = altitude of triangle from point p2 onto edge p1-p0 +- Vector3 normal = {p20.x - p10.x * p20p, p20.y - p10.y * p20p, p20.z - p10.z * p20p}; +- normalize(normal); +- +- float distance = normal.x * p0.x + normal.y * p0.y + normal.z * p0.z; +- +- // note: the weight is scaled linearly with edge length; this has to match the triangle weight +- quadricFromPlane(Q, normal.x, normal.y, normal.z, -distance, length * weight); +-} +- +-static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* remap) +-{ +- for (size_t i = 0; i < index_count; i += 3) +- { +- unsigned int i0 = indices[i + 0]; +- unsigned int i1 = indices[i + 1]; +- unsigned int i2 = indices[i + 2]; +- +- Quadric Q; +- quadricFromTriangle(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], 1.f); +- +- quadricAdd(vertex_quadrics[remap[i0]], Q); +- quadricAdd(vertex_quadrics[remap[i1]], Q); +- quadricAdd(vertex_quadrics[remap[i2]], Q); +- } +-} +- +-static void fillEdgeQuadrics(Quadric* vertex_quadrics, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* remap, const unsigned char* vertex_kind, const unsigned int* loop, const unsigned int* loopback) +-{ +- for (size_t i = 0; i < index_count; i += 3) +- { +- static const int next[3] = {1, 2, 0}; +- +- for (int e = 0; e < 3; ++e) +- { +- unsigned int i0 = indices[i + e]; +- unsigned int i1 = indices[i + next[e]]; +- +- unsigned char k0 = vertex_kind[i0]; +- unsigned char k1 = vertex_kind[i1]; +- +- // check that either i0 or i1 are border/seam and are on the same edge loop +- // note that we need to add the error even for edged that connect e.g. border & locked +- // if we don't do that, the adjacent border->border edge won't have correct errors for corners +- if (k0 != Kind_Border && k0 != Kind_Seam && k1 != Kind_Border && k1 != Kind_Seam) +- continue; +- +- if ((k0 == Kind_Border || k0 == Kind_Seam) && loop[i0] != i1) +- continue; +- +- if ((k1 == Kind_Border || k1 == Kind_Seam) && loopback[i1] != i0) +- continue; +- +- // seam edges should occur twice (i0->i1 and i1->i0) - skip redundant edges +- if (kHasOpposite[k0][k1] && remap[i1] > remap[i0]) +- continue; +- +- unsigned int i2 = indices[i + next[next[e]]]; +- +- // we try hard to maintain border edge geometry; seam edges can move more freely +- // due to topological restrictions on collapses, seam quadrics slightly improves collapse structure but aren't critical +- const float kEdgeWeightSeam = 1.f; +- const float kEdgeWeightBorder = 10.f; +- +- float edgeWeight = (k0 == Kind_Border || k1 == Kind_Border) ? kEdgeWeightBorder : kEdgeWeightSeam; +- +- Quadric Q; +- quadricFromTriangleEdge(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], edgeWeight); +- +- quadricAdd(vertex_quadrics[remap[i0]], Q); +- quadricAdd(vertex_quadrics[remap[i1]], Q); +- } +- } +-} +- +-// does triangle ABC flip when C is replaced with D? 
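+-// for illustration: the check compares the winding of ABC and ABD via the sign of
+-// dot(cross(b - a, c - a), cross(b - a, d - a)); e.g. with a = (0,0,0), b = (1,0,0),
+-// c = (0,1,0) the normal of ABC points along +Z, while d = (0,-1,0) gives -Z,
+-// so a collapse moving c onto d would flip the face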
+-static bool hasTriangleFlip(const Vector3& a, const Vector3& b, const Vector3& c, const Vector3& d) +-{ +- Vector3 eb = {b.x - a.x, b.y - a.y, b.z - a.z}; +- Vector3 ec = {c.x - a.x, c.y - a.y, c.z - a.z}; +- Vector3 ed = {d.x - a.x, d.y - a.y, d.z - a.z}; +- +- Vector3 nbc = {eb.y * ec.z - eb.z * ec.y, eb.z * ec.x - eb.x * ec.z, eb.x * ec.y - eb.y * ec.x}; +- Vector3 nbd = {eb.y * ed.z - eb.z * ed.y, eb.z * ed.x - eb.x * ed.z, eb.x * ed.y - eb.y * ed.x}; +- +- return nbc.x * nbd.x + nbc.y * nbd.y + nbc.z * nbd.z < 0; +-} +- +-static bool hasTriangleFlips(const EdgeAdjacency& adjacency, const Vector3* vertex_positions, const unsigned int* collapse_remap, unsigned int i0, unsigned int i1) +-{ +- assert(collapse_remap[i0] == i0); +- assert(collapse_remap[i1] == i1); +- +- const Vector3& v0 = vertex_positions[i0]; +- const Vector3& v1 = vertex_positions[i1]; +- +- const EdgeAdjacency::Edge* edges = &adjacency.data[adjacency.offsets[i0]]; +- size_t count = adjacency.counts[i0]; +- +- for (size_t i = 0; i < count; ++i) +- { +- unsigned int a = collapse_remap[edges[i].next]; +- unsigned int b = collapse_remap[edges[i].prev]; +- +- // skip triangles that get collapsed +- // note: this is mathematically redundant as if either of these is true, the dot product in hasTriangleFlip should be 0 +- if (a == i1 || b == i1) +- continue; +- +- // early-out when at least one triangle flips due to a collapse +- if (hasTriangleFlip(vertex_positions[a], vertex_positions[b], v0, v1)) +- return true; +- } +- +- return false; +-} +- +-static size_t pickEdgeCollapses(Collapse* collapses, const unsigned int* indices, size_t index_count, const unsigned int* remap, const unsigned char* vertex_kind, const unsigned int* loop) +-{ +- size_t collapse_count = 0; +- +- for (size_t i = 0; i < index_count; i += 3) +- { +- static const int next[3] = {1, 2, 0}; +- +- for (int e = 0; e < 3; ++e) +- { +- unsigned int i0 = indices[i + e]; +- unsigned int i1 = indices[i + next[e]]; +- +- // this can happen either when input has a zero-length edge, or when we perform collapses for complex +- // topology w/seams and collapse a manifold vertex that connects to both wedges onto one of them +- // we leave edges like this alone since they may be important for preserving mesh integrity +- if (remap[i0] == remap[i1]) +- continue; +- +- unsigned char k0 = vertex_kind[i0]; +- unsigned char k1 = vertex_kind[i1]; +- +- // the edge has to be collapsible in at least one direction +- if (!(kCanCollapse[k0][k1] | kCanCollapse[k1][k0])) +- continue; +- +- // manifold and seam edges should occur twice (i0->i1 and i1->i0) - skip redundant edges +- if (kHasOpposite[k0][k1] && remap[i1] > remap[i0]) +- continue; +- +- // two vertices are on a border or a seam, but there's no direct edge between them +- // this indicates that they belong to two different edge loops and we should not collapse this edge +- // loop[] tracks half edges so we only need to check i0->i1 +- if (k0 == k1 && (k0 == Kind_Border || k0 == Kind_Seam) && loop[i0] != i1) +- continue; +- +- // edge can be collapsed in either direction - we will pick the one with minimum error +- // note: we evaluate error later during collapse ranking, here we just tag the edge as bidirectional +- if (kCanCollapse[k0][k1] & kCanCollapse[k1][k0]) +- { +- Collapse c = {i0, i1, {/* bidi= */ 1}}; +- collapses[collapse_count++] = c; +- } +- else +- { +- // edge can only be collapsed in one direction +- unsigned int e0 = kCanCollapse[k0][k1] ? i0 : i1; +- unsigned int e1 = kCanCollapse[k0][k1] ? 
i1 : i0; +- +- Collapse c = {e0, e1, {/* bidi= */ 0}}; +- collapses[collapse_count++] = c; +- } +- } +- } +- +- return collapse_count; +-} +- +-static void rankEdgeCollapses(Collapse* collapses, size_t collapse_count, const Vector3* vertex_positions, const Quadric* vertex_quadrics, const unsigned int* remap) +-{ +- for (size_t i = 0; i < collapse_count; ++i) +- { +- Collapse& c = collapses[i]; +- +- unsigned int i0 = c.v0; +- unsigned int i1 = c.v1; +- +- // most edges are bidirectional which means we need to evaluate errors for two collapses +- // to keep this code branchless we just use the same edge for unidirectional edges +- unsigned int j0 = c.bidi ? i1 : i0; +- unsigned int j1 = c.bidi ? i0 : i1; +- +- const Quadric& qi = vertex_quadrics[remap[i0]]; +- const Quadric& qj = vertex_quadrics[remap[j0]]; +- +- float ei = quadricError(qi, vertex_positions[i1]); +- float ej = quadricError(qj, vertex_positions[j1]); +- +- // pick edge direction with minimal error +- c.v0 = ei <= ej ? i0 : j0; +- c.v1 = ei <= ej ? i1 : j1; +- c.error = ei <= ej ? ei : ej; +- } +-} +- +-#if TRACE > 1 +-static void dumpEdgeCollapses(const Collapse* collapses, size_t collapse_count, const unsigned char* vertex_kind) +-{ +- size_t ckinds[Kind_Count][Kind_Count] = {}; +- float cerrors[Kind_Count][Kind_Count] = {}; +- +- for (int k0 = 0; k0 < Kind_Count; ++k0) +- for (int k1 = 0; k1 < Kind_Count; ++k1) +- cerrors[k0][k1] = FLT_MAX; +- +- for (size_t i = 0; i < collapse_count; ++i) +- { +- unsigned int i0 = collapses[i].v0; +- unsigned int i1 = collapses[i].v1; +- +- unsigned char k0 = vertex_kind[i0]; +- unsigned char k1 = vertex_kind[i1]; +- +- ckinds[k0][k1]++; +- cerrors[k0][k1] = (collapses[i].error < cerrors[k0][k1]) ? collapses[i].error : cerrors[k0][k1]; +- } +- +- for (int k0 = 0; k0 < Kind_Count; ++k0) +- for (int k1 = 0; k1 < Kind_Count; ++k1) +- if (ckinds[k0][k1]) +- printf("collapses %d -> %d: %d, min error %e\n", k0, k1, int(ckinds[k0][k1]), ckinds[k0][k1] ? 
sqrtf(cerrors[k0][k1]) : 0.f); +-} +- +-static void dumpLockedCollapses(const unsigned int* indices, size_t index_count, const unsigned char* vertex_kind) +-{ +- size_t locked_collapses[Kind_Count][Kind_Count] = {}; +- +- for (size_t i = 0; i < index_count; i += 3) +- { +- static const int next[3] = {1, 2, 0}; +- +- for (int e = 0; e < 3; ++e) +- { +- unsigned int i0 = indices[i + e]; +- unsigned int i1 = indices[i + next[e]]; +- +- unsigned char k0 = vertex_kind[i0]; +- unsigned char k1 = vertex_kind[i1]; +- +- locked_collapses[k0][k1] += !kCanCollapse[k0][k1] && !kCanCollapse[k1][k0]; +- } +- } +- +- for (int k0 = 0; k0 < Kind_Count; ++k0) +- for (int k1 = 0; k1 < Kind_Count; ++k1) +- if (locked_collapses[k0][k1]) +- printf("locked collapses %d -> %d: %d\n", k0, k1, int(locked_collapses[k0][k1])); +-} +-#endif +- +-static void sortEdgeCollapses(unsigned int* sort_order, const Collapse* collapses, size_t collapse_count) +-{ +- const int sort_bits = 11; +- +- // fill histogram for counting sort +- unsigned int histogram[1 << sort_bits]; +- memset(histogram, 0, sizeof(histogram)); +- +- for (size_t i = 0; i < collapse_count; ++i) +- { +- // skip sign bit since error is non-negative +- unsigned int key = (collapses[i].errorui << 1) >> (32 - sort_bits); +- +- histogram[key]++; +- } +- +- // compute offsets based on histogram data +- size_t histogram_sum = 0; +- +- for (size_t i = 0; i < 1 << sort_bits; ++i) +- { +- size_t count = histogram[i]; +- histogram[i] = unsigned(histogram_sum); +- histogram_sum += count; +- } +- +- assert(histogram_sum == collapse_count); +- +- // compute sort order based on offsets +- for (size_t i = 0; i < collapse_count; ++i) +- { +- // skip sign bit since error is non-negative +- unsigned int key = (collapses[i].errorui << 1) >> (32 - sort_bits); +- +- sort_order[histogram[key]++] = unsigned(i); +- } +-} +- +-static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char* collapse_locked, Quadric* vertex_quadrics, const Collapse* collapses, size_t collapse_count, const unsigned int* collapse_order, const unsigned int* remap, const unsigned int* wedge, const unsigned char* vertex_kind, const Vector3* vertex_positions, const EdgeAdjacency& adjacency, size_t triangle_collapse_goal, float error_limit, float& result_error) +-{ +- size_t edge_collapses = 0; +- size_t triangle_collapses = 0; +- +- // most collapses remove 2 triangles; use this to establish a bound on the pass in terms of error limit +- // note that edge_collapse_goal is an estimate; triangle_collapse_goal will be used to actually limit collapses +- size_t edge_collapse_goal = triangle_collapse_goal / 2; +- +-#if TRACE +- size_t stats[4] = {}; +-#endif +- +- for (size_t i = 0; i < collapse_count; ++i) +- { +- const Collapse& c = collapses[collapse_order[i]]; +- +- TRACESTATS(0); +- +- if (c.error > error_limit) +- break; +- +- if (triangle_collapses >= triangle_collapse_goal) +- break; +- +- // we limit the error in each pass based on the error of optimal last collapse; since many collapses will be locked +- // as they will share vertices with other successfull collapses, we need to increase the acceptable error by some factor +- float error_goal = edge_collapse_goal < collapse_count ? 1.5f * collapses[collapse_order[edge_collapse_goal]].error : FLT_MAX; +- +- // on average, each collapse is expected to lock 6 other collapses; to avoid degenerate passes on meshes with odd +- // topology, we only abort if we got over 1/6 collapses accordingly. 
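+-		// worked example (hypothetical numbers): to remove 600 triangles, edge_collapse_goal
+-		// is ~300, so error_goal is 1.5x the error of the collapse ranked 300th by error;
+-		// a costlier collapse only aborts the pass once more than 100 triangles (600 / 6)
+-		// have already been removed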
+- if (c.error > error_goal && triangle_collapses > triangle_collapse_goal / 6) +- break; +- +- unsigned int i0 = c.v0; +- unsigned int i1 = c.v1; +- +- unsigned int r0 = remap[i0]; +- unsigned int r1 = remap[i1]; +- +- // we don't collapse vertices that had source or target vertex involved in a collapse +- // it's important to not move the vertices twice since it complicates the tracking/remapping logic +- // it's important to not move other vertices towards a moved vertex to preserve error since we don't re-rank collapses mid-pass +- if (collapse_locked[r0] | collapse_locked[r1]) +- { +- TRACESTATS(1); +- continue; +- } +- +- if (hasTriangleFlips(adjacency, vertex_positions, collapse_remap, r0, r1)) +- { +- // adjust collapse goal since this collapse is invalid and shouldn't factor into error goal +- edge_collapse_goal++; +- +- TRACESTATS(2); +- continue; +- } +- +- assert(collapse_remap[r0] == r0); +- assert(collapse_remap[r1] == r1); +- +- quadricAdd(vertex_quadrics[r1], vertex_quadrics[r0]); +- +- if (vertex_kind[i0] == Kind_Complex) +- { +- unsigned int v = i0; +- +- do +- { +- collapse_remap[v] = r1; +- v = wedge[v]; +- } while (v != i0); +- } +- else if (vertex_kind[i0] == Kind_Seam) +- { +- // remap v0 to v1 and seam pair of v0 to seam pair of v1 +- unsigned int s0 = wedge[i0]; +- unsigned int s1 = wedge[i1]; +- +- assert(s0 != i0 && s1 != i1); +- assert(wedge[s0] == i0 && wedge[s1] == i1); +- +- collapse_remap[i0] = i1; +- collapse_remap[s0] = s1; +- } +- else +- { +- assert(wedge[i0] == i0); +- +- collapse_remap[i0] = i1; +- } +- +- collapse_locked[r0] = 1; +- collapse_locked[r1] = 1; +- +- // border edges collapse 1 triangle, other edges collapse 2 or more +- triangle_collapses += (vertex_kind[i0] == Kind_Border) ? 1 : 2; +- edge_collapses++; +- +- result_error = result_error < c.error ? c.error : result_error; +- } +- +-#if TRACE +- float error_goal_perfect = edge_collapse_goal < collapse_count ? collapses[collapse_order[edge_collapse_goal]].error : 0.f; +- +- printf("removed %d triangles, error %e (goal %e); evaluated %d/%d collapses (done %d, skipped %d, invalid %d)\n", +- int(triangle_collapses), sqrtf(result_error), sqrtf(error_goal_perfect), +- int(stats[0]), int(collapse_count), int(edge_collapses), int(stats[1]), int(stats[2])); +-#endif +- +- return edge_collapses; +-} +- +-static size_t remapIndexBuffer(unsigned int* indices, size_t index_count, const unsigned int* collapse_remap) +-{ +- size_t write = 0; +- +- for (size_t i = 0; i < index_count; i += 3) +- { +- unsigned int v0 = collapse_remap[indices[i + 0]]; +- unsigned int v1 = collapse_remap[indices[i + 1]]; +- unsigned int v2 = collapse_remap[indices[i + 2]]; +- +- // we never move the vertex twice during a single pass +- assert(collapse_remap[v0] == v0); +- assert(collapse_remap[v1] == v1); +- assert(collapse_remap[v2] == v2); +- +- if (v0 != v1 && v0 != v2 && v1 != v2) +- { +- indices[write + 0] = v0; +- indices[write + 1] = v1; +- indices[write + 2] = v2; +- write += 3; +- } +- } +- +- return write; +-} +- +-static void remapEdgeLoops(unsigned int* loop, size_t vertex_count, const unsigned int* collapse_remap) +-{ +- for (size_t i = 0; i < vertex_count; ++i) +- { +- if (loop[i] != ~0u) +- { +- unsigned int l = loop[i]; +- unsigned int r = collapse_remap[l]; +- +- // i == r is a special case when the seam edge is collapsed in a direction opposite to where loop goes +- loop[i] = (i == r) ? 
loop[l] : r; +- } +- } +-} +- +-struct CellHasher +-{ +- const unsigned int* vertex_ids; +- +- size_t hash(unsigned int i) const +- { +- unsigned int h = vertex_ids[i]; +- +- // MurmurHash2 finalizer +- h ^= h >> 13; +- h *= 0x5bd1e995; +- h ^= h >> 15; +- return h; +- } +- +- bool equal(unsigned int lhs, unsigned int rhs) const +- { +- return vertex_ids[lhs] == vertex_ids[rhs]; +- } +-}; +- +-struct IdHasher +-{ +- size_t hash(unsigned int id) const +- { +- unsigned int h = id; +- +- // MurmurHash2 finalizer +- h ^= h >> 13; +- h *= 0x5bd1e995; +- h ^= h >> 15; +- return h; +- } +- +- bool equal(unsigned int lhs, unsigned int rhs) const +- { +- return lhs == rhs; +- } +-}; +- +-struct TriangleHasher +-{ +- const unsigned int* indices; +- +- size_t hash(unsigned int i) const +- { +- const unsigned int* tri = indices + i * 3; +- +- // Optimized Spatial Hashing for Collision Detection of Deformable Objects +- return (tri[0] * 73856093) ^ (tri[1] * 19349663) ^ (tri[2] * 83492791); +- } +- +- bool equal(unsigned int lhs, unsigned int rhs) const +- { +- const unsigned int* lt = indices + lhs * 3; +- const unsigned int* rt = indices + rhs * 3; +- +- return lt[0] == rt[0] && lt[1] == rt[1] && lt[2] == rt[2]; +- } +-}; +- +-static void computeVertexIds(unsigned int* vertex_ids, const Vector3* vertex_positions, size_t vertex_count, int grid_size) +-{ +- assert(grid_size >= 1 && grid_size <= 1024); +- float cell_scale = float(grid_size - 1); +- +- for (size_t i = 0; i < vertex_count; ++i) +- { +- const Vector3& v = vertex_positions[i]; +- +- int xi = int(v.x * cell_scale + 0.5f); +- int yi = int(v.y * cell_scale + 0.5f); +- int zi = int(v.z * cell_scale + 0.5f); +- +- vertex_ids[i] = (xi << 20) | (yi << 10) | zi; +- } +-} +- +-static size_t countTriangles(const unsigned int* vertex_ids, const unsigned int* indices, size_t index_count) +-{ +- size_t result = 0; +- +- for (size_t i = 0; i < index_count; i += 3) +- { +- unsigned int id0 = vertex_ids[indices[i + 0]]; +- unsigned int id1 = vertex_ids[indices[i + 1]]; +- unsigned int id2 = vertex_ids[indices[i + 2]]; +- +- result += (id0 != id1) & (id0 != id2) & (id1 != id2); +- } +- +- return result; +-} +- +-static size_t fillVertexCells(unsigned int* table, size_t table_size, unsigned int* vertex_cells, const unsigned int* vertex_ids, size_t vertex_count) +-{ +- CellHasher hasher = {vertex_ids}; +- +- memset(table, -1, table_size * sizeof(unsigned int)); +- +- size_t result = 0; +- +- for (size_t i = 0; i < vertex_count; ++i) +- { +- unsigned int* entry = hashLookup2(table, table_size, hasher, unsigned(i), ~0u); +- +- if (*entry == ~0u) +- { +- *entry = unsigned(i); +- vertex_cells[i] = unsigned(result++); +- } +- else +- { +- vertex_cells[i] = vertex_cells[*entry]; +- } +- } +- +- return result; +-} +- +-static size_t countVertexCells(unsigned int* table, size_t table_size, const unsigned int* vertex_ids, size_t vertex_count) +-{ +- IdHasher hasher; +- +- memset(table, -1, table_size * sizeof(unsigned int)); +- +- size_t result = 0; +- +- for (size_t i = 0; i < vertex_count; ++i) +- { +- unsigned int id = vertex_ids[i]; +- unsigned int* entry = hashLookup2(table, table_size, hasher, id, ~0u); +- +- result += (*entry == ~0u); +- *entry = id; +- } +- +- return result; +-} +- +-static void fillCellQuadrics(Quadric* cell_quadrics, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* vertex_cells) +-{ +- for (size_t i = 0; i < index_count; i += 3) +- { +- unsigned int i0 = indices[i + 0]; +- unsigned int i1 
= indices[i + 1]; +- unsigned int i2 = indices[i + 2]; +- +- unsigned int c0 = vertex_cells[i0]; +- unsigned int c1 = vertex_cells[i1]; +- unsigned int c2 = vertex_cells[i2]; +- +- bool single_cell = (c0 == c1) & (c0 == c2); +- +- Quadric Q; +- quadricFromTriangle(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], single_cell ? 3.f : 1.f); +- +- if (single_cell) +- { +- quadricAdd(cell_quadrics[c0], Q); +- } +- else +- { +- quadricAdd(cell_quadrics[c0], Q); +- quadricAdd(cell_quadrics[c1], Q); +- quadricAdd(cell_quadrics[c2], Q); +- } +- } +-} +- +-static void fillCellQuadrics(Quadric* cell_quadrics, const Vector3* vertex_positions, size_t vertex_count, const unsigned int* vertex_cells) +-{ +- for (size_t i = 0; i < vertex_count; ++i) +- { +- unsigned int c = vertex_cells[i]; +- const Vector3& v = vertex_positions[i]; +- +- Quadric Q; +- quadricFromPoint(Q, v.x, v.y, v.z, 1.f); +- +- quadricAdd(cell_quadrics[c], Q); +- } +-} +- +-static void fillCellRemap(unsigned int* cell_remap, float* cell_errors, size_t cell_count, const unsigned int* vertex_cells, const Quadric* cell_quadrics, const Vector3* vertex_positions, size_t vertex_count) +-{ +- memset(cell_remap, -1, cell_count * sizeof(unsigned int)); +- +- for (size_t i = 0; i < vertex_count; ++i) +- { +- unsigned int cell = vertex_cells[i]; +- float error = quadricError(cell_quadrics[cell], vertex_positions[i]); +- +- if (cell_remap[cell] == ~0u || cell_errors[cell] > error) +- { +- cell_remap[cell] = unsigned(i); +- cell_errors[cell] = error; +- } +- } +-} +- +-static size_t filterTriangles(unsigned int* destination, unsigned int* tritable, size_t tritable_size, const unsigned int* indices, size_t index_count, const unsigned int* vertex_cells, const unsigned int* cell_remap) +-{ +- TriangleHasher hasher = {destination}; +- +- memset(tritable, -1, tritable_size * sizeof(unsigned int)); +- +- size_t result = 0; +- +- for (size_t i = 0; i < index_count; i += 3) +- { +- unsigned int c0 = vertex_cells[indices[i + 0]]; +- unsigned int c1 = vertex_cells[indices[i + 1]]; +- unsigned int c2 = vertex_cells[indices[i + 2]]; +- +- if (c0 != c1 && c0 != c2 && c1 != c2) +- { +- unsigned int a = cell_remap[c0]; +- unsigned int b = cell_remap[c1]; +- unsigned int c = cell_remap[c2]; +- +- if (b < a && b < c) +- { +- unsigned int t = a; +- a = b, b = c, c = t; +- } +- else if (c < a && c < b) +- { +- unsigned int t = c; +- c = b, b = a, a = t; +- } +- +- destination[result * 3 + 0] = a; +- destination[result * 3 + 1] = b; +- destination[result * 3 + 2] = c; +- +- unsigned int* entry = hashLookup2(tritable, tritable_size, hasher, unsigned(result), ~0u); +- +- if (*entry == ~0u) +- *entry = unsigned(result++); +- } +- } +- +- return result * 3; +-} +- +-static float interpolate(float y, float x0, float y0, float x1, float y1, float x2, float y2) +-{ +- // three point interpolation from "revenge of interpolation search" paper +- float num = (y1 - y) * (x1 - x2) * (x1 - x0) * (y2 - y0); +- float den = (y2 - y) * (x1 - x2) * (y0 - y1) + (y0 - y) * (x1 - x0) * (y1 - y2); +- return x1 + num / den; +-} +- +-} // namespace meshopt +- +-#ifndef NDEBUG +-// Note: this is only exposed for debug visualization purposes; do *not* use these in debug builds +-MESHOPTIMIZER_API unsigned char* meshopt_simplifyDebugKind = 0; +-MESHOPTIMIZER_API unsigned int* meshopt_simplifyDebugLoop = 0; +-MESHOPTIMIZER_API unsigned int* meshopt_simplifyDebugLoopBack = 0; +-#endif +- +-size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t 
index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options, float* out_result_error) +-{ +- using namespace meshopt; +- +- assert(index_count % 3 == 0); +- assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); +- assert(vertex_positions_stride % sizeof(float) == 0); +- assert(target_index_count <= index_count); +- assert((options & ~(meshopt_SimplifyLockBorder)) == 0); +- +- meshopt_Allocator allocator; +- +- unsigned int* result = destination; +- +- // build adjacency information +- EdgeAdjacency adjacency = {}; +- prepareEdgeAdjacency(adjacency, index_count, vertex_count, allocator); +- updateEdgeAdjacency(adjacency, indices, index_count, vertex_count, NULL); +- +- // build position remap that maps each vertex to the one with identical position +- unsigned int* remap = allocator.allocate(vertex_count); +- unsigned int* wedge = allocator.allocate(vertex_count); +- buildPositionRemap(remap, wedge, vertex_positions_data, vertex_count, vertex_positions_stride, allocator); +- +- // classify vertices; vertex kind determines collapse rules, see kCanCollapse +- unsigned char* vertex_kind = allocator.allocate(vertex_count); +- unsigned int* loop = allocator.allocate(vertex_count); +- unsigned int* loopback = allocator.allocate(vertex_count); +- classifyVertices(vertex_kind, loop, loopback, vertex_count, adjacency, remap, wedge, options); +- +-#if TRACE +- size_t unique_positions = 0; +- for (size_t i = 0; i < vertex_count; ++i) +- unique_positions += remap[i] == i; +- +- printf("position remap: %d vertices => %d positions\n", int(vertex_count), int(unique_positions)); +- +- size_t kinds[Kind_Count] = {}; +- for (size_t i = 0; i < vertex_count; ++i) +- kinds[vertex_kind[i]] += remap[i] == i; +- +- printf("kinds: manifold %d, border %d, seam %d, complex %d, locked %d\n", +- int(kinds[Kind_Manifold]), int(kinds[Kind_Border]), int(kinds[Kind_Seam]), int(kinds[Kind_Complex]), int(kinds[Kind_Locked])); +-#endif +- +- Vector3* vertex_positions = allocator.allocate(vertex_count); +- rescalePositions(vertex_positions, vertex_positions_data, vertex_count, vertex_positions_stride); +- +- Quadric* vertex_quadrics = allocator.allocate(vertex_count); +- memset(vertex_quadrics, 0, vertex_count * sizeof(Quadric)); +- +- fillFaceQuadrics(vertex_quadrics, indices, index_count, vertex_positions, remap); +- fillEdgeQuadrics(vertex_quadrics, indices, index_count, vertex_positions, remap, vertex_kind, loop, loopback); +- +- if (result != indices) +- memcpy(result, indices, index_count * sizeof(unsigned int)); +- +-#if TRACE +- size_t pass_count = 0; +-#endif +- +- Collapse* edge_collapses = allocator.allocate(index_count); +- unsigned int* collapse_order = allocator.allocate(index_count); +- unsigned int* collapse_remap = allocator.allocate(vertex_count); +- unsigned char* collapse_locked = allocator.allocate(vertex_count); +- +- size_t result_count = index_count; +- float result_error = 0; +- +- // target_error input is linear; we need to adjust it to match quadricError units +- float error_limit = target_error * target_error; +- +- while (result_count > target_index_count) +- { +- // note: throughout the simplification process adjacency structure reflects welded topology for result-in-progress +- updateEdgeAdjacency(adjacency, result, result_count, vertex_count, remap); +- +- size_t edge_collapse_count = pickEdgeCollapses(edge_collapses, result, result_count, remap, vertex_kind, loop); +- 
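+-		// each pass operates on the in-progress result: refresh adjacency, pick candidate
+-		// edges, rank them by quadric error, order them with a counting sort, then greedily
+-		// collapse until this pass hits its triangle goal or error limit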
+- // no edges can be collapsed any more due to topology restrictions +- if (edge_collapse_count == 0) +- break; +- +- rankEdgeCollapses(edge_collapses, edge_collapse_count, vertex_positions, vertex_quadrics, remap); +- +-#if TRACE > 1 +- dumpEdgeCollapses(edge_collapses, edge_collapse_count, vertex_kind); +-#endif +- +- sortEdgeCollapses(collapse_order, edge_collapses, edge_collapse_count); +- +- size_t triangle_collapse_goal = (result_count - target_index_count) / 3; +- +- for (size_t i = 0; i < vertex_count; ++i) +- collapse_remap[i] = unsigned(i); +- +- memset(collapse_locked, 0, vertex_count); +- +-#if TRACE +- printf("pass %d: ", int(pass_count++)); +-#endif +- +- size_t collapses = performEdgeCollapses(collapse_remap, collapse_locked, vertex_quadrics, edge_collapses, edge_collapse_count, collapse_order, remap, wedge, vertex_kind, vertex_positions, adjacency, triangle_collapse_goal, error_limit, result_error); +- +- // no edges can be collapsed any more due to hitting the error limit or triangle collapse limit +- if (collapses == 0) +- break; +- +- remapEdgeLoops(loop, vertex_count, collapse_remap); +- remapEdgeLoops(loopback, vertex_count, collapse_remap); +- +- size_t new_count = remapIndexBuffer(result, result_count, collapse_remap); +- assert(new_count < result_count); +- +- result_count = new_count; +- } +- +-#if TRACE +- printf("result: %d triangles, error: %e; total %d passes\n", int(result_count), sqrtf(result_error), int(pass_count)); +-#endif +- +-#if TRACE > 1 +- dumpLockedCollapses(result, result_count, vertex_kind); +-#endif +- +-#ifndef NDEBUG +- if (meshopt_simplifyDebugKind) +- memcpy(meshopt_simplifyDebugKind, vertex_kind, vertex_count); +- +- if (meshopt_simplifyDebugLoop) +- memcpy(meshopt_simplifyDebugLoop, loop, vertex_count * sizeof(unsigned int)); +- +- if (meshopt_simplifyDebugLoopBack) +- memcpy(meshopt_simplifyDebugLoopBack, loopback, vertex_count * sizeof(unsigned int)); +-#endif +- +- // result_error is quadratic; we need to remap it back to linear +- if (out_result_error) +- *out_result_error = sqrtf(result_error); +- +- return result_count; +-} +- +-size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* out_result_error) +-{ +- using namespace meshopt; +- +- assert(index_count % 3 == 0); +- assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); +- assert(vertex_positions_stride % sizeof(float) == 0); +- assert(target_index_count <= index_count); +- +- // we expect to get ~2 triangles/vertex in the output +- size_t target_cell_count = target_index_count / 6; +- +- meshopt_Allocator allocator; +- +- Vector3* vertex_positions = allocator.allocate(vertex_count); +- rescalePositions(vertex_positions, vertex_positions_data, vertex_count, vertex_positions_stride); +- +- // find the optimal grid size using guided binary search +-#if TRACE +- printf("source: %d vertices, %d triangles\n", int(vertex_count), int(index_count / 3)); +- printf("target: %d cells, %d triangles\n", int(target_cell_count), int(target_index_count / 3)); +-#endif +- +- unsigned int* vertex_ids = allocator.allocate(vertex_count); +- +- const int kInterpolationPasses = 5; +- +- // invariant: # of triangles in min_grid <= target_count +- int min_grid = int(1.f / (target_error < 1e-3f ? 
1e-3f : target_error)); +- int max_grid = 1025; +- size_t min_triangles = 0; +- size_t max_triangles = index_count / 3; +- +- // when we're error-limited, we compute the triangle count for the min. size; this accelerates convergence and provides the correct answer when we can't use a larger grid +- if (min_grid > 1) +- { +- computeVertexIds(vertex_ids, vertex_positions, vertex_count, min_grid); +- min_triangles = countTriangles(vertex_ids, indices, index_count); +- } +- +- // instead of starting in the middle, let's guess as to what the answer might be! triangle count usually grows as a square of grid size... +- int next_grid_size = int(sqrtf(float(target_cell_count)) + 0.5f); +- +- for (int pass = 0; pass < 10 + kInterpolationPasses; ++pass) +- { +- if (min_triangles >= target_index_count / 3 || max_grid - min_grid <= 1) +- break; +- +- // we clamp the prediction of the grid size to make sure that the search converges +- int grid_size = next_grid_size; +- grid_size = (grid_size <= min_grid) ? min_grid + 1 : (grid_size >= max_grid) ? max_grid - 1 : grid_size; +- +- computeVertexIds(vertex_ids, vertex_positions, vertex_count, grid_size); +- size_t triangles = countTriangles(vertex_ids, indices, index_count); +- +-#if TRACE +- printf("pass %d (%s): grid size %d, triangles %d, %s\n", +- pass, (pass == 0) ? "guess" : (pass <= kInterpolationPasses) ? "lerp" : "binary", +- grid_size, int(triangles), +- (triangles <= target_index_count / 3) ? "under" : "over"); +-#endif +- +- float tip = interpolate(float(target_index_count / 3), float(min_grid), float(min_triangles), float(grid_size), float(triangles), float(max_grid), float(max_triangles)); +- +- if (triangles <= target_index_count / 3) +- { +- min_grid = grid_size; +- min_triangles = triangles; +- } +- else +- { +- max_grid = grid_size; +- max_triangles = triangles; +- } +- +- // we start by using interpolation search - it usually converges faster +- // however, interpolation search has a worst case of O(N) so we switch to binary search after a few iterations which converges in O(logN) +- next_grid_size = (pass < kInterpolationPasses) ? int(tip + 0.5f) : (min_grid + max_grid) / 2; +- } +- +- if (min_triangles == 0) +- { +- if (out_result_error) +- *out_result_error = 1.f; +- +- return 0; +- } +- +- // build vertex->cell association by mapping all vertices with the same quantized position to the same cell +- size_t table_size = hashBuckets2(vertex_count); +- unsigned int* table = allocator.allocate(table_size); +- +- unsigned int* vertex_cells = allocator.allocate(vertex_count); +- +- computeVertexIds(vertex_ids, vertex_positions, vertex_count, min_grid); +- size_t cell_count = fillVertexCells(table, table_size, vertex_cells, vertex_ids, vertex_count); +- +- // build a quadric for each target cell +- Quadric* cell_quadrics = allocator.allocate(cell_count); +- memset(cell_quadrics, 0, cell_count * sizeof(Quadric)); +- +- fillCellQuadrics(cell_quadrics, indices, index_count, vertex_positions, vertex_cells); +- +- // for each target cell, find the vertex with the minimal error +- unsigned int* cell_remap = allocator.allocate(cell_count); +- float* cell_errors = allocator.allocate(cell_count); +- +- fillCellRemap(cell_remap, cell_errors, cell_count, vertex_cells, cell_quadrics, vertex_positions, vertex_count); +- +- // compute error +- float result_error = 0.f; +- +- for (size_t i = 0; i < cell_count; ++i) +- result_error = result_error < cell_errors[i] ? cell_errors[i] : result_error; +- +- // collapse triangles! 
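+-	// at this point every vertex maps to the lowest-error representative of its grid
+-	// cell; emitting the remapped triangles (and dropping those that became degenerate
+-	// or duplicated) yields the simplified mesh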
+- // note that we need to filter out triangles that we've already output because we very frequently generate redundant triangles between cells :( +- size_t tritable_size = hashBuckets2(min_triangles); +- unsigned int* tritable = allocator.allocate(tritable_size); +- +- size_t write = filterTriangles(destination, tritable, tritable_size, indices, index_count, vertex_cells, cell_remap); +- +-#if TRACE +- printf("result: %d cells, %d triangles (%d unfiltered), error %e\n", int(cell_count), int(write / 3), int(min_triangles), sqrtf(result_error)); +-#endif +- +- if (out_result_error) +- *out_result_error = sqrtf(result_error); +- +- return write; +-} +- +-size_t meshopt_simplifyPoints(unsigned int* destination, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_vertex_count) +-{ +- using namespace meshopt; +- +- assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); +- assert(vertex_positions_stride % sizeof(float) == 0); +- assert(target_vertex_count <= vertex_count); +- +- size_t target_cell_count = target_vertex_count; +- +- if (target_cell_count == 0) +- return 0; +- +- meshopt_Allocator allocator; +- +- Vector3* vertex_positions = allocator.allocate(vertex_count); +- rescalePositions(vertex_positions, vertex_positions_data, vertex_count, vertex_positions_stride); +- +- // find the optimal grid size using guided binary search +-#if TRACE +- printf("source: %d vertices\n", int(vertex_count)); +- printf("target: %d cells\n", int(target_cell_count)); +-#endif +- +- unsigned int* vertex_ids = allocator.allocate(vertex_count); +- +- size_t table_size = hashBuckets2(vertex_count); +- unsigned int* table = allocator.allocate(table_size); +- +- const int kInterpolationPasses = 5; +- +- // invariant: # of vertices in min_grid <= target_count +- int min_grid = 0; +- int max_grid = 1025; +- size_t min_vertices = 0; +- size_t max_vertices = vertex_count; +- +- // instead of starting in the middle, let's guess as to what the answer might be! triangle count usually grows as a square of grid size... +- int next_grid_size = int(sqrtf(float(target_cell_count)) + 0.5f); +- +- for (int pass = 0; pass < 10 + kInterpolationPasses; ++pass) +- { +- assert(min_vertices < target_vertex_count); +- assert(max_grid - min_grid > 1); +- +- // we clamp the prediction of the grid size to make sure that the search converges +- int grid_size = next_grid_size; +- grid_size = (grid_size <= min_grid) ? min_grid + 1 : (grid_size >= max_grid) ? max_grid - 1 : grid_size; +- +- computeVertexIds(vertex_ids, vertex_positions, vertex_count, grid_size); +- size_t vertices = countVertexCells(table, table_size, vertex_ids, vertex_count); +- +-#if TRACE +- printf("pass %d (%s): grid size %d, vertices %d, %s\n", +- pass, (pass == 0) ? "guess" : (pass <= kInterpolationPasses) ? "lerp" : "binary", +- grid_size, int(vertices), +- (vertices <= target_vertex_count) ? 
"under" : "over"); +-#endif +- +- float tip = interpolate(float(target_vertex_count), float(min_grid), float(min_vertices), float(grid_size), float(vertices), float(max_grid), float(max_vertices)); +- +- if (vertices <= target_vertex_count) +- { +- min_grid = grid_size; +- min_vertices = vertices; +- } +- else +- { +- max_grid = grid_size; +- max_vertices = vertices; +- } +- +- if (vertices == target_vertex_count || max_grid - min_grid <= 1) +- break; +- +- // we start by using interpolation search - it usually converges faster +- // however, interpolation search has a worst case of O(N) so we switch to binary search after a few iterations which converges in O(logN) +- next_grid_size = (pass < kInterpolationPasses) ? int(tip + 0.5f) : (min_grid + max_grid) / 2; +- } +- +- if (min_vertices == 0) +- return 0; +- +- // build vertex->cell association by mapping all vertices with the same quantized position to the same cell +- unsigned int* vertex_cells = allocator.allocate(vertex_count); +- +- computeVertexIds(vertex_ids, vertex_positions, vertex_count, min_grid); +- size_t cell_count = fillVertexCells(table, table_size, vertex_cells, vertex_ids, vertex_count); +- +- // build a quadric for each target cell +- Quadric* cell_quadrics = allocator.allocate(cell_count); +- memset(cell_quadrics, 0, cell_count * sizeof(Quadric)); +- +- fillCellQuadrics(cell_quadrics, vertex_positions, vertex_count, vertex_cells); +- +- // for each target cell, find the vertex with the minimal error +- unsigned int* cell_remap = allocator.allocate(cell_count); +- float* cell_errors = allocator.allocate(cell_count); +- +- fillCellRemap(cell_remap, cell_errors, cell_count, vertex_cells, cell_quadrics, vertex_positions, vertex_count); +- +- // copy results to the output +- assert(cell_count <= target_vertex_count); +- memcpy(destination, cell_remap, sizeof(unsigned int) * cell_count); +- +-#if TRACE +- printf("result: %d cells\n", int(cell_count)); +-#endif +- +- return cell_count; +-} +- +-float meshopt_simplifyScale(const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +-{ +- using namespace meshopt; +- +- assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); +- assert(vertex_positions_stride % sizeof(float) == 0); +- +- float extent = rescalePositions(NULL, vertex_positions, vertex_count, vertex_positions_stride); +- +- return extent; +-} +diff --git a/src/3rdparty/meshoptimizer/src/spatialorder.cpp b/src/3rdparty/meshoptimizer/src/spatialorder.cpp +deleted file mode 100644 +index b09f80a..0000000 +--- a/src/3rdparty/meshoptimizer/src/spatialorder.cpp ++++ /dev/null +@@ -1,194 +0,0 @@ +-// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +-#include "meshoptimizer.h" +- +-#include +-#include +-#include +- +-// This work is based on: +-// Fabian Giesen. Decoding Morton codes. 
2009 +-namespace meshopt +-{ +- +-// "Insert" two 0 bits after each of the 10 low bits of x +-inline unsigned int part1By2(unsigned int x) +-{ +- x &= 0x000003ff; // x = ---- ---- ---- ---- ---- --98 7654 3210 +- x = (x ^ (x << 16)) & 0xff0000ff; // x = ---- --98 ---- ---- ---- ---- 7654 3210 +- x = (x ^ (x << 8)) & 0x0300f00f; // x = ---- --98 ---- ---- 7654 ---- ---- 3210 +- x = (x ^ (x << 4)) & 0x030c30c3; // x = ---- --98 ---- 76-- --54 ---- 32-- --10 +- x = (x ^ (x << 2)) & 0x09249249; // x = ---- 9--8 --7- -6-- 5--4 --3- -2-- 1--0 +- return x; +-} +- +-static void computeOrder(unsigned int* result, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride) +-{ +- size_t vertex_stride_float = vertex_positions_stride / sizeof(float); +- +- float minv[3] = {FLT_MAX, FLT_MAX, FLT_MAX}; +- float maxv[3] = {-FLT_MAX, -FLT_MAX, -FLT_MAX}; +- +- for (size_t i = 0; i < vertex_count; ++i) +- { +- const float* v = vertex_positions_data + i * vertex_stride_float; +- +- for (int j = 0; j < 3; ++j) +- { +- float vj = v[j]; +- +- minv[j] = minv[j] > vj ? vj : minv[j]; +- maxv[j] = maxv[j] < vj ? vj : maxv[j]; +- } +- } +- +- float extent = 0.f; +- +- extent = (maxv[0] - minv[0]) < extent ? extent : (maxv[0] - minv[0]); +- extent = (maxv[1] - minv[1]) < extent ? extent : (maxv[1] - minv[1]); +- extent = (maxv[2] - minv[2]) < extent ? extent : (maxv[2] - minv[2]); +- +- float scale = extent == 0 ? 0.f : 1.f / extent; +- +- // generate Morton order based on the position inside a unit cube +- for (size_t i = 0; i < vertex_count; ++i) +- { +- const float* v = vertex_positions_data + i * vertex_stride_float; +- +- int x = int((v[0] - minv[0]) * scale * 1023.f + 0.5f); +- int y = int((v[1] - minv[1]) * scale * 1023.f + 0.5f); +- int z = int((v[2] - minv[2]) * scale * 1023.f + 0.5f); +- +- result[i] = part1By2(x) | (part1By2(y) << 1) | (part1By2(z) << 2); +- } +-} +- +-static void computeHistogram(unsigned int (&hist)[1024][3], const unsigned int* data, size_t count) +-{ +- memset(hist, 0, sizeof(hist)); +- +- // compute 3 10-bit histograms in parallel +- for (size_t i = 0; i < count; ++i) +- { +- unsigned int id = data[i]; +- +- hist[(id >> 0) & 1023][0]++; +- hist[(id >> 10) & 1023][1]++; +- hist[(id >> 20) & 1023][2]++; +- } +- +- unsigned int sumx = 0, sumy = 0, sumz = 0; +- +- // replace histogram data with prefix histogram sums in-place +- for (int i = 0; i < 1024; ++i) +- { +- unsigned int hx = hist[i][0], hy = hist[i][1], hz = hist[i][2]; +- +- hist[i][0] = sumx; +- hist[i][1] = sumy; +- hist[i][2] = sumz; +- +- sumx += hx; +- sumy += hy; +- sumz += hz; +- } +- +- assert(sumx == count && sumy == count && sumz == count); +-} +- +-static void radixPass(unsigned int* destination, const unsigned int* source, const unsigned int* keys, size_t count, unsigned int (&hist)[1024][3], int pass) +-{ +- int bitoff = pass * 10; +- +- for (size_t i = 0; i < count; ++i) +- { +- unsigned int id = (keys[source[i]] >> bitoff) & 1023; +- +- destination[hist[id][pass]++] = source[i]; +- } +-} +- +-} // namespace meshopt +- +-void meshopt_spatialSortRemap(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +-{ +- using namespace meshopt; +- +- assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); +- assert(vertex_positions_stride % sizeof(float) == 0); +- +- meshopt_Allocator allocator; +- +- unsigned int* keys = allocator.allocate(vertex_count); +- computeOrder(keys, vertex_positions, vertex_count, 
vertex_positions_stride); +- +- unsigned int hist[1024][3]; +- computeHistogram(hist, keys, vertex_count); +- +- unsigned int* scratch = allocator.allocate(vertex_count); +- +- for (size_t i = 0; i < vertex_count; ++i) +- destination[i] = unsigned(i); +- +- // 3-pass radix sort computes the resulting order into scratch +- radixPass(scratch, destination, keys, vertex_count, hist, 0); +- radixPass(destination, scratch, keys, vertex_count, hist, 1); +- radixPass(scratch, destination, keys, vertex_count, hist, 2); +- +- // since our remap table is mapping old=>new, we need to reverse it +- for (size_t i = 0; i < vertex_count; ++i) +- destination[scratch[i]] = unsigned(i); +-} +- +-void meshopt_spatialSortTriangles(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +-{ +- using namespace meshopt; +- +- assert(index_count % 3 == 0); +- assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); +- assert(vertex_positions_stride % sizeof(float) == 0); +- +- (void)vertex_count; +- +- size_t face_count = index_count / 3; +- size_t vertex_stride_float = vertex_positions_stride / sizeof(float); +- +- meshopt_Allocator allocator; +- +- float* centroids = allocator.allocate(face_count * 3); +- +- for (size_t i = 0; i < face_count; ++i) +- { +- unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2]; +- assert(a < vertex_count && b < vertex_count && c < vertex_count); +- +- const float* va = vertex_positions + a * vertex_stride_float; +- const float* vb = vertex_positions + b * vertex_stride_float; +- const float* vc = vertex_positions + c * vertex_stride_float; +- +- centroids[i * 3 + 0] = (va[0] + vb[0] + vc[0]) / 3.f; +- centroids[i * 3 + 1] = (va[1] + vb[1] + vc[1]) / 3.f; +- centroids[i * 3 + 2] = (va[2] + vb[2] + vc[2]) / 3.f; +- } +- +- unsigned int* remap = allocator.allocate(face_count); +- +- meshopt_spatialSortRemap(remap, centroids, face_count, sizeof(float) * 3); +- +- // support in-order remap +- if (destination == indices) +- { +- unsigned int* indices_copy = allocator.allocate(index_count); +- memcpy(indices_copy, indices, index_count * sizeof(unsigned int)); +- indices = indices_copy; +- } +- +- for (size_t i = 0; i < face_count; ++i) +- { +- unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2]; +- unsigned int r = remap[i]; +- +- destination[r * 3 + 0] = a; +- destination[r * 3 + 1] = b; +- destination[r * 3 + 2] = c; +- } +-} +diff --git a/src/3rdparty/meshoptimizer/src/stripifier.cpp b/src/3rdparty/meshoptimizer/src/stripifier.cpp +deleted file mode 100644 +index 8ce17ef..0000000 +--- a/src/3rdparty/meshoptimizer/src/stripifier.cpp ++++ /dev/null +@@ -1,295 +0,0 @@ +-// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +-#include "meshoptimizer.h" +- +-#include +-#include +-#include +- +-// This work is based on: +-// Francine Evans, Steven Skiena and Amitabh Varshney. Optimizing Triangle Strips for Fast Rendering. 1996 +-namespace meshopt +-{ +- +-static unsigned int findStripFirst(const unsigned int buffer[][3], unsigned int buffer_size, const unsigned int* valence) +-{ +- unsigned int index = 0; +- unsigned int iv = ~0u; +- +- for (size_t i = 0; i < buffer_size; ++i) +- { +- unsigned int va = valence[buffer[i][0]], vb = valence[buffer[i][1]], vc = valence[buffer[i][2]]; +- unsigned int v = (va < vb && va < vc) ? va : (vb < vc) ? 
vb : vc; +- +- if (v < iv) +- { +- index = unsigned(i); +- iv = v; +- } +- } +- +- return index; +-} +- +-static int findStripNext(const unsigned int buffer[][3], unsigned int buffer_size, unsigned int e0, unsigned int e1) +-{ +- for (size_t i = 0; i < buffer_size; ++i) +- { +- unsigned int a = buffer[i][0], b = buffer[i][1], c = buffer[i][2]; +- +- if (e0 == a && e1 == b) +- return (int(i) << 2) | 2; +- else if (e0 == b && e1 == c) +- return (int(i) << 2) | 0; +- else if (e0 == c && e1 == a) +- return (int(i) << 2) | 1; +- } +- +- return -1; +-} +- +-} // namespace meshopt +- +-size_t meshopt_stripify(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int restart_index) +-{ +- assert(destination != indices); +- assert(index_count % 3 == 0); +- +- using namespace meshopt; +- +- meshopt_Allocator allocator; +- +- const size_t buffer_capacity = 8; +- +- unsigned int buffer[buffer_capacity][3] = {}; +- unsigned int buffer_size = 0; +- +- size_t index_offset = 0; +- +- unsigned int strip[2] = {}; +- unsigned int parity = 0; +- +- size_t strip_size = 0; +- +- // compute vertex valence; this is used to prioritize starting triangle for strips +- unsigned int* valence = allocator.allocate(vertex_count); +- memset(valence, 0, vertex_count * sizeof(unsigned int)); +- +- for (size_t i = 0; i < index_count; ++i) +- { +- unsigned int index = indices[i]; +- assert(index < vertex_count); +- +- valence[index]++; +- } +- +- int next = -1; +- +- while (buffer_size > 0 || index_offset < index_count) +- { +- assert(next < 0 || (size_t(next >> 2) < buffer_size && (next & 3) < 3)); +- +- // fill triangle buffer +- while (buffer_size < buffer_capacity && index_offset < index_count) +- { +- buffer[buffer_size][0] = indices[index_offset + 0]; +- buffer[buffer_size][1] = indices[index_offset + 1]; +- buffer[buffer_size][2] = indices[index_offset + 2]; +- +- buffer_size++; +- index_offset += 3; +- } +- +- assert(buffer_size > 0); +- +- if (next >= 0) +- { +- unsigned int i = next >> 2; +- unsigned int a = buffer[i][0], b = buffer[i][1], c = buffer[i][2]; +- unsigned int v = buffer[i][next & 3]; +- +- // ordered removal from the buffer +- memmove(buffer[i], buffer[i + 1], (buffer_size - i - 1) * sizeof(buffer[0])); +- buffer_size--; +- +- // update vertex valences for strip start heuristic +- valence[a]--; +- valence[b]--; +- valence[c]--; +- +- // find next triangle (note that edge order flips on every iteration) +- // in some cases we need to perform a swap to pick a different outgoing triangle edge +- // for [a b c], the default strip edge is [b c], but we might want to use [a c] +- int cont = findStripNext(buffer, buffer_size, parity ? strip[1] : v, parity ? v : strip[1]); +- int swap = cont < 0 ? findStripNext(buffer, buffer_size, parity ? v : strip[0], parity ? strip[0] : v) : -1; +- +- if (cont < 0 && swap >= 0) +- { +- // [a b c] => [a b a c] +- destination[strip_size++] = strip[0]; +- destination[strip_size++] = v; +- +- // next strip has same winding +- // ? 
a b => b a v +- strip[1] = v; +- +- next = swap; +- } +- else +- { +- // emit the next vertex in the strip +- destination[strip_size++] = v; +- +- // next strip has flipped winding +- strip[0] = strip[1]; +- strip[1] = v; +- parity ^= 1; +- +- next = cont; +- } +- } +- else +- { +- // if we didn't find anything, we need to find the next new triangle +- // we use a heuristic to maximize the strip length +- unsigned int i = findStripFirst(buffer, buffer_size, &valence[0]); +- unsigned int a = buffer[i][0], b = buffer[i][1], c = buffer[i][2]; +- +- // ordered removal from the buffer +- memmove(buffer[i], buffer[i + 1], (buffer_size - i - 1) * sizeof(buffer[0])); +- buffer_size--; +- +- // update vertex valences for strip start heuristic +- valence[a]--; +- valence[b]--; +- valence[c]--; +- +- // we need to pre-rotate the triangle so that we will find a match in the existing buffer on the next iteration +- int ea = findStripNext(buffer, buffer_size, c, b); +- int eb = findStripNext(buffer, buffer_size, a, c); +- int ec = findStripNext(buffer, buffer_size, b, a); +- +- // in some cases we can have several matching edges; since we can pick any edge, we pick the one with the smallest +- // triangle index in the buffer. this reduces the effect of stripification on ACMR and additionally - for unclear +- // reasons - slightly improves the stripification efficiency +- int mine = INT_MAX; +- mine = (ea >= 0 && mine > ea) ? ea : mine; +- mine = (eb >= 0 && mine > eb) ? eb : mine; +- mine = (ec >= 0 && mine > ec) ? ec : mine; +- +- if (ea == mine) +- { +- // keep abc +- next = ea; +- } +- else if (eb == mine) +- { +- // abc -> bca +- unsigned int t = a; +- a = b, b = c, c = t; +- +- next = eb; +- } +- else if (ec == mine) +- { +- // abc -> cab +- unsigned int t = c; +- c = b, b = a, a = t; +- +- next = ec; +- } +- +- if (restart_index) +- { +- if (strip_size) +- destination[strip_size++] = restart_index; +- +- destination[strip_size++] = a; +- destination[strip_size++] = b; +- destination[strip_size++] = c; +- +- // new strip always starts with the same edge winding +- strip[0] = b; +- strip[1] = c; +- parity = 1; +- } +- else +- { +- if (strip_size) +- { +- // connect last strip using degenerate triangles +- destination[strip_size++] = strip[1]; +- destination[strip_size++] = a; +- } +- +- // note that we may need to flip the emitted triangle based on parity +- // we always end up with outgoing edge "cb" in the end +- unsigned int e0 = parity ? c : b; +- unsigned int e1 = parity ? 
b : c; +- +- destination[strip_size++] = a; +- destination[strip_size++] = e0; +- destination[strip_size++] = e1; +- +- strip[0] = e0; +- strip[1] = e1; +- parity ^= 1; +- } +- } +- } +- +- return strip_size; +-} +- +-size_t meshopt_stripifyBound(size_t index_count) +-{ +- assert(index_count % 3 == 0); +- +- // worst case without restarts is 2 degenerate indices and 3 indices per triangle +- // worst case with restarts is 1 restart index and 3 indices per triangle +- return (index_count / 3) * 5; +-} +- +-size_t meshopt_unstripify(unsigned int* destination, const unsigned int* indices, size_t index_count, unsigned int restart_index) +-{ +- assert(destination != indices); +- +- size_t offset = 0; +- size_t start = 0; +- +- for (size_t i = 0; i < index_count; ++i) +- { +- if (restart_index && indices[i] == restart_index) +- { +- start = i + 1; +- } +- else if (i - start >= 2) +- { +- unsigned int a = indices[i - 2], b = indices[i - 1], c = indices[i]; +- +- // flip winding for odd triangles +- if ((i - start) & 1) +- { +- unsigned int t = a; +- a = b, b = t; +- } +- +- // although we use restart indices, strip swaps still produce degenerate triangles, so skip them +- if (a != b && a != c && b != c) +- { +- destination[offset + 0] = a; +- destination[offset + 1] = b; +- destination[offset + 2] = c; +- offset += 3; +- } +- } +- } +- +- return offset; +-} +- +-size_t meshopt_unstripifyBound(size_t index_count) +-{ +- assert(index_count == 0 || index_count >= 3); +- +- return (index_count == 0) ? 0 : (index_count - 2) * 3; +-} +diff --git a/src/3rdparty/meshoptimizer/src/vcacheanalyzer.cpp b/src/3rdparty/meshoptimizer/src/vcacheanalyzer.cpp +deleted file mode 100644 +index 3682743..0000000 +--- a/src/3rdparty/meshoptimizer/src/vcacheanalyzer.cpp ++++ /dev/null +@@ -1,73 +0,0 @@ +-// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +-#include "meshoptimizer.h" +- +-#include +-#include +- +-meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int primgroup_size) +-{ +- assert(index_count % 3 == 0); +- assert(cache_size >= 3); +- assert(warp_size == 0 || warp_size >= 3); +- +- meshopt_Allocator allocator; +- +- meshopt_VertexCacheStatistics result = {}; +- +- unsigned int warp_offset = 0; +- unsigned int primgroup_offset = 0; +- +- unsigned int* cache_timestamps = allocator.allocate(vertex_count); +- memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int)); +- +- unsigned int timestamp = cache_size + 1; +- +- for (size_t i = 0; i < index_count; i += 3) +- { +- unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2]; +- assert(a < vertex_count && b < vertex_count && c < vertex_count); +- +- bool ac = (timestamp - cache_timestamps[a]) > cache_size; +- bool bc = (timestamp - cache_timestamps[b]) > cache_size; +- bool cc = (timestamp - cache_timestamps[c]) > cache_size; +- +- // flush cache if triangle doesn't fit into warp or into the primitive buffer +- if ((primgroup_size && primgroup_offset == primgroup_size) || (warp_size && warp_offset + ac + bc + cc > warp_size)) +- { +- result.warps_executed += warp_offset > 0; +- +- warp_offset = 0; +- primgroup_offset = 0; +- +- // reset cache +- timestamp += cache_size + 1; +- } +- +- // update cache and add vertices to warp +- for (int j = 0; j < 3; ++j) +- { +- unsigned int index = indices[i + j]; +- +- if (timestamp - cache_timestamps[index] > cache_size) +- { 
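+-				// cache miss: this vertex was last transformed more than cache_size indices
+-				// ago, so it is counted as transformed again and occupies another slot in
+-				// the current warp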
+- cache_timestamps[index] = timestamp++; +- result.vertices_transformed++; +- warp_offset++; +- } +- } +- +- primgroup_offset++; +- } +- +- size_t unique_vertex_count = 0; +- +- for (size_t i = 0; i < vertex_count; ++i) +- unique_vertex_count += cache_timestamps[i] > 0; +- +- result.warps_executed += warp_offset > 0; +- +- result.acmr = index_count == 0 ? 0 : float(result.vertices_transformed) / float(index_count / 3); +- result.atvr = unique_vertex_count == 0 ? 0 : float(result.vertices_transformed) / float(unique_vertex_count); +- +- return result; +-} +diff --git a/src/3rdparty/meshoptimizer/src/vcacheoptimizer.cpp b/src/3rdparty/meshoptimizer/src/vcacheoptimizer.cpp +deleted file mode 100644 +index fb8ade4..0000000 +--- a/src/3rdparty/meshoptimizer/src/vcacheoptimizer.cpp ++++ /dev/null +@@ -1,473 +0,0 @@ +-// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +-#include "meshoptimizer.h" +- +-#include +-#include +- +-// This work is based on: +-// Tom Forsyth. Linear-Speed Vertex Cache Optimisation. 2006 +-// Pedro Sander, Diego Nehab and Joshua Barczak. Fast Triangle Reordering for Vertex Locality and Reduced Overdraw. 2007 +-namespace meshopt +-{ +- +-const size_t kCacheSizeMax = 16; +-const size_t kValenceMax = 8; +- +-struct VertexScoreTable +-{ +- float cache[1 + kCacheSizeMax]; +- float live[1 + kValenceMax]; +-}; +- +-// Tuned to minimize the ACMR of a GPU that has a cache profile similar to NVidia and AMD +-static const VertexScoreTable kVertexScoreTable = { +- {0.f, 0.779f, 0.791f, 0.789f, 0.981f, 0.843f, 0.726f, 0.847f, 0.882f, 0.867f, 0.799f, 0.642f, 0.613f, 0.600f, 0.568f, 0.372f, 0.234f}, +- {0.f, 0.995f, 0.713f, 0.450f, 0.404f, 0.059f, 0.005f, 0.147f, 0.006f}, +-}; +- +-// Tuned to minimize the encoded index buffer size +-static const VertexScoreTable kVertexScoreTableStrip = { +- {0.f, 1.000f, 1.000f, 1.000f, 0.453f, 0.561f, 0.490f, 0.459f, 0.179f, 0.526f, 0.000f, 0.227f, 0.184f, 0.490f, 0.112f, 0.050f, 0.131f}, +- {0.f, 0.956f, 0.786f, 0.577f, 0.558f, 0.618f, 0.549f, 0.499f, 0.489f}, +-}; +- +-struct TriangleAdjacency +-{ +- unsigned int* counts; +- unsigned int* offsets; +- unsigned int* data; +-}; +- +-static void buildTriangleAdjacency(TriangleAdjacency& adjacency, const unsigned int* indices, size_t index_count, size_t vertex_count, meshopt_Allocator& allocator) +-{ +- size_t face_count = index_count / 3; +- +- // allocate arrays +- adjacency.counts = allocator.allocate(vertex_count); +- adjacency.offsets = allocator.allocate(vertex_count); +- adjacency.data = allocator.allocate(index_count); +- +- // fill triangle counts +- memset(adjacency.counts, 0, vertex_count * sizeof(unsigned int)); +- +- for (size_t i = 0; i < index_count; ++i) +- { +- assert(indices[i] < vertex_count); +- +- adjacency.counts[indices[i]]++; +- } +- +- // fill offset table +- unsigned int offset = 0; +- +- for (size_t i = 0; i < vertex_count; ++i) +- { +- adjacency.offsets[i] = offset; +- offset += adjacency.counts[i]; +- } +- +- assert(offset == index_count); +- +- // fill triangle data +- for (size_t i = 0; i < face_count; ++i) +- { +- unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2]; +- +- adjacency.data[adjacency.offsets[a]++] = unsigned(i); +- adjacency.data[adjacency.offsets[b]++] = unsigned(i); +- adjacency.data[adjacency.offsets[c]++] = unsigned(i); +- } +- +- // fix offsets that have been disturbed by the previous pass +- for (size_t i = 0; i < vertex_count; ++i) +- { +- assert(adjacency.offsets[i] >= 
adjacency.counts[i]); +- +- adjacency.offsets[i] -= adjacency.counts[i]; +- } +-} +- +-static unsigned int getNextVertexDeadEnd(const unsigned int* dead_end, unsigned int& dead_end_top, unsigned int& input_cursor, const unsigned int* live_triangles, size_t vertex_count) +-{ +- // check dead-end stack +- while (dead_end_top) +- { +- unsigned int vertex = dead_end[--dead_end_top]; +- +- if (live_triangles[vertex] > 0) +- return vertex; +- } +- +- // input order +- while (input_cursor < vertex_count) +- { +- if (live_triangles[input_cursor] > 0) +- return input_cursor; +- +- ++input_cursor; +- } +- +- return ~0u; +-} +- +-static unsigned int getNextVertexNeighbour(const unsigned int* next_candidates_begin, const unsigned int* next_candidates_end, const unsigned int* live_triangles, const unsigned int* cache_timestamps, unsigned int timestamp, unsigned int cache_size) +-{ +- unsigned int best_candidate = ~0u; +- int best_priority = -1; +- +- for (const unsigned int* next_candidate = next_candidates_begin; next_candidate != next_candidates_end; ++next_candidate) +- { +- unsigned int vertex = *next_candidate; +- +- // otherwise we don't need to process it +- if (live_triangles[vertex] > 0) +- { +- int priority = 0; +- +- // will it be in cache after fanning? +- if (2 * live_triangles[vertex] + timestamp - cache_timestamps[vertex] <= cache_size) +- { +- priority = timestamp - cache_timestamps[vertex]; // position in cache +- } +- +- if (priority > best_priority) +- { +- best_candidate = vertex; +- best_priority = priority; +- } +- } +- } +- +- return best_candidate; +-} +- +-static float vertexScore(const VertexScoreTable* table, int cache_position, unsigned int live_triangles) +-{ +- assert(cache_position >= -1 && cache_position < int(kCacheSizeMax)); +- +- unsigned int live_triangles_clamped = live_triangles < kValenceMax ? 
live_triangles : kValenceMax;
+-
+- return table->cache[1 + cache_position] + table->live[live_triangles_clamped];
+-}
+-
+-static unsigned int getNextTriangleDeadEnd(unsigned int& input_cursor, const unsigned char* emitted_flags, size_t face_count)
+-{
+- // input order
+- while (input_cursor < face_count)
+- {
+- if (!emitted_flags[input_cursor])
+- return input_cursor;
+-
+- ++input_cursor;
+- }
+-
+- return ~0u;
+-}
+-
+-} // namespace meshopt
+-
+-void meshopt_optimizeVertexCacheTable(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const meshopt::VertexScoreTable* table)
+-{
+- using namespace meshopt;
+-
+- assert(index_count % 3 == 0);
+-
+- meshopt_Allocator allocator;
+-
+- // guard for empty meshes
+- if (index_count == 0 || vertex_count == 0)
+- return;
+-
+- // support in-place optimization
+- if (destination == indices)
+- {
+- unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count);
+- memcpy(indices_copy, indices, index_count * sizeof(unsigned int));
+- indices = indices_copy;
+- }
+-
+- unsigned int cache_size = 16;
+- assert(cache_size <= kCacheSizeMax);
+-
+- size_t face_count = index_count / 3;
+-
+- // build adjacency information
+- TriangleAdjacency adjacency = {};
+- buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator);
+-
+- // live triangle counts
+- unsigned int* live_triangles = allocator.allocate<unsigned int>(vertex_count);
+- memcpy(live_triangles, adjacency.counts, vertex_count * sizeof(unsigned int));
+-
+- // emitted flags
+- unsigned char* emitted_flags = allocator.allocate<unsigned char>(face_count);
+- memset(emitted_flags, 0, face_count);
+-
+- // compute initial vertex scores
+- float* vertex_scores = allocator.allocate<float>(vertex_count);
+-
+- for (size_t i = 0; i < vertex_count; ++i)
+- vertex_scores[i] = vertexScore(table, -1, live_triangles[i]);
+-
+- // compute triangle scores
+- float* triangle_scores = allocator.allocate<float>(face_count);
+-
+- for (size_t i = 0; i < face_count; ++i)
+- {
+- unsigned int a = indices[i * 3 + 0];
+- unsigned int b = indices[i * 3 + 1];
+- unsigned int c = indices[i * 3 + 2];
+-
+- triangle_scores[i] = vertex_scores[a] + vertex_scores[b] + vertex_scores[c];
+- }
+-
+- unsigned int cache_holder[2 * (kCacheSizeMax + 3)];
+- unsigned int* cache = cache_holder;
+- unsigned int* cache_new = cache_holder + kCacheSizeMax + 3;
+- size_t cache_count = 0;
+-
+- unsigned int current_triangle = 0;
+- unsigned int input_cursor = 1;
+-
+- unsigned int output_triangle = 0;
+-
+- while (current_triangle != ~0u)
+- {
+- assert(output_triangle < face_count);
+-
+- unsigned int a = indices[current_triangle * 3 + 0];
+- unsigned int b = indices[current_triangle * 3 + 1];
+- unsigned int c = indices[current_triangle * 3 + 2];
+-
+- // output indices
+- destination[output_triangle * 3 + 0] = a;
+- destination[output_triangle * 3 + 1] = b;
+- destination[output_triangle * 3 + 2] = c;
+- output_triangle++;
+-
+- // update emitted flags
+- emitted_flags[current_triangle] = true;
+- triangle_scores[current_triangle] = 0;
+-
+- // new triangle
+- size_t cache_write = 0;
+- cache_new[cache_write++] = a;
+- cache_new[cache_write++] = b;
+- cache_new[cache_write++] = c;
+-
+- // old triangles
+- for (size_t i = 0; i < cache_count; ++i)
+- {
+- unsigned int index = cache[i];
+-
+- if (index != a && index != b && index != c)
+- {
+- cache_new[cache_write++] = index;
+- }
+- }
+-
+- unsigned int* cache_temp = cache;
+- cache = cache_new, cache_new = cache_temp;
+- cache_count = cache_write > cache_size ? cache_size : cache_write;
+-
+- // update live triangle counts
+- live_triangles[a]--;
+- live_triangles[b]--;
+- live_triangles[c]--;
+-
+- // remove emitted triangle from adjacency data
+- // this makes sure that we spend less time traversing these lists on subsequent iterations
+- for (size_t k = 0; k < 3; ++k)
+- {
+- unsigned int index = indices[current_triangle * 3 + k];
+-
+- unsigned int* neighbours = &adjacency.data[0] + adjacency.offsets[index];
+- size_t neighbours_size = adjacency.counts[index];
+-
+- for (size_t i = 0; i < neighbours_size; ++i)
+- {
+- unsigned int tri = neighbours[i];
+-
+- if (tri == current_triangle)
+- {
+- neighbours[i] = neighbours[neighbours_size - 1];
+- adjacency.counts[index]--;
+- break;
+- }
+- }
+- }
+-
+- unsigned int best_triangle = ~0u;
+- float best_score = 0;
+-
+- // update cache positions, vertex scores and triangle scores, and find next best triangle
+- for (size_t i = 0; i < cache_write; ++i)
+- {
+- unsigned int index = cache[i];
+-
+- int cache_position = i >= cache_size ? -1 : int(i);
+-
+- // update vertex score
+- float score = vertexScore(table, cache_position, live_triangles[index]);
+- float score_diff = score - vertex_scores[index];
+-
+- vertex_scores[index] = score;
+-
+- // update scores of vertex triangles
+- const unsigned int* neighbours_begin = &adjacency.data[0] + adjacency.offsets[index];
+- const unsigned int* neighbours_end = neighbours_begin + adjacency.counts[index];
+-
+- for (const unsigned int* it = neighbours_begin; it != neighbours_end; ++it)
+- {
+- unsigned int tri = *it;
+- assert(!emitted_flags[tri]);
+-
+- float tri_score = triangle_scores[tri] + score_diff;
+- assert(tri_score > 0);
+-
+- if (best_score < tri_score)
+- {
+- best_triangle = tri;
+- best_score = tri_score;
+- }
+-
+- triangle_scores[tri] = tri_score;
+- }
+- }
+-
+- // step through input triangles in order if we hit a dead-end
+- current_triangle = best_triangle;
+-
+- if (current_triangle == ~0u)
+- {
+- current_triangle = getNextTriangleDeadEnd(input_cursor, &emitted_flags[0], face_count);
+- }
+- }
+-
+- assert(input_cursor == face_count);
+- assert(output_triangle == face_count);
+-}
+-
+-void meshopt_optimizeVertexCache(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count)
+-{
+- meshopt_optimizeVertexCacheTable(destination, indices, index_count, vertex_count, &meshopt::kVertexScoreTable);
+-}
+-
+-void meshopt_optimizeVertexCacheStrip(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count)
+-{
+- meshopt_optimizeVertexCacheTable(destination, indices, index_count, vertex_count, &meshopt::kVertexScoreTableStrip);
+-}
+-
+-void meshopt_optimizeVertexCacheFifo(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size)
+-{
+- using namespace meshopt;
+-
+- assert(index_count % 3 == 0);
+- assert(cache_size >= 3);
+-
+- meshopt_Allocator allocator;
+-
+- // guard for empty meshes
+- if (index_count == 0 || vertex_count == 0)
+- return;
+-
+- // support in-place optimization
+- if (destination == indices)
+- {
+- unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count);
+- memcpy(indices_copy, indices, index_count * sizeof(unsigned int));
+- indices = indices_copy;
+- }
+-
+- size_t face_count = index_count / 3;
+-
+- // build adjacency information
+- TriangleAdjacency adjacency = {};
+- buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator);
+-
+- // live triangle counts
+- unsigned int* live_triangles = allocator.allocate<unsigned int>(vertex_count);
+- memcpy(live_triangles, adjacency.counts, vertex_count * sizeof(unsigned int));
+-
+- // cache time stamps
+- unsigned int* cache_timestamps = allocator.allocate<unsigned int>(vertex_count);
+- memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int));
+-
+- // dead-end stack
+- unsigned int* dead_end = allocator.allocate<unsigned int>(index_count);
+- unsigned int dead_end_top = 0;
+-
+- // emitted flags
+- unsigned char* emitted_flags = allocator.allocate<unsigned char>(face_count);
+- memset(emitted_flags, 0, face_count);
+-
+- unsigned int current_vertex = 0;
+-
+- unsigned int timestamp = cache_size + 1;
+- unsigned int input_cursor = 1; // vertex to restart from in case of dead-end
+-
+- unsigned int output_triangle = 0;
+-
+- while (current_vertex != ~0u)
+- {
+- const unsigned int* next_candidates_begin = &dead_end[0] + dead_end_top;
+-
+- // emit all vertex neighbours
+- const unsigned int* neighbours_begin = &adjacency.data[0] + adjacency.offsets[current_vertex];
+- const unsigned int* neighbours_end = neighbours_begin + adjacency.counts[current_vertex];
+-
+- for (const unsigned int* it = neighbours_begin; it != neighbours_end; ++it)
+- {
+- unsigned int triangle = *it;
+-
+- if (!emitted_flags[triangle])
+- {
+- unsigned int a = indices[triangle * 3 + 0], b = indices[triangle * 3 + 1], c = indices[triangle * 3 + 2];
+-
+- // output indices
+- destination[output_triangle * 3 + 0] = a;
+- destination[output_triangle * 3 + 1] = b;
+- destination[output_triangle * 3 + 2] = c;
+- output_triangle++;
+-
+- // update dead-end stack
+- dead_end[dead_end_top + 0] = a;
+- dead_end[dead_end_top + 1] = b;
+- dead_end[dead_end_top + 2] = c;
+- dead_end_top += 3;
+-
+- // update live triangle counts
+- live_triangles[a]--;
+- live_triangles[b]--;
+- live_triangles[c]--;
+-
+- // update cache info
+- // if vertex is not in cache, put it in cache
+- if (timestamp - cache_timestamps[a] > cache_size)
+- cache_timestamps[a] = timestamp++;
+-
+- if (timestamp - cache_timestamps[b] > cache_size)
+- cache_timestamps[b] = timestamp++;
+-
+- if (timestamp - cache_timestamps[c] > cache_size)
+- cache_timestamps[c] = timestamp++;
+-
+- // update emitted flags
+- emitted_flags[triangle] = true;
+- }
+- }
+-
+- // next candidates are the ones we pushed to dead-end stack just now
+- const unsigned int* next_candidates_end = &dead_end[0] + dead_end_top;
+-
+- // get next vertex
+- current_vertex = getNextVertexNeighbour(next_candidates_begin, next_candidates_end, &live_triangles[0], &cache_timestamps[0], timestamp, cache_size);
+-
+- if (current_vertex == ~0u)
+- {
+- current_vertex = getNextVertexDeadEnd(&dead_end[0], dead_end_top, input_cursor, &live_triangles[0], vertex_count);
+- }
+- }
+-
+- assert(output_triangle == face_count);
+-}
+diff --git a/src/3rdparty/meshoptimizer/src/vertexcodec.cpp b/src/3rdparty/meshoptimizer/src/vertexcodec.cpp
+deleted file mode 100644
+index 7925ea8..0000000
+--- a/src/3rdparty/meshoptimizer/src/vertexcodec.cpp
++++ /dev/null
+@@ -1,1195 +0,0 @@
+-// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
+-#include "meshoptimizer.h"
+-
+-#include <assert.h>
+-#include <string.h>
+-
+-// The block below auto-detects SIMD ISA that can be used on the target platform
+-#ifndef MESHOPTIMIZER_NO_SIMD
+-
+-// The SIMD implementation requires SSSE3, which can be enabled unconditionally through compiler settings
+-#if defined(__AVX__) || defined(__SSSE3__)
+-#define SIMD_SSE
+-#endif
+-
+-// An experimental implementation using AVX512 instructions; it's only enabled when AVX512 is enabled through compiler settings
+-#if defined(__AVX512VBMI2__) && defined(__AVX512VBMI__) && defined(__AVX512VL__) && defined(__POPCNT__)
+-#undef SIMD_SSE
+-#define SIMD_AVX
+-#endif
+-
+-// MSVC supports compiling SSSE3 code regardless of compile options; we use a cpuid-based scalar fallback
+-#if !defined(SIMD_SSE) && !defined(SIMD_AVX) && defined(_MSC_VER) && !defined(__clang__) && (defined(_M_IX86) || defined(_M_X64))
+-#define SIMD_SSE
+-#define SIMD_FALLBACK
+-#endif
+-
+-// GCC 4.9+ and clang 3.8+ support targeting SIMD ISA from individual functions; we use a cpuid-based scalar fallback
+-#if !defined(SIMD_SSE) && !defined(SIMD_AVX) && ((defined(__clang__) && __clang_major__ * 100 + __clang_minor__ >= 308) || (defined(__GNUC__) && __GNUC__ * 100 + __GNUC_MINOR__ >= 409)) && (defined(__i386__) || defined(__x86_64__))
+-#define SIMD_SSE
+-#define SIMD_FALLBACK
+-#define SIMD_TARGET __attribute__((target("ssse3")))
+-#endif
+-
+-// GCC/clang define these when NEON support is available
+-#if defined(__ARM_NEON__) || defined(__ARM_NEON)
+-#define SIMD_NEON
+-#endif
+-
+-// On MSVC, we assume that ARM builds always target NEON-capable devices
+-#if !defined(SIMD_NEON) && defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
+-#define SIMD_NEON
+-#endif
+-
+-// When targeting Wasm SIMD we can't use runtime cpuid checks so we unconditionally enable SIMD
+-#if defined(__wasm_simd128__)
+-#define SIMD_WASM
+-#endif
+-
+-#ifndef SIMD_TARGET
+-#define SIMD_TARGET
+-#endif
+-
+-#endif // !MESHOPTIMIZER_NO_SIMD
+-
+-#ifdef SIMD_SSE
+-#include <tmmintrin.h>
+-#endif
+-
+-#if defined(SIMD_SSE) && defined(SIMD_FALLBACK)
+-#ifdef _MSC_VER
+-#include <intrin.h> // __cpuid
+-#else
+-#include <cpuid.h> // __cpuid
+-#endif
+-#endif
+-
+-#ifdef SIMD_AVX
+-#include <immintrin.h>
+-#endif
+-
+-#ifdef SIMD_NEON
+-#if defined(_MSC_VER) && defined(_M_ARM64)
+-#include <arm64_neon.h>
+-#else
+-#include <arm_neon.h>
+-#endif
+-#endif
+-
+-#ifdef SIMD_WASM
+-#undef __DEPRECATED
+-#pragma clang diagnostic ignored "-Wdeprecated-declarations"
+-#include <wasm_simd128.h>
+-#endif
+-
+-#ifdef SIMD_WASM
+-#define wasmx_splat_v32x4(v, i) wasm_v32x4_shuffle(v, v, i, i, i, i)
+-#define wasmx_unpacklo_v8x16(a, b) wasm_v8x16_shuffle(a, b, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23)
+-#define wasmx_unpackhi_v8x16(a, b) wasm_v8x16_shuffle(a, b, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31)
+-#define wasmx_unpacklo_v16x8(a, b) wasm_v16x8_shuffle(a, b, 0, 8, 1, 9, 2, 10, 3, 11)
+-#define wasmx_unpackhi_v16x8(a, b) wasm_v16x8_shuffle(a, b, 4, 12, 5, 13, 6, 14, 7, 15)
+-#define wasmx_unpacklo_v64x2(a, b) wasm_v64x2_shuffle(a, b, 0, 2)
+-#define wasmx_unpackhi_v64x2(a, b) wasm_v64x2_shuffle(a, b, 1, 3)
+-#endif
+-
+-namespace meshopt
+-{
+-
+-const unsigned char kVertexHeader = 0xa0;
+-
+-static int gEncodeVertexVersion = 0;
+-
+-const size_t kVertexBlockSizeBytes = 8192;
+-const size_t kVertexBlockMaxSize = 256;
+-const size_t kByteGroupSize = 16;
+-const size_t kByteGroupDecodeLimit = 24;
+-const size_t kTailMaxSize = 32;
+-
+-static size_t getVertexBlockSize(size_t vertex_size)
+-{
+- // make sure the entire block fits into the scratch buffer
+- size_t result = kVertexBlockSizeBytes / vertex_size;
+-
+- // align to byte group size; we encode each byte as a byte group
+- // if vertex block is misaligned, it results in wasted bytes, so just truncate the block size
+- result &= ~(kByteGroupSize - 1);
+-
+- return (result < kVertexBlockMaxSize) ?
result : kVertexBlockMaxSize; +-} +- +-inline unsigned char zigzag8(unsigned char v) +-{ +- return ((signed char)(v) >> 7) ^ (v << 1); +-} +- +-inline unsigned char unzigzag8(unsigned char v) +-{ +- return -(v & 1) ^ (v >> 1); +-} +- +-static bool encodeBytesGroupZero(const unsigned char* buffer) +-{ +- for (size_t i = 0; i < kByteGroupSize; ++i) +- if (buffer[i]) +- return false; +- +- return true; +-} +- +-static size_t encodeBytesGroupMeasure(const unsigned char* buffer, int bits) +-{ +- assert(bits >= 1 && bits <= 8); +- +- if (bits == 1) +- return encodeBytesGroupZero(buffer) ? 0 : size_t(-1); +- +- if (bits == 8) +- return kByteGroupSize; +- +- size_t result = kByteGroupSize * bits / 8; +- +- unsigned char sentinel = (1 << bits) - 1; +- +- for (size_t i = 0; i < kByteGroupSize; ++i) +- result += buffer[i] >= sentinel; +- +- return result; +-} +- +-static unsigned char* encodeBytesGroup(unsigned char* data, const unsigned char* buffer, int bits) +-{ +- assert(bits >= 1 && bits <= 8); +- +- if (bits == 1) +- return data; +- +- if (bits == 8) +- { +- memcpy(data, buffer, kByteGroupSize); +- return data + kByteGroupSize; +- } +- +- size_t byte_size = 8 / bits; +- assert(kByteGroupSize % byte_size == 0); +- +- // fixed portion: bits bits for each value +- // variable portion: full byte for each out-of-range value (using 1...1 as sentinel) +- unsigned char sentinel = (1 << bits) - 1; +- +- for (size_t i = 0; i < kByteGroupSize; i += byte_size) +- { +- unsigned char byte = 0; +- +- for (size_t k = 0; k < byte_size; ++k) +- { +- unsigned char enc = (buffer[i + k] >= sentinel) ? sentinel : buffer[i + k]; +- +- byte <<= bits; +- byte |= enc; +- } +- +- *data++ = byte; +- } +- +- for (size_t i = 0; i < kByteGroupSize; ++i) +- { +- if (buffer[i] >= sentinel) +- { +- *data++ = buffer[i]; +- } +- } +- +- return data; +-} +- +-static unsigned char* encodeBytes(unsigned char* data, unsigned char* data_end, const unsigned char* buffer, size_t buffer_size) +-{ +- assert(buffer_size % kByteGroupSize == 0); +- +- unsigned char* header = data; +- +- // round number of groups to 4 to get number of header bytes +- size_t header_size = (buffer_size / kByteGroupSize + 3) / 4; +- +- if (size_t(data_end - data) < header_size) +- return 0; +- +- data += header_size; +- +- memset(header, 0, header_size); +- +- for (size_t i = 0; i < buffer_size; i += kByteGroupSize) +- { +- if (size_t(data_end - data) < kByteGroupDecodeLimit) +- return 0; +- +- int best_bits = 8; +- size_t best_size = encodeBytesGroupMeasure(buffer + i, 8); +- +- for (int bits = 1; bits < 8; bits *= 2) +- { +- size_t size = encodeBytesGroupMeasure(buffer + i, bits); +- +- if (size < best_size) +- { +- best_bits = bits; +- best_size = size; +- } +- } +- +- int bitslog2 = (best_bits == 1) ? 0 : (best_bits == 2) ? 1 : (best_bits == 4) ? 
2 : 3; +- assert((1 << bitslog2) == best_bits); +- +- size_t header_offset = i / kByteGroupSize; +- +- header[header_offset / 4] |= bitslog2 << ((header_offset % 4) * 2); +- +- unsigned char* next = encodeBytesGroup(data, buffer + i, best_bits); +- +- assert(data + best_size == next); +- data = next; +- } +- +- return data; +-} +- +-static unsigned char* encodeVertexBlock(unsigned char* data, unsigned char* data_end, const unsigned char* vertex_data, size_t vertex_count, size_t vertex_size, unsigned char last_vertex[256]) +-{ +- assert(vertex_count > 0 && vertex_count <= kVertexBlockMaxSize); +- +- unsigned char buffer[kVertexBlockMaxSize]; +- assert(sizeof(buffer) % kByteGroupSize == 0); +- +- // we sometimes encode elements we didn't fill when rounding to kByteGroupSize +- memset(buffer, 0, sizeof(buffer)); +- +- for (size_t k = 0; k < vertex_size; ++k) +- { +- size_t vertex_offset = k; +- +- unsigned char p = last_vertex[k]; +- +- for (size_t i = 0; i < vertex_count; ++i) +- { +- buffer[i] = zigzag8(vertex_data[vertex_offset] - p); +- +- p = vertex_data[vertex_offset]; +- +- vertex_offset += vertex_size; +- } +- +- data = encodeBytes(data, data_end, buffer, (vertex_count + kByteGroupSize - 1) & ~(kByteGroupSize - 1)); +- if (!data) +- return 0; +- } +- +- memcpy(last_vertex, &vertex_data[vertex_size * (vertex_count - 1)], vertex_size); +- +- return data; +-} +- +-#if defined(SIMD_FALLBACK) || (!defined(SIMD_SSE) && !defined(SIMD_NEON) && !defined(SIMD_AVX)) +-static const unsigned char* decodeBytesGroup(const unsigned char* data, unsigned char* buffer, int bitslog2) +-{ +-#define READ() byte = *data++ +-#define NEXT(bits) enc = byte >> (8 - bits), byte <<= bits, encv = *data_var, *buffer++ = (enc == (1 << bits) - 1) ? encv : enc, data_var += (enc == (1 << bits) - 1) +- +- unsigned char byte, enc, encv; +- const unsigned char* data_var; +- +- switch (bitslog2) +- { +- case 0: +- memset(buffer, 0, kByteGroupSize); +- return data; +- case 1: +- data_var = data + 4; +- +- // 4 groups with 4 2-bit values in each byte +- READ(), NEXT(2), NEXT(2), NEXT(2), NEXT(2); +- READ(), NEXT(2), NEXT(2), NEXT(2), NEXT(2); +- READ(), NEXT(2), NEXT(2), NEXT(2), NEXT(2); +- READ(), NEXT(2), NEXT(2), NEXT(2), NEXT(2); +- +- return data_var; +- case 2: +- data_var = data + 8; +- +- // 8 groups with 2 4-bit values in each byte +- READ(), NEXT(4), NEXT(4); +- READ(), NEXT(4), NEXT(4); +- READ(), NEXT(4), NEXT(4); +- READ(), NEXT(4), NEXT(4); +- READ(), NEXT(4), NEXT(4); +- READ(), NEXT(4), NEXT(4); +- READ(), NEXT(4), NEXT(4); +- READ(), NEXT(4), NEXT(4); +- +- return data_var; +- case 3: +- memcpy(buffer, data, kByteGroupSize); +- return data + kByteGroupSize; +- default: +- assert(!"Unexpected bit length"); // unreachable since bitslog2 is a 2-bit value +- return data; +- } +- +-#undef READ +-#undef NEXT +-} +- +-static const unsigned char* decodeBytes(const unsigned char* data, const unsigned char* data_end, unsigned char* buffer, size_t buffer_size) +-{ +- assert(buffer_size % kByteGroupSize == 0); +- +- const unsigned char* header = data; +- +- // round number of groups to 4 to get number of header bytes +- size_t header_size = (buffer_size / kByteGroupSize + 3) / 4; +- +- if (size_t(data_end - data) < header_size) +- return 0; +- +- data += header_size; +- +- for (size_t i = 0; i < buffer_size; i += kByteGroupSize) +- { +- if (size_t(data_end - data) < kByteGroupDecodeLimit) +- return 0; +- +- size_t header_offset = i / kByteGroupSize; +- +- int bitslog2 = (header[header_offset / 4] >> ((header_offset % 
4) * 2)) & 3;
+-
+- data = decodeBytesGroup(data, buffer + i, bitslog2);
+- }
+-
+- return data;
+-}
+-
+-static const unsigned char* decodeVertexBlock(const unsigned char* data, const unsigned char* data_end, unsigned char* vertex_data, size_t vertex_count, size_t vertex_size, unsigned char last_vertex[256])
+-{
+- assert(vertex_count > 0 && vertex_count <= kVertexBlockMaxSize);
+-
+- unsigned char buffer[kVertexBlockMaxSize];
+- unsigned char transposed[kVertexBlockSizeBytes];
+-
+- size_t vertex_count_aligned = (vertex_count + kByteGroupSize - 1) & ~(kByteGroupSize - 1);
+-
+- for (size_t k = 0; k < vertex_size; ++k)
+- {
+- data = decodeBytes(data, data_end, buffer, vertex_count_aligned);
+- if (!data)
+- return 0;
+-
+- size_t vertex_offset = k;
+-
+- unsigned char p = last_vertex[k];
+-
+- for (size_t i = 0; i < vertex_count; ++i)
+- {
+- unsigned char v = unzigzag8(buffer[i]) + p;
+-
+- transposed[vertex_offset] = v;
+- p = v;
+-
+- vertex_offset += vertex_size;
+- }
+- }
+-
+- memcpy(vertex_data, transposed, vertex_count * vertex_size);
+-
+- memcpy(last_vertex, &transposed[vertex_size * (vertex_count - 1)], vertex_size);
+-
+- return data;
+-}
+-#endif
+-
+-#if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM)
+-static unsigned char kDecodeBytesGroupShuffle[256][8];
+-static unsigned char kDecodeBytesGroupCount[256];
+-
+-#ifdef __wasm__
+-__attribute__((cold)) // this saves 500 bytes in the output binary - we don't need to vectorize this loop!
+-#endif
+-static bool
+-decodeBytesGroupBuildTables()
+-{
+- for (int mask = 0; mask < 256; ++mask)
+- {
+- unsigned char shuffle[8];
+- unsigned char count = 0;
+-
+- for (int i = 0; i < 8; ++i)
+- {
+- int maski = (mask >> i) & 1;
+- shuffle[i] = maski ? count : 0x80;
+- count += (unsigned char)(maski);
+- }
+-
+- memcpy(kDecodeBytesGroupShuffle[mask], shuffle, 8);
+- kDecodeBytesGroupCount[mask] = count;
+- }
+-
+- return true;
+-}
+-
+-static bool gDecodeBytesGroupInitialized = decodeBytesGroupBuildTables();
+-#endif
+-
+-#ifdef SIMD_SSE
+-SIMD_TARGET
+-static __m128i decodeShuffleMask(unsigned char mask0, unsigned char mask1)
+-{
+- __m128i sm0 = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(&kDecodeBytesGroupShuffle[mask0]));
+- __m128i sm1 = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(&kDecodeBytesGroupShuffle[mask1]));
+- __m128i sm1off = _mm_set1_epi8(kDecodeBytesGroupCount[mask0]);
+-
+- __m128i sm1r = _mm_add_epi8(sm1, sm1off);
+-
+- return _mm_unpacklo_epi64(sm0, sm1r);
+-}
+-
+-SIMD_TARGET
+-static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsigned char* buffer, int bitslog2)
+-{
+- switch (bitslog2)
+- {
+- case 0:
+- {
+- __m128i result = _mm_setzero_si128();
+-
+- _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result);
+-
+- return data;
+- }
+-
+- case 1:
+- {
+-#ifdef __GNUC__
+- typedef int __attribute__((aligned(1))) unaligned_int;
+-#else
+- typedef int unaligned_int;
+-#endif
+-
+- __m128i sel2 = _mm_cvtsi32_si128(*reinterpret_cast<const unaligned_int*>(data));
+- __m128i rest = _mm_loadu_si128(reinterpret_cast<const __m128i*>(data + 4));
+-
+- __m128i sel22 = _mm_unpacklo_epi8(_mm_srli_epi16(sel2, 4), sel2);
+- __m128i sel2222 = _mm_unpacklo_epi8(_mm_srli_epi16(sel22, 2), sel22);
+- __m128i sel = _mm_and_si128(sel2222, _mm_set1_epi8(3));
+-
+- __m128i mask = _mm_cmpeq_epi8(sel, _mm_set1_epi8(3));
+- int mask16 = _mm_movemask_epi8(mask);
+- unsigned char mask0 = (unsigned char)(mask16 & 255);
+- unsigned char mask1 = (unsigned char)(mask16 >> 8);
+-
+- __m128i shuf = decodeShuffleMask(mask0, mask1);
+-
+- __m128i result = _mm_or_si128(_mm_shuffle_epi8(rest, shuf), _mm_andnot_si128(mask, sel));
+-
+- _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result);
+-
+- return data + 4 + kDecodeBytesGroupCount[mask0] + kDecodeBytesGroupCount[mask1];
+- }
+-
+- case 2:
+- {
+- __m128i sel4 = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(data));
+- __m128i rest = _mm_loadu_si128(reinterpret_cast<const __m128i*>(data + 8));
+-
+- __m128i sel44 = _mm_unpacklo_epi8(_mm_srli_epi16(sel4, 4), sel4);
+- __m128i sel = _mm_and_si128(sel44, _mm_set1_epi8(15));
+-
+- __m128i mask = _mm_cmpeq_epi8(sel, _mm_set1_epi8(15));
+- int mask16 = _mm_movemask_epi8(mask);
+- unsigned char mask0 = (unsigned char)(mask16 & 255);
+- unsigned char mask1 = (unsigned char)(mask16 >> 8);
+-
+- __m128i shuf = decodeShuffleMask(mask0, mask1);
+-
+- __m128i result = _mm_or_si128(_mm_shuffle_epi8(rest, shuf), _mm_andnot_si128(mask, sel));
+-
+- _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result);
+-
+- return data + 8 + kDecodeBytesGroupCount[mask0] + kDecodeBytesGroupCount[mask1];
+- }
+-
+- case 3:
+- {
+- __m128i result = _mm_loadu_si128(reinterpret_cast<const __m128i*>(data));
+-
+- _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result);
+-
+- return data + 16;
+- }
+-
+- default:
+- assert(!"Unexpected bit length"); // unreachable since bitslog2 is a 2-bit value
+- return data;
+- }
+-}
+-#endif
+-
+-#ifdef SIMD_AVX
+-static const __m128i decodeBytesGroupConfig[] = {
+- _mm_set1_epi8(3),
+- _mm_set1_epi8(15),
+- _mm_setr_epi8(6, 4, 2, 0, 14, 12, 10, 8, 22, 20, 18, 16, 30, 28, 26, 24),
+- _mm_setr_epi8(4, 0, 12, 8, 20, 16, 28, 24, 36, 32, 44, 40, 52, 48, 60, 56),
+-};
+-
+-static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsigned char* buffer, int bitslog2)
+-{
+- switch (bitslog2)
+- {
+- case 0:
+- {
+- __m128i result = _mm_setzero_si128();
+-
+- _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result);
+-
+- return data;
+- }
+-
+- case 1:
+- case 2:
+- {
+- const unsigned char* skip = data + (bitslog2 << 2);
+-
+- __m128i selb = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(data));
+- __m128i rest = _mm_loadu_si128(reinterpret_cast<const __m128i*>(skip));
+-
+- __m128i sent = decodeBytesGroupConfig[bitslog2 - 1];
+- __m128i ctrl = decodeBytesGroupConfig[bitslog2 + 1];
+-
+- __m128i selw = _mm_shuffle_epi32(selb, 0x44);
+- __m128i sel = _mm_and_si128(sent, _mm_multishift_epi64_epi8(ctrl, selw));
+- __mmask16 mask16 = _mm_cmp_epi8_mask(sel, sent, _MM_CMPINT_EQ);
+-
+- __m128i result = _mm_mask_expand_epi8(sel, mask16, rest);
+-
+- _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result);
+-
+- return skip + _mm_popcnt_u32(mask16);
+- }
+-
+- case 3:
+- {
+- __m128i result = _mm_loadu_si128(reinterpret_cast<const __m128i*>(data));
+-
+- _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result);
+-
+- return data + 16;
+- }
+-
+- default:
+- assert(!"Unexpected bit length"); // unreachable since bitslog2 is a 2-bit value
+- return data;
+- }
+-}
+-#endif
+-
+-#ifdef SIMD_NEON
+-static uint8x16_t shuffleBytes(unsigned char mask0, unsigned char mask1, uint8x8_t rest0, uint8x8_t rest1)
+-{
+- uint8x8_t sm0 = vld1_u8(kDecodeBytesGroupShuffle[mask0]);
+- uint8x8_t sm1 = vld1_u8(kDecodeBytesGroupShuffle[mask1]);
+-
+- uint8x8_t r0 = vtbl1_u8(rest0, sm0);
+- uint8x8_t r1 = vtbl1_u8(rest1, sm1);
+-
+- return vcombine_u8(r0, r1);
+-}
+-
+-static void neonMoveMask(uint8x16_t mask, unsigned char& mask0, unsigned char& mask1)
+-{
+- static const unsigned char byte_mask_data[16] = {1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128};
+-
+- uint8x16_t byte_mask =
vld1q_u8(byte_mask_data); +- uint8x16_t masked = vandq_u8(mask, byte_mask); +- +-#ifdef __aarch64__ +- // aarch64 has horizontal sums; MSVC doesn't expose this via arm64_neon.h so this path is exclusive to clang/gcc +- mask0 = vaddv_u8(vget_low_u8(masked)); +- mask1 = vaddv_u8(vget_high_u8(masked)); +-#else +- // we need horizontal sums of each half of masked, which can be done in 3 steps (yielding sums of sizes 2, 4, 8) +- uint8x8_t sum1 = vpadd_u8(vget_low_u8(masked), vget_high_u8(masked)); +- uint8x8_t sum2 = vpadd_u8(sum1, sum1); +- uint8x8_t sum3 = vpadd_u8(sum2, sum2); +- +- mask0 = vget_lane_u8(sum3, 0); +- mask1 = vget_lane_u8(sum3, 1); +-#endif +-} +- +-static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsigned char* buffer, int bitslog2) +-{ +- switch (bitslog2) +- { +- case 0: +- { +- uint8x16_t result = vdupq_n_u8(0); +- +- vst1q_u8(buffer, result); +- +- return data; +- } +- +- case 1: +- { +- uint8x8_t sel2 = vld1_u8(data); +- uint8x8_t sel22 = vzip_u8(vshr_n_u8(sel2, 4), sel2).val[0]; +- uint8x8x2_t sel2222 = vzip_u8(vshr_n_u8(sel22, 2), sel22); +- uint8x16_t sel = vandq_u8(vcombine_u8(sel2222.val[0], sel2222.val[1]), vdupq_n_u8(3)); +- +- uint8x16_t mask = vceqq_u8(sel, vdupq_n_u8(3)); +- unsigned char mask0, mask1; +- neonMoveMask(mask, mask0, mask1); +- +- uint8x8_t rest0 = vld1_u8(data + 4); +- uint8x8_t rest1 = vld1_u8(data + 4 + kDecodeBytesGroupCount[mask0]); +- +- uint8x16_t result = vbslq_u8(mask, shuffleBytes(mask0, mask1, rest0, rest1), sel); +- +- vst1q_u8(buffer, result); +- +- return data + 4 + kDecodeBytesGroupCount[mask0] + kDecodeBytesGroupCount[mask1]; +- } +- +- case 2: +- { +- uint8x8_t sel4 = vld1_u8(data); +- uint8x8x2_t sel44 = vzip_u8(vshr_n_u8(sel4, 4), vand_u8(sel4, vdup_n_u8(15))); +- uint8x16_t sel = vcombine_u8(sel44.val[0], sel44.val[1]); +- +- uint8x16_t mask = vceqq_u8(sel, vdupq_n_u8(15)); +- unsigned char mask0, mask1; +- neonMoveMask(mask, mask0, mask1); +- +- uint8x8_t rest0 = vld1_u8(data + 8); +- uint8x8_t rest1 = vld1_u8(data + 8 + kDecodeBytesGroupCount[mask0]); +- +- uint8x16_t result = vbslq_u8(mask, shuffleBytes(mask0, mask1, rest0, rest1), sel); +- +- vst1q_u8(buffer, result); +- +- return data + 8 + kDecodeBytesGroupCount[mask0] + kDecodeBytesGroupCount[mask1]; +- } +- +- case 3: +- { +- uint8x16_t result = vld1q_u8(data); +- +- vst1q_u8(buffer, result); +- +- return data + 16; +- } +- +- default: +- assert(!"Unexpected bit length"); // unreachable since bitslog2 is a 2-bit value +- return data; +- } +-} +-#endif +- +-#ifdef SIMD_WASM +-SIMD_TARGET +-static v128_t decodeShuffleMask(unsigned char mask0, unsigned char mask1) +-{ +- v128_t sm0 = wasm_v128_load(&kDecodeBytesGroupShuffle[mask0]); +- v128_t sm1 = wasm_v128_load(&kDecodeBytesGroupShuffle[mask1]); +- +- v128_t sm1off = wasm_v128_load(&kDecodeBytesGroupCount[mask0]); +- sm1off = wasm_v8x16_shuffle(sm1off, sm1off, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); +- +- v128_t sm1r = wasm_i8x16_add(sm1, sm1off); +- +- return wasmx_unpacklo_v64x2(sm0, sm1r); +-} +- +-SIMD_TARGET +-static void wasmMoveMask(v128_t mask, unsigned char& mask0, unsigned char& mask1) +-{ +- // magic constant found using z3 SMT assuming mask has 8 groups of 0xff or 0x00 +- const uint64_t magic = 0x000103070f1f3f80ull; +- +- // TODO: This can use v8x16_bitmask in the future +- mask0 = uint8_t((wasm_i64x2_extract_lane(mask, 0) * magic) >> 56); +- mask1 = uint8_t((wasm_i64x2_extract_lane(mask, 1) * magic) >> 56); +-} +- +-SIMD_TARGET +-static const unsigned char* 
decodeBytesGroupSimd(const unsigned char* data, unsigned char* buffer, int bitslog2) +-{ +- unsigned char byte, enc, encv; +- const unsigned char* data_var; +- +- switch (bitslog2) +- { +- case 0: +- { +- v128_t result = wasm_i8x16_splat(0); +- +- wasm_v128_store(buffer, result); +- +- return data; +- } +- +- case 1: +- { +- v128_t sel2 = wasm_v128_load(data); +- v128_t rest = wasm_v128_load(data + 4); +- +- v128_t sel22 = wasmx_unpacklo_v8x16(wasm_i16x8_shr(sel2, 4), sel2); +- v128_t sel2222 = wasmx_unpacklo_v8x16(wasm_i16x8_shr(sel22, 2), sel22); +- v128_t sel = wasm_v128_and(sel2222, wasm_i8x16_splat(3)); +- +- v128_t mask = wasm_i8x16_eq(sel, wasm_i8x16_splat(3)); +- +- unsigned char mask0, mask1; +- wasmMoveMask(mask, mask0, mask1); +- +- v128_t shuf = decodeShuffleMask(mask0, mask1); +- +- v128_t result = wasm_v128_bitselect(wasm_v8x16_swizzle(rest, shuf), sel, mask); +- +- wasm_v128_store(buffer, result); +- +- return data + 4 + kDecodeBytesGroupCount[mask0] + kDecodeBytesGroupCount[mask1]; +- } +- +- case 2: +- { +- v128_t sel4 = wasm_v128_load(data); +- v128_t rest = wasm_v128_load(data + 8); +- +- v128_t sel44 = wasmx_unpacklo_v8x16(wasm_i16x8_shr(sel4, 4), sel4); +- v128_t sel = wasm_v128_and(sel44, wasm_i8x16_splat(15)); +- +- v128_t mask = wasm_i8x16_eq(sel, wasm_i8x16_splat(15)); +- +- unsigned char mask0, mask1; +- wasmMoveMask(mask, mask0, mask1); +- +- v128_t shuf = decodeShuffleMask(mask0, mask1); +- +- v128_t result = wasm_v128_bitselect(wasm_v8x16_swizzle(rest, shuf), sel, mask); +- +- wasm_v128_store(buffer, result); +- +- return data + 8 + kDecodeBytesGroupCount[mask0] + kDecodeBytesGroupCount[mask1]; +- } +- +- case 3: +- { +- v128_t result = wasm_v128_load(data); +- +- wasm_v128_store(buffer, result); +- +- return data + 16; +- } +- +- default: +- assert(!"Unexpected bit length"); // unreachable since bitslog2 is a 2-bit value +- return data; +- } +-} +-#endif +- +-#if defined(SIMD_SSE) || defined(SIMD_AVX) +-SIMD_TARGET +-static void transpose8(__m128i& x0, __m128i& x1, __m128i& x2, __m128i& x3) +-{ +- __m128i t0 = _mm_unpacklo_epi8(x0, x1); +- __m128i t1 = _mm_unpackhi_epi8(x0, x1); +- __m128i t2 = _mm_unpacklo_epi8(x2, x3); +- __m128i t3 = _mm_unpackhi_epi8(x2, x3); +- +- x0 = _mm_unpacklo_epi16(t0, t2); +- x1 = _mm_unpackhi_epi16(t0, t2); +- x2 = _mm_unpacklo_epi16(t1, t3); +- x3 = _mm_unpackhi_epi16(t1, t3); +-} +- +-SIMD_TARGET +-static __m128i unzigzag8(__m128i v) +-{ +- __m128i xl = _mm_sub_epi8(_mm_setzero_si128(), _mm_and_si128(v, _mm_set1_epi8(1))); +- __m128i xr = _mm_and_si128(_mm_srli_epi16(v, 1), _mm_set1_epi8(127)); +- +- return _mm_xor_si128(xl, xr); +-} +-#endif +- +-#ifdef SIMD_NEON +-static void transpose8(uint8x16_t& x0, uint8x16_t& x1, uint8x16_t& x2, uint8x16_t& x3) +-{ +- uint8x16x2_t t01 = vzipq_u8(x0, x1); +- uint8x16x2_t t23 = vzipq_u8(x2, x3); +- +- uint16x8x2_t x01 = vzipq_u16(vreinterpretq_u16_u8(t01.val[0]), vreinterpretq_u16_u8(t23.val[0])); +- uint16x8x2_t x23 = vzipq_u16(vreinterpretq_u16_u8(t01.val[1]), vreinterpretq_u16_u8(t23.val[1])); +- +- x0 = vreinterpretq_u8_u16(x01.val[0]); +- x1 = vreinterpretq_u8_u16(x01.val[1]); +- x2 = vreinterpretq_u8_u16(x23.val[0]); +- x3 = vreinterpretq_u8_u16(x23.val[1]); +-} +- +-static uint8x16_t unzigzag8(uint8x16_t v) +-{ +- uint8x16_t xl = vreinterpretq_u8_s8(vnegq_s8(vreinterpretq_s8_u8(vandq_u8(v, vdupq_n_u8(1))))); +- uint8x16_t xr = vshrq_n_u8(v, 1); +- +- return veorq_u8(xl, xr); +-} +-#endif +- +-#ifdef SIMD_WASM +-SIMD_TARGET +-static void transpose8(v128_t& x0, v128_t& x1, v128_t& x2, 
v128_t& x3)
+-{
+- v128_t t0 = wasmx_unpacklo_v8x16(x0, x1);
+- v128_t t1 = wasmx_unpackhi_v8x16(x0, x1);
+- v128_t t2 = wasmx_unpacklo_v8x16(x2, x3);
+- v128_t t3 = wasmx_unpackhi_v8x16(x2, x3);
+-
+- x0 = wasmx_unpacklo_v16x8(t0, t2);
+- x1 = wasmx_unpackhi_v16x8(t0, t2);
+- x2 = wasmx_unpacklo_v16x8(t1, t3);
+- x3 = wasmx_unpackhi_v16x8(t1, t3);
+-}
+-
+-SIMD_TARGET
+-static v128_t unzigzag8(v128_t v)
+-{
+- v128_t xl = wasm_i8x16_neg(wasm_v128_and(v, wasm_i8x16_splat(1)));
+- v128_t xr = wasm_u8x16_shr(v, 1);
+-
+- return wasm_v128_xor(xl, xr);
+-}
+-#endif
+-
+-#if defined(SIMD_SSE) || defined(SIMD_AVX) || defined(SIMD_NEON) || defined(SIMD_WASM)
+-SIMD_TARGET
+-static const unsigned char* decodeBytesSimd(const unsigned char* data, const unsigned char* data_end, unsigned char* buffer, size_t buffer_size)
+-{
+- assert(buffer_size % kByteGroupSize == 0);
+- assert(kByteGroupSize == 16);
+-
+- const unsigned char* header = data;
+-
+- // round number of groups to 4 to get number of header bytes
+- size_t header_size = (buffer_size / kByteGroupSize + 3) / 4;
+-
+- if (size_t(data_end - data) < header_size)
+- return 0;
+-
+- data += header_size;
+-
+- size_t i = 0;
+-
+- // fast-path: process 4 groups at a time, do a shared bounds check - each group reads <=24b
+- for (; i + kByteGroupSize * 4 <= buffer_size && size_t(data_end - data) >= kByteGroupDecodeLimit * 4; i += kByteGroupSize * 4)
+- {
+- size_t header_offset = i / kByteGroupSize;
+- unsigned char header_byte = header[header_offset / 4];
+-
+- data = decodeBytesGroupSimd(data, buffer + i + kByteGroupSize * 0, (header_byte >> 0) & 3);
+- data = decodeBytesGroupSimd(data, buffer + i + kByteGroupSize * 1, (header_byte >> 2) & 3);
+- data = decodeBytesGroupSimd(data, buffer + i + kByteGroupSize * 2, (header_byte >> 4) & 3);
+- data = decodeBytesGroupSimd(data, buffer + i + kByteGroupSize * 3, (header_byte >> 6) & 3);
+- }
+-
+- // slow-path: process remaining groups
+- for (; i < buffer_size; i += kByteGroupSize)
+- {
+- if (size_t(data_end - data) < kByteGroupDecodeLimit)
+- return 0;
+-
+- size_t header_offset = i / kByteGroupSize;
+-
+- int bitslog2 = (header[header_offset / 4] >> ((header_offset % 4) * 2)) & 3;
+-
+- data = decodeBytesGroupSimd(data, buffer + i, bitslog2);
+- }
+-
+- return data;
+-}
+-
+-SIMD_TARGET
+-static const unsigned char* decodeVertexBlockSimd(const unsigned char* data, const unsigned char* data_end, unsigned char* vertex_data, size_t vertex_count, size_t vertex_size, unsigned char last_vertex[256])
+-{
+- assert(vertex_count > 0 && vertex_count <= kVertexBlockMaxSize);
+-
+- unsigned char buffer[kVertexBlockMaxSize * 4];
+- unsigned char transposed[kVertexBlockSizeBytes];
+-
+- size_t vertex_count_aligned = (vertex_count + kByteGroupSize - 1) & ~(kByteGroupSize - 1);
+-
+- for (size_t k = 0; k < vertex_size; k += 4)
+- {
+- for (size_t j = 0; j < 4; ++j)
+- {
+- data = decodeBytesSimd(data, data_end, buffer + j * vertex_count_aligned, vertex_count_aligned);
+- if (!data)
+- return 0;
+- }
+-
+-#if defined(SIMD_SSE) || defined(SIMD_AVX)
+-#define TEMP __m128i
+-#define PREP() __m128i pi = _mm_cvtsi32_si128(*reinterpret_cast<const int*>(last_vertex + k))
+-#define LOAD(i) __m128i r##i = _mm_loadu_si128(reinterpret_cast<const __m128i*>(buffer + j + i * vertex_count_aligned))
+-#define GRP4(i) t0 = _mm_shuffle_epi32(r##i, 0), t1 = _mm_shuffle_epi32(r##i, 1), t2 = _mm_shuffle_epi32(r##i, 2), t3 = _mm_shuffle_epi32(r##i, 3)
+-#define FIXD(i) t##i = pi = _mm_add_epi8(pi, t##i)
+-#define SAVE(i) *reinterpret_cast<int*>(savep) = _mm_cvtsi128_si32(t##i), savep += vertex_size
+-#endif
+-
+-#ifdef SIMD_NEON
+-#define TEMP uint8x8_t
+-#define PREP() uint8x8_t pi = vreinterpret_u8_u32(vld1_lane_u32(reinterpret_cast<const uint32_t*>(last_vertex + k), vdup_n_u32(0), 0))
+-#define LOAD(i) uint8x16_t r##i = vld1q_u8(buffer + j + i * vertex_count_aligned)
+-#define GRP4(i) t0 = vget_low_u8(r##i), t1 = vreinterpret_u8_u32(vdup_lane_u32(vreinterpret_u32_u8(t0), 1)), t2 = vget_high_u8(r##i), t3 = vreinterpret_u8_u32(vdup_lane_u32(vreinterpret_u32_u8(t2), 1))
+-#define FIXD(i) t##i = pi = vadd_u8(pi, t##i)
+-#define SAVE(i) vst1_lane_u32(reinterpret_cast<uint32_t*>(savep), vreinterpret_u32_u8(t##i), 0), savep += vertex_size
+-#endif
+-
+-#ifdef SIMD_WASM
+-#define TEMP v128_t
+-#define PREP() v128_t pi = wasm_v128_load(last_vertex + k)
+-#define LOAD(i) v128_t r##i = wasm_v128_load(buffer + j + i * vertex_count_aligned)
+-#define GRP4(i) t0 = wasmx_splat_v32x4(r##i, 0), t1 = wasmx_splat_v32x4(r##i, 1), t2 = wasmx_splat_v32x4(r##i, 2), t3 = wasmx_splat_v32x4(r##i, 3)
+-#define FIXD(i) t##i = pi = wasm_i8x16_add(pi, t##i)
+-#define SAVE(i) *reinterpret_cast<int*>(savep) = wasm_i32x4_extract_lane(t##i, 0), savep += vertex_size
+-#endif
+-
+- PREP();
+-
+- unsigned char* savep = transposed + k;
+-
+- for (size_t j = 0; j < vertex_count_aligned; j += 16)
+- {
+- LOAD(0);
+- LOAD(1);
+- LOAD(2);
+- LOAD(3);
+-
+- r0 = unzigzag8(r0);
+- r1 = unzigzag8(r1);
+- r2 = unzigzag8(r2);
+- r3 = unzigzag8(r3);
+-
+- transpose8(r0, r1, r2, r3);
+-
+- TEMP t0, t1, t2, t3;
+-
+- GRP4(0);
+- FIXD(0), FIXD(1), FIXD(2), FIXD(3);
+- SAVE(0), SAVE(1), SAVE(2), SAVE(3);
+-
+- GRP4(1);
+- FIXD(0), FIXD(1), FIXD(2), FIXD(3);
+- SAVE(0), SAVE(1), SAVE(2), SAVE(3);
+-
+- GRP4(2);
+- FIXD(0), FIXD(1), FIXD(2), FIXD(3);
+- SAVE(0), SAVE(1), SAVE(2), SAVE(3);
+-
+- GRP4(3);
+- FIXD(0), FIXD(1), FIXD(2), FIXD(3);
+- SAVE(0), SAVE(1), SAVE(2), SAVE(3);
+-
+-#undef TEMP
+-#undef PREP
+-#undef LOAD
+-#undef GRP4
+-#undef FIXD
+-#undef SAVE
+- }
+- }
+-
+- memcpy(vertex_data, transposed, vertex_count * vertex_size);
+-
+- memcpy(last_vertex, &transposed[vertex_size * (vertex_count - 1)], vertex_size);
+-
+- return data;
+-}
+-#endif
+-
+-#if defined(SIMD_SSE) && defined(SIMD_FALLBACK)
+-static unsigned int getCpuFeatures()
+-{
+- int cpuinfo[4] = {};
+-#ifdef _MSC_VER
+- __cpuid(cpuinfo, 1);
+-#else
+- __cpuid(1, cpuinfo[0], cpuinfo[1], cpuinfo[2], cpuinfo[3]);
+-#endif
+- return cpuinfo[2];
+-}
+-
+-static unsigned int cpuid = getCpuFeatures();
+-#endif
+-
+-} // namespace meshopt
+-
+-size_t meshopt_encodeVertexBuffer(unsigned char* buffer, size_t buffer_size, const void* vertices, size_t vertex_count, size_t vertex_size)
+-{
+- using namespace meshopt;
+-
+- assert(vertex_size > 0 && vertex_size <= 256);
+- assert(vertex_size % 4 == 0);
+-
+- const unsigned char* vertex_data = static_cast<const unsigned char*>(vertices);
+-
+- unsigned char* data = buffer;
+- unsigned char* data_end = buffer + buffer_size;
+-
+- if (size_t(data_end - data) < 1 + vertex_size)
+- return 0;
+-
+- int version = gEncodeVertexVersion;
+-
+- *data++ = (unsigned char)(kVertexHeader | version);
+-
+- unsigned char first_vertex[256] = {};
+- if (vertex_count > 0)
+- memcpy(first_vertex, vertex_data, vertex_size);
+-
+- unsigned char last_vertex[256] = {};
+- memcpy(last_vertex, first_vertex, vertex_size);
+-
+- size_t vertex_block_size = getVertexBlockSize(vertex_size);
+-
+- size_t vertex_offset = 0;
+-
+- while (vertex_offset < vertex_count)
+- {
+- size_t block_size = (vertex_offset + vertex_block_size < vertex_count) ? vertex_block_size : vertex_count - vertex_offset;
+-
+- data = encodeVertexBlock(data, data_end, vertex_data + vertex_offset * vertex_size, block_size, vertex_size, last_vertex);
+- if (!data)
+- return 0;
+-
+- vertex_offset += block_size;
+- }
+-
+- size_t tail_size = vertex_size < kTailMaxSize ? kTailMaxSize : vertex_size;
+-
+- if (size_t(data_end - data) < tail_size)
+- return 0;
+-
+- // write first vertex to the end of the stream and pad it to 32 bytes; this is important to simplify bounds checks in decoder
+- if (vertex_size < kTailMaxSize)
+- {
+- memset(data, 0, kTailMaxSize - vertex_size);
+- data += kTailMaxSize - vertex_size;
+- }
+-
+- memcpy(data, first_vertex, vertex_size);
+- data += vertex_size;
+-
+- assert(data >= buffer + tail_size);
+- assert(data <= buffer + buffer_size);
+-
+- return data - buffer;
+-}
+-
+-size_t meshopt_encodeVertexBufferBound(size_t vertex_count, size_t vertex_size)
+-{
+- using namespace meshopt;
+-
+- assert(vertex_size > 0 && vertex_size <= 256);
+- assert(vertex_size % 4 == 0);
+-
+- size_t vertex_block_size = getVertexBlockSize(vertex_size);
+- size_t vertex_block_count = (vertex_count + vertex_block_size - 1) / vertex_block_size;
+-
+- size_t vertex_block_header_size = (vertex_block_size / kByteGroupSize + 3) / 4;
+- size_t vertex_block_data_size = vertex_block_size;
+-
+- size_t tail_size = vertex_size < kTailMaxSize ? kTailMaxSize : vertex_size;
+-
+- return 1 + vertex_block_count * vertex_size * (vertex_block_header_size + vertex_block_data_size) + tail_size;
+-}
+-
+-void meshopt_encodeVertexVersion(int version)
+-{
+- assert(unsigned(version) <= 0);
+-
+- meshopt::gEncodeVertexVersion = version;
+-}
+-
+-int meshopt_decodeVertexBuffer(void* destination, size_t vertex_count, size_t vertex_size, const unsigned char* buffer, size_t buffer_size)
+-{
+- using namespace meshopt;
+-
+- assert(vertex_size > 0 && vertex_size <= 256);
+- assert(vertex_size % 4 == 0);
+-
+- const unsigned char* (*decode)(const unsigned char*, const unsigned char*, unsigned char*, size_t, size_t, unsigned char[256]) = 0;
+-
+-#if defined(SIMD_SSE) && defined(SIMD_FALLBACK)
+- decode = (cpuid & (1 << 9)) ? decodeVertexBlockSimd : decodeVertexBlock;
+-#elif defined(SIMD_SSE) || defined(SIMD_AVX) || defined(SIMD_NEON) || defined(SIMD_WASM)
+- decode = decodeVertexBlockSimd;
+-#else
+- decode = decodeVertexBlock;
+-#endif
+-
+-#if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM)
+- assert(gDecodeBytesGroupInitialized);
+- (void)gDecodeBytesGroupInitialized;
+-#endif
+-
+- unsigned char* vertex_data = static_cast<unsigned char*>(destination);
+-
+- const unsigned char* data = buffer;
+- const unsigned char* data_end = buffer + buffer_size;
+-
+- if (size_t(data_end - data) < 1 + vertex_size)
+- return -2;
+-
+- unsigned char data_header = *data++;
+-
+- if ((data_header & 0xf0) != kVertexHeader)
+- return -1;
+-
+- int version = data_header & 0x0f;
+- if (version > 0)
+- return -1;
+-
+- unsigned char last_vertex[256];
+- memcpy(last_vertex, data_end - vertex_size, vertex_size);
+-
+- size_t vertex_block_size = getVertexBlockSize(vertex_size);
+-
+- size_t vertex_offset = 0;
+-
+- while (vertex_offset < vertex_count)
+- {
+- size_t block_size = (vertex_offset + vertex_block_size < vertex_count) ?
vertex_block_size : vertex_count - vertex_offset; +- +- data = decode(data, data_end, vertex_data + vertex_offset * vertex_size, block_size, vertex_size, last_vertex); +- if (!data) +- return -2; +- +- vertex_offset += block_size; +- } +- +- size_t tail_size = vertex_size < kTailMaxSize ? kTailMaxSize : vertex_size; +- +- if (size_t(data_end - data) != tail_size) +- return -3; +- +- return 0; +-} +- +-#undef SIMD_NEON +-#undef SIMD_SSE +-#undef SIMD_AVX +-#undef SIMD_WASM +-#undef SIMD_FALLBACK +-#undef SIMD_TARGET +diff --git a/src/3rdparty/meshoptimizer/src/vertexfilter.cpp b/src/3rdparty/meshoptimizer/src/vertexfilter.cpp +deleted file mode 100644 +index 606a280..0000000 +--- a/src/3rdparty/meshoptimizer/src/vertexfilter.cpp ++++ /dev/null +@@ -1,962 +0,0 @@ +-// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +-#include "meshoptimizer.h" +- +-#include +-#include +- +-// The block below auto-detects SIMD ISA that can be used on the target platform +-#ifndef MESHOPTIMIZER_NO_SIMD +- +-// The SIMD implementation requires SSE2, which can be enabled unconditionally through compiler settings +-#if defined(__SSE2__) +-#define SIMD_SSE +-#endif +- +-// MSVC supports compiling SSE2 code regardless of compile options; we assume all 32-bit CPUs support SSE2 +-#if !defined(SIMD_SSE) && defined(_MSC_VER) && !defined(__clang__) && (defined(_M_IX86) || defined(_M_X64)) +-#define SIMD_SSE +-#endif +- +-// GCC/clang define these when NEON support is available +-#if defined(__ARM_NEON__) || defined(__ARM_NEON) +-#define SIMD_NEON +-#endif +- +-// On MSVC, we assume that ARM builds always target NEON-capable devices +-#if !defined(SIMD_NEON) && defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64)) +-#define SIMD_NEON +-#endif +- +-// When targeting Wasm SIMD we can't use runtime cpuid checks so we unconditionally enable SIMD +-#if defined(__wasm_simd128__) +-#define SIMD_WASM +-#endif +- +-#endif // !MESHOPTIMIZER_NO_SIMD +- +-#ifdef SIMD_SSE +-#include +-#include +-#endif +- +-#ifdef _MSC_VER +-#include +-#endif +- +-#ifdef SIMD_NEON +-#if defined(_MSC_VER) && defined(_M_ARM64) +-#include +-#else +-#include +-#endif +-#endif +- +-#ifdef SIMD_WASM +-#undef __DEPRECATED +-#include +-#endif +- +-#ifdef SIMD_WASM +-#define wasmx_unpacklo_v16x8(a, b) wasm_v16x8_shuffle(a, b, 0, 8, 1, 9, 2, 10, 3, 11) +-#define wasmx_unpackhi_v16x8(a, b) wasm_v16x8_shuffle(a, b, 4, 12, 5, 13, 6, 14, 7, 15) +-#define wasmx_unziplo_v32x4(a, b) wasm_v32x4_shuffle(a, b, 0, 2, 4, 6) +-#define wasmx_unziphi_v32x4(a, b) wasm_v32x4_shuffle(a, b, 1, 3, 5, 7) +-#endif +- +-namespace meshopt +-{ +- +-#if !defined(SIMD_SSE) && !defined(SIMD_NEON) && !defined(SIMD_WASM) +-template +-static void decodeFilterOct(T* data, size_t count) +-{ +- const float max = float((1 << (sizeof(T) * 8 - 1)) - 1); +- +- for (size_t i = 0; i < count; ++i) +- { +- // convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count +- float x = float(data[i * 4 + 0]); +- float y = float(data[i * 4 + 1]); +- float z = float(data[i * 4 + 2]) - fabsf(x) - fabsf(y); +- +- // fixup octahedral coordinates for z<0 +- float t = (z >= 0.f) ? 0.f : z; +- +- x += (x >= 0.f) ? t : -t; +- y += (y >= 0.f) ? t : -t; +- +- // compute normal length & scale +- float l = sqrtf(x * x + y * y + z * z); +- float s = max / l; +- +- // rounded signed float->int +- int xf = int(x * s + (x >= 0.f ? 0.5f : -0.5f)); +- int yf = int(y * s + (y >= 0.f ? 0.5f : -0.5f)); +- int zf = int(z * s + (z >= 0.f ? 
0.5f : -0.5f)); +- +- data[i * 4 + 0] = T(xf); +- data[i * 4 + 1] = T(yf); +- data[i * 4 + 2] = T(zf); +- } +-} +- +-static void decodeFilterQuat(short* data, size_t count) +-{ +- const float scale = 1.f / sqrtf(2.f); +- +- for (size_t i = 0; i < count; ++i) +- { +- // recover scale from the high byte of the component +- int sf = data[i * 4 + 3] | 3; +- float ss = scale / float(sf); +- +- // convert x/y/z to [-1..1] (scaled...) +- float x = float(data[i * 4 + 0]) * ss; +- float y = float(data[i * 4 + 1]) * ss; +- float z = float(data[i * 4 + 2]) * ss; +- +- // reconstruct w as a square root; we clamp to 0.f to avoid NaN due to precision errors +- float ww = 1.f - x * x - y * y - z * z; +- float w = sqrtf(ww >= 0.f ? ww : 0.f); +- +- // rounded signed float->int +- int xf = int(x * 32767.f + (x >= 0.f ? 0.5f : -0.5f)); +- int yf = int(y * 32767.f + (y >= 0.f ? 0.5f : -0.5f)); +- int zf = int(z * 32767.f + (z >= 0.f ? 0.5f : -0.5f)); +- int wf = int(w * 32767.f + 0.5f); +- +- int qc = data[i * 4 + 3] & 3; +- +- // output order is dictated by input index +- data[i * 4 + ((qc + 1) & 3)] = short(xf); +- data[i * 4 + ((qc + 2) & 3)] = short(yf); +- data[i * 4 + ((qc + 3) & 3)] = short(zf); +- data[i * 4 + ((qc + 0) & 3)] = short(wf); +- } +-} +- +-static void decodeFilterExp(unsigned int* data, size_t count) +-{ +- for (size_t i = 0; i < count; ++i) +- { +- unsigned int v = data[i]; +- +- // decode mantissa and exponent +- int m = int(v << 8) >> 8; +- int e = int(v) >> 24; +- +- union +- { +- float f; +- unsigned int ui; +- } u; +- +- // optimized version of ldexp(float(m), e) +- u.ui = unsigned(e + 127) << 23; +- u.f = u.f * float(m); +- +- data[i] = u.ui; +- } +-} +-#endif +- +-#if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM) +-template +-static void dispatchSimd(void (*process)(T*, size_t), T* data, size_t count, size_t stride) +-{ +- assert(stride <= 4); +- +- size_t count4 = count & ~size_t(3); +- process(data, count4); +- +- if (count4 < count) +- { +- T tail[4 * 4] = {}; // max stride 4, max count 4 +- size_t tail_size = (count - count4) * stride * sizeof(T); +- assert(tail_size <= sizeof(tail)); +- +- memcpy(tail, data + count4 * stride, tail_size); +- process(tail, count - count4); +- memcpy(data + count4 * stride, tail, tail_size); +- } +-} +- +-inline uint64_t rotateleft64(uint64_t v, int x) +-{ +-#if defined(_MSC_VER) && !defined(__clang__) +- return _rotl64(v, x); +-// Apple's Clang 8 is actually vanilla Clang 3.9, there we need to look for +-// version 11 instead: https://en.wikipedia.org/wiki/Xcode#Toolchain_versions +-#elif defined(__clang__) && ((!defined(__apple_build_version__) && __clang_major__ >= 8) || __clang_major__ >= 11) +- return __builtin_rotateleft64(v, x); +-#else +- return (v << (x & 63)) | (v >> ((64 - x) & 63)); +-#endif +-} +-#endif +- +-#ifdef SIMD_SSE +-static void decodeFilterOctSimd(signed char* data, size_t count) +-{ +- const __m128 sign = _mm_set1_ps(-0.f); +- +- for (size_t i = 0; i < count; i += 4) +- { +- __m128i n4 = _mm_loadu_si128(reinterpret_cast<__m128i*>(&data[i * 4])); +- +- // sign-extends each of x,y in [x y ? ?] 
+-#ifdef SIMD_SSE
+-static void decodeFilterOctSimd(signed char* data, size_t count)
+-{
+-	const __m128 sign = _mm_set1_ps(-0.f);
+-
+-	for (size_t i = 0; i < count; i += 4)
+-	{
+-		__m128i n4 = _mm_loadu_si128(reinterpret_cast<__m128i*>(&data[i * 4]));
+-
+-		// sign-extends each of x,y in [x y ? ?] with arithmetic shifts
+-		__m128i xf = _mm_srai_epi32(_mm_slli_epi32(n4, 24), 24);
+-		__m128i yf = _mm_srai_epi32(_mm_slli_epi32(n4, 16), 24);
+-
+-		// unpack z; note that z is unsigned so we technically don't need to sign extend it
+-		__m128i zf = _mm_srai_epi32(_mm_slli_epi32(n4, 8), 24);
+-
+-		// convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count
+-		__m128 x = _mm_cvtepi32_ps(xf);
+-		__m128 y = _mm_cvtepi32_ps(yf);
+-		__m128 z = _mm_sub_ps(_mm_cvtepi32_ps(zf), _mm_add_ps(_mm_andnot_ps(sign, x), _mm_andnot_ps(sign, y)));
+-
+-		// fixup octahedral coordinates for z<0
+-		__m128 t = _mm_min_ps(z, _mm_setzero_ps());
+-
+-		x = _mm_add_ps(x, _mm_xor_ps(t, _mm_and_ps(x, sign)));
+-		y = _mm_add_ps(y, _mm_xor_ps(t, _mm_and_ps(y, sign)));
+-
+-		// compute normal length & scale
+-		__m128 ll = _mm_add_ps(_mm_mul_ps(x, x), _mm_add_ps(_mm_mul_ps(y, y), _mm_mul_ps(z, z)));
+-		__m128 s = _mm_mul_ps(_mm_set1_ps(127.f), _mm_rsqrt_ps(ll));
+-
+-		// rounded signed float->int
+-		__m128i xr = _mm_cvtps_epi32(_mm_mul_ps(x, s));
+-		__m128i yr = _mm_cvtps_epi32(_mm_mul_ps(y, s));
+-		__m128i zr = _mm_cvtps_epi32(_mm_mul_ps(z, s));
+-
+-		// combine xr/yr/zr into final value
+-		__m128i res = _mm_and_si128(n4, _mm_set1_epi32(0xff000000));
+-		res = _mm_or_si128(res, _mm_and_si128(xr, _mm_set1_epi32(0xff)));
+-		res = _mm_or_si128(res, _mm_slli_epi32(_mm_and_si128(yr, _mm_set1_epi32(0xff)), 8));
+-		res = _mm_or_si128(res, _mm_slli_epi32(_mm_and_si128(zr, _mm_set1_epi32(0xff)), 16));
+-
+-		_mm_storeu_si128(reinterpret_cast<__m128i*>(&data[i * 4]), res);
+-	}
+-}
+-
+-static void decodeFilterOctSimd(short* data, size_t count)
+-{
+-	const __m128 sign = _mm_set1_ps(-0.f);
+-
+-	for (size_t i = 0; i < count; i += 4)
+-	{
+-		__m128 n4_0 = _mm_loadu_ps(reinterpret_cast<float*>(&data[(i + 0) * 4]));
+-		__m128 n4_1 = _mm_loadu_ps(reinterpret_cast<float*>(&data[(i + 2) * 4]));
+-
+-		// gather both x/y 16-bit pairs in each 32-bit lane
+-		__m128i n4 = _mm_castps_si128(_mm_shuffle_ps(n4_0, n4_1, _MM_SHUFFLE(2, 0, 2, 0)));
+-
+-		// sign-extends each of x,y in [x y] with arithmetic shifts
+-		__m128i xf = _mm_srai_epi32(_mm_slli_epi32(n4, 16), 16);
+-		__m128i yf = _mm_srai_epi32(n4, 16);
+-
+-		// unpack z; note that z is unsigned so we don't need to sign extend it
+-		__m128i z4 = _mm_castps_si128(_mm_shuffle_ps(n4_0, n4_1, _MM_SHUFFLE(3, 1, 3, 1)));
+-		__m128i zf = _mm_and_si128(z4, _mm_set1_epi32(0x7fff));
+-
+-		// convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count
+-		__m128 x = _mm_cvtepi32_ps(xf);
+-		__m128 y = _mm_cvtepi32_ps(yf);
+-		__m128 z = _mm_sub_ps(_mm_cvtepi32_ps(zf), _mm_add_ps(_mm_andnot_ps(sign, x), _mm_andnot_ps(sign, y)));
+-
+-		// fixup octahedral coordinates for z<0
+-		__m128 t = _mm_min_ps(z, _mm_setzero_ps());
+-
+-		x = _mm_add_ps(x, _mm_xor_ps(t, _mm_and_ps(x, sign)));
+-		y = _mm_add_ps(y, _mm_xor_ps(t, _mm_and_ps(y, sign)));
+-
+-		// compute normal length & scale
+-		__m128 ll = _mm_add_ps(_mm_mul_ps(x, x), _mm_add_ps(_mm_mul_ps(y, y), _mm_mul_ps(z, z)));
+-		__m128 s = _mm_div_ps(_mm_set1_ps(32767.f), _mm_sqrt_ps(ll));
+-
+-		// rounded signed float->int
+-		__m128i xr = _mm_cvtps_epi32(_mm_mul_ps(x, s));
+-		__m128i yr = _mm_cvtps_epi32(_mm_mul_ps(y, s));
+-		__m128i zr = _mm_cvtps_epi32(_mm_mul_ps(z, s));
+-
+-		// mix x/z and y/0 to make 16-bit unpack easier
+-		__m128i xzr = _mm_or_si128(_mm_and_si128(xr, _mm_set1_epi32(0xffff)), _mm_slli_epi32(zr, 16));
+-		__m128i y0r = _mm_and_si128(yr, _mm_set1_epi32(0xffff));
+-
+-		// pack x/y/z using 16-bit unpacks; note that this has 0 where we should have .w
+-		__m128i res_0 = _mm_unpacklo_epi16(xzr, y0r);
+-		__m128i res_1 = _mm_unpackhi_epi16(xzr, y0r);
+-
+-		// patch in .w
+-		res_0 = _mm_or_si128(res_0, _mm_and_si128(_mm_castps_si128(n4_0), _mm_set1_epi64x(0xffff000000000000)));
+-		res_1 = _mm_or_si128(res_1, _mm_and_si128(_mm_castps_si128(n4_1), _mm_set1_epi64x(0xffff000000000000)));
+-
+-		_mm_storeu_si128(reinterpret_cast<__m128i*>(&data[(i + 0) * 4]), res_0);
+-		_mm_storeu_si128(reinterpret_cast<__m128i*>(&data[(i + 2) * 4]), res_1);
+-	}
+-}
+-
+-static void decodeFilterQuatSimd(short* data, size_t count)
+-{
+-	const float scale = 1.f / sqrtf(2.f);
+-
+-	for (size_t i = 0; i < count; i += 4)
+-	{
+-		__m128 q4_0 = _mm_loadu_ps(reinterpret_cast<float*>(&data[(i + 0) * 4]));
+-		__m128 q4_1 = _mm_loadu_ps(reinterpret_cast<float*>(&data[(i + 2) * 4]));
+-
+-		// gather both x/y 16-bit pairs in each 32-bit lane
+-		__m128i q4_xy = _mm_castps_si128(_mm_shuffle_ps(q4_0, q4_1, _MM_SHUFFLE(2, 0, 2, 0)));
+-		__m128i q4_zc = _mm_castps_si128(_mm_shuffle_ps(q4_0, q4_1, _MM_SHUFFLE(3, 1, 3, 1)));
+-
+-		// sign-extends each of x,y in [x y] with arithmetic shifts
+-		__m128i xf = _mm_srai_epi32(_mm_slli_epi32(q4_xy, 16), 16);
+-		__m128i yf = _mm_srai_epi32(q4_xy, 16);
+-		__m128i zf = _mm_srai_epi32(_mm_slli_epi32(q4_zc, 16), 16);
+-		__m128i cf = _mm_srai_epi32(q4_zc, 16);
+-
+-		// get a floating-point scaler using zc with bottom 2 bits set to 1 (which represents 1.f)
+-		__m128i sf = _mm_or_si128(cf, _mm_set1_epi32(3));
+-		__m128 ss = _mm_div_ps(_mm_set1_ps(scale), _mm_cvtepi32_ps(sf));
+-
+-		// convert x/y/z to [-1..1] (scaled...)
+-		__m128 x = _mm_mul_ps(_mm_cvtepi32_ps(xf), ss);
+-		__m128 y = _mm_mul_ps(_mm_cvtepi32_ps(yf), ss);
+-		__m128 z = _mm_mul_ps(_mm_cvtepi32_ps(zf), ss);
+-
+-		// reconstruct w as a square root; we clamp to 0.f to avoid NaN due to precision errors
+-		__m128 ww = _mm_sub_ps(_mm_set1_ps(1.f), _mm_add_ps(_mm_mul_ps(x, x), _mm_add_ps(_mm_mul_ps(y, y), _mm_mul_ps(z, z))));
+-		__m128 w = _mm_sqrt_ps(_mm_max_ps(ww, _mm_setzero_ps()));
+-
+-		__m128 s = _mm_set1_ps(32767.f);
+-
+-		// rounded signed float->int
+-		__m128i xr = _mm_cvtps_epi32(_mm_mul_ps(x, s));
+-		__m128i yr = _mm_cvtps_epi32(_mm_mul_ps(y, s));
+-		__m128i zr = _mm_cvtps_epi32(_mm_mul_ps(z, s));
+-		__m128i wr = _mm_cvtps_epi32(_mm_mul_ps(w, s));
+-
+-		// mix x/z and w/y to make 16-bit unpack easier
+-		__m128i xzr = _mm_or_si128(_mm_and_si128(xr, _mm_set1_epi32(0xffff)), _mm_slli_epi32(zr, 16));
+-		__m128i wyr = _mm_or_si128(_mm_and_si128(wr, _mm_set1_epi32(0xffff)), _mm_slli_epi32(yr, 16));
+-
+-		// pack x/y/z/w using 16-bit unpacks; we pack wxyz by default (for qc=0)
+-		__m128i res_0 = _mm_unpacklo_epi16(wyr, xzr);
+-		__m128i res_1 = _mm_unpackhi_epi16(wyr, xzr);
+-
+-		// store results to stack so that we can rotate using scalar instructions
+-		uint64_t res[4];
+-		_mm_storeu_si128(reinterpret_cast<__m128i*>(&res[0]), res_0);
+-		_mm_storeu_si128(reinterpret_cast<__m128i*>(&res[2]), res_1);
+-
+-		// rotate and store
+-		uint64_t* out = reinterpret_cast<uint64_t*>(&data[i * 4]);
+-
+-		out[0] = rotateleft64(res[0], data[(i + 0) * 4 + 3] << 4);
+-		out[1] = rotateleft64(res[1], data[(i + 1) * 4 + 3] << 4);
+-		out[2] = rotateleft64(res[2], data[(i + 2) * 4 + 3] << 4);
+-		out[3] = rotateleft64(res[3], data[(i + 3) * 4 + 3] << 4);
+-	}
+-}
+-
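The rotate at the end of the quaternion path performs the same component scatter as the scalar decoder, just in one instruction: the four 16-bit results are packed as wxyz with w in the low lane, and rotating the 64-bit lane left by `16 * qc` moves w into slot `qc`. A standalone sketch of that correspondence (illustrative, not part of the patch):

```cpp
#include <cstdint>
#include <cstdio>

static uint64_t rotl64(uint64_t v, int x)
{
	return (v << (x & 63)) | (v >> ((64 - x) & 63));
}

int main()
{
	// four recognizable 16-bit stand-ins for the w/x/y/z components
	const uint16_t w = 0xAAAA, x = 0xBBBB, y = 0xCCCC, z = 0xDDDD;
	uint64_t packed = uint64_t(w) | uint64_t(x) << 16 | uint64_t(y) << 32 | uint64_t(z) << 48;

	for (int qc = 0; qc < 4; ++qc)
	{
		uint64_t r = rotl64(packed, qc << 4); // rotate by 16 * qc bits

		uint16_t out[4];
		for (int j = 0; j < 4; ++j)
			out[j] = uint16_t(r >> (16 * j));

		// matches the scalar decoder: out[(qc + 0) & 3] == w, out[(qc + 1) & 3] == x, ...
		printf("qc=%d: %04x %04x %04x %04x\n", qc, out[0], out[1], out[2], out[3]);
	}
}
```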
+-static void decodeFilterExpSimd(unsigned int* data, size_t count)
+-{
+-	for (size_t i = 0; i < count; i += 4)
+-	{
+-		__m128i v = _mm_loadu_si128(reinterpret_cast<__m128i*>(&data[i]));
+-
+-		// decode exponent into 2^x directly
+-		__m128i ef = _mm_srai_epi32(v, 24);
+-		__m128i es = _mm_slli_epi32(_mm_add_epi32(ef, _mm_set1_epi32(127)), 23);
+-
+-		// decode 24-bit mantissa into floating-point value
+-		__m128i mf = _mm_srai_epi32(_mm_slli_epi32(v, 8), 8);
+-		__m128 m = _mm_cvtepi32_ps(mf);
+-
+-		__m128 r = _mm_mul_ps(_mm_castsi128_ps(es), m);
+-
+-		_mm_storeu_ps(reinterpret_cast<float*>(&data[i]), r);
+-	}
+-}
+-#endif
+-
+-#if defined(SIMD_NEON) && !defined(__aarch64__) && !defined(_M_ARM64)
+-inline float32x4_t vsqrtq_f32(float32x4_t x)
+-{
+-	float32x4_t r = vrsqrteq_f32(x);
+-	r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(r, x), r)); // refine rsqrt estimate
+-	return vmulq_f32(r, x);
+-}
+-
+-inline float32x4_t vdivq_f32(float32x4_t x, float32x4_t y)
+-{
+-	float32x4_t r = vrecpeq_f32(y);
+-	r = vmulq_f32(r, vrecpsq_f32(y, r)); // refine rcp estimate
+-	return vmulq_f32(x, r);
+-}
+-#endif
+-
+-#ifdef SIMD_NEON
+-static void decodeFilterOctSimd(signed char* data, size_t count)
+-{
+-	const int32x4_t sign = vdupq_n_s32(0x80000000);
+-
+-	for (size_t i = 0; i < count; i += 4)
+-	{
+-		int32x4_t n4 = vld1q_s32(reinterpret_cast<int32_t*>(&data[i * 4]));
+-
+-		// sign-extends each of x,y in [x y ? ?] with arithmetic shifts
+-		int32x4_t xf = vshrq_n_s32(vshlq_n_s32(n4, 24), 24);
+-		int32x4_t yf = vshrq_n_s32(vshlq_n_s32(n4, 16), 24);
+-
+-		// unpack z; note that z is unsigned so we technically don't need to sign extend it
+-		int32x4_t zf = vshrq_n_s32(vshlq_n_s32(n4, 8), 24);
+-
+-		// convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count
+-		float32x4_t x = vcvtq_f32_s32(xf);
+-		float32x4_t y = vcvtq_f32_s32(yf);
+-		float32x4_t z = vsubq_f32(vcvtq_f32_s32(zf), vaddq_f32(vabsq_f32(x), vabsq_f32(y)));
+-
+-		// fixup octahedral coordinates for z<0
+-		float32x4_t t = vminq_f32(z, vdupq_n_f32(0.f));
+-
+-		x = vaddq_f32(x, vreinterpretq_f32_s32(veorq_s32(vreinterpretq_s32_f32(t), vandq_s32(vreinterpretq_s32_f32(x), sign))));
+-		y = vaddq_f32(y, vreinterpretq_f32_s32(veorq_s32(vreinterpretq_s32_f32(t), vandq_s32(vreinterpretq_s32_f32(y), sign))));
+-
+-		// compute normal length & scale
+-		float32x4_t ll = vaddq_f32(vmulq_f32(x, x), vaddq_f32(vmulq_f32(y, y), vmulq_f32(z, z)));
+-		float32x4_t rl = vrsqrteq_f32(ll);
+-		float32x4_t s = vmulq_f32(vdupq_n_f32(127.f), rl);
+-
+-		// fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value
+-		// note: the result is offset by 0x4B40_0000, but we only need the low 16 bits so we can omit the subtraction
+-		const float32x4_t fsnap = vdupq_n_f32(3 << 22);
+-
+-		int32x4_t xr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(x, s), fsnap));
+-		int32x4_t yr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(y, s), fsnap));
+-		int32x4_t zr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(z, s), fsnap));
+-
+-		// combine xr/yr/zr into final value
+-		int32x4_t res = vandq_s32(n4, vdupq_n_s32(0xff000000));
+-		res = vorrq_s32(res, vandq_s32(xr, vdupq_n_s32(0xff)));
+-		res = vorrq_s32(res, vshlq_n_s32(vandq_s32(yr, vdupq_n_s32(0xff)), 8));
+-		res = vorrq_s32(res, vshlq_n_s32(vandq_s32(zr, vdupq_n_s32(0xff)), 16));
+-
+-		vst1q_s32(reinterpret_cast<int32_t*>(&data[i * 4]), res);
+-	}
+-}
+-
+-static void decodeFilterOctSimd(short* data, size_t count)
+-{
+-	const int32x4_t sign = vdupq_n_s32(0x80000000);
+-
+-	for (size_t i = 0; i < count; i += 4)
+-	{
+-		int32x4_t n4_0 = vld1q_s32(reinterpret_cast<int32_t*>(&data[(i + 0) * 4]));
+-		int32x4_t n4_1 = vld1q_s32(reinterpret_cast<int32_t*>(&data[(i + 2) * 4]));
+-
+-		// gather both x/y 16-bit pairs in each 32-bit lane
+-		int32x4_t n4 = vuzpq_s32(n4_0, n4_1).val[0];
+-
+-		// sign-extends each of x,y in [x y] with arithmetic shifts
+-		int32x4_t xf = vshrq_n_s32(vshlq_n_s32(n4, 16), 16);
+-		int32x4_t yf = vshrq_n_s32(n4, 16);
+-
+-		// unpack z; note that z is unsigned so we don't need to sign extend it
+-		int32x4_t z4 = vuzpq_s32(n4_0, n4_1).val[1];
+-		int32x4_t zf = vandq_s32(z4, vdupq_n_s32(0x7fff));
+-
+-		// convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count
+-		float32x4_t x = vcvtq_f32_s32(xf);
+-		float32x4_t y = vcvtq_f32_s32(yf);
+-		float32x4_t z = vsubq_f32(vcvtq_f32_s32(zf), vaddq_f32(vabsq_f32(x), vabsq_f32(y)));
+-
+-		// fixup octahedral coordinates for z<0
+-		float32x4_t t = vminq_f32(z, vdupq_n_f32(0.f));
+-
+-		x = vaddq_f32(x, vreinterpretq_f32_s32(veorq_s32(vreinterpretq_s32_f32(t), vandq_s32(vreinterpretq_s32_f32(x), sign))));
+-		y = vaddq_f32(y, vreinterpretq_f32_s32(veorq_s32(vreinterpretq_s32_f32(t), vandq_s32(vreinterpretq_s32_f32(y), sign))));
+-
+-		// compute normal length & scale
+-		float32x4_t ll = vaddq_f32(vmulq_f32(x, x), vaddq_f32(vmulq_f32(y, y), vmulq_f32(z, z)));
+-		float32x4_t rl = vrsqrteq_f32(ll);
+-		rl = vmulq_f32(rl, vrsqrtsq_f32(vmulq_f32(rl, ll), rl)); // refine rsqrt estimate
+-		float32x4_t s = vmulq_f32(vdupq_n_f32(32767.f), rl);
+-
+-		// fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value
+-		// note: the result is offset by 0x4B40_0000, but we only need the low 16 bits so we can omit the subtraction
+-		const float32x4_t fsnap = vdupq_n_f32(3 << 22);
+-
+-		int32x4_t xr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(x, s), fsnap));
+-		int32x4_t yr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(y, s), fsnap));
+-		int32x4_t zr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(z, s), fsnap));
+-
+-		// mix x/z and y/0 to make 16-bit unpack easier
+-		int32x4_t xzr = vorrq_s32(vandq_s32(xr, vdupq_n_s32(0xffff)), vshlq_n_s32(zr, 16));
+-		int32x4_t y0r = vandq_s32(yr, vdupq_n_s32(0xffff));
+-
+-		// pack x/y/z using 16-bit unpacks; note that this has 0 where we should have .w
+-		int32x4_t res_0 = vreinterpretq_s32_s16(vzipq_s16(vreinterpretq_s16_s32(xzr), vreinterpretq_s16_s32(y0r)).val[0]);
+-		int32x4_t res_1 = vreinterpretq_s32_s16(vzipq_s16(vreinterpretq_s16_s32(xzr), vreinterpretq_s16_s32(y0r)).val[1]);
+-
+-		// patch in .w
+-		res_0 = vbslq_s32(vreinterpretq_u32_u64(vdupq_n_u64(0xffff000000000000)), n4_0, res_0);
+-		res_1 = vbslq_s32(vreinterpretq_u32_u64(vdupq_n_u64(0xffff000000000000)), n4_1, res_1);
+-
+-		vst1q_s32(reinterpret_cast<int32_t*>(&data[(i + 0) * 4]), res_0);
+-		vst1q_s32(reinterpret_cast<int32_t*>(&data[(i + 2) * 4]), res_1);
+-	}
+-}
+-
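Note the asymmetry between the two NEON variants above: the 8-bit path uses the raw `vrsqrteq_f32` estimate (its roughly 8 to 12 bits of precision are plenty for 8-bit outputs), while the 16-bit path refines it with one Newton-Raphson step via `vrsqrtsq_f32`, which computes `(3 - a*b) / 2`. The same step in scalar form (illustrative, not part of the patch):

```cpp
#include <cmath>
#include <cstdio>

// one Newton-Raphson step for r ~ 1/sqrt(x): r' = r * (3 - x*r*r) / 2
// (exactly what vmulq_f32(rl, vrsqrtsq_f32(vmulq_f32(rl, ll), rl)) computes)
static float refine_rsqrt(float x, float r)
{
	return r * (3.f - (x * r) * r) * 0.5f;
}

int main()
{
	float x = 2.f;
	float r = 0.7f; // stand-in for a coarse hardware estimate of 1/sqrt(2)
	r = refine_rsqrt(x, r); // one step roughly doubles the number of correct bits
	printf("%.7f vs %.7f\n", r, 1.f / sqrtf(x)); // ~0.7070000 vs 0.7071068
}
```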
+-static void decodeFilterQuatSimd(short* data, size_t count)
+-{
+-	const float scale = 1.f / sqrtf(2.f);
+-
+-	for (size_t i = 0; i < count; i += 4)
+-	{
+-		int32x4_t q4_0 = vld1q_s32(reinterpret_cast<int32_t*>(&data[(i + 0) * 4]));
+-		int32x4_t q4_1 = vld1q_s32(reinterpret_cast<int32_t*>(&data[(i + 2) * 4]));
+-
+-		// gather both x/y 16-bit pairs in each 32-bit lane
+-		int32x4_t q4_xy = vuzpq_s32(q4_0, q4_1).val[0];
+-		int32x4_t q4_zc = vuzpq_s32(q4_0, q4_1).val[1];
+-
+-		// sign-extends each of x,y in [x y] with arithmetic shifts
+-		int32x4_t xf = vshrq_n_s32(vshlq_n_s32(q4_xy, 16), 16);
+-		int32x4_t yf = vshrq_n_s32(q4_xy, 16);
+-		int32x4_t zf = vshrq_n_s32(vshlq_n_s32(q4_zc, 16), 16);
+-		int32x4_t cf = vshrq_n_s32(q4_zc, 16);
+-
+-		// get a floating-point scaler using zc with bottom 2 bits set to 1 (which represents 1.f)
+-		int32x4_t sf = vorrq_s32(cf, vdupq_n_s32(3));
+-		float32x4_t ss = vdivq_f32(vdupq_n_f32(scale), vcvtq_f32_s32(sf));
+-
+-		// convert x/y/z to [-1..1] (scaled...)
+-		float32x4_t x = vmulq_f32(vcvtq_f32_s32(xf), ss);
+-		float32x4_t y = vmulq_f32(vcvtq_f32_s32(yf), ss);
+-		float32x4_t z = vmulq_f32(vcvtq_f32_s32(zf), ss);
+-
+-		// reconstruct w as a square root; we clamp to 0.f to avoid NaN due to precision errors
+-		float32x4_t ww = vsubq_f32(vdupq_n_f32(1.f), vaddq_f32(vmulq_f32(x, x), vaddq_f32(vmulq_f32(y, y), vmulq_f32(z, z))));
+-		float32x4_t w = vsqrtq_f32(vmaxq_f32(ww, vdupq_n_f32(0.f)));
+-
+-		float32x4_t s = vdupq_n_f32(32767.f);
+-
+-		// fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value
+-		// note: the result is offset by 0x4B40_0000, but we only need the low 16 bits so we can omit the subtraction
+-		const float32x4_t fsnap = vdupq_n_f32(3 << 22);
+-
+-		int32x4_t xr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(x, s), fsnap));
+-		int32x4_t yr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(y, s), fsnap));
+-		int32x4_t zr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(z, s), fsnap));
+-		int32x4_t wr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(w, s), fsnap));
+-
+-		// mix x/z and w/y to make 16-bit unpack easier
+-		int32x4_t xzr = vorrq_s32(vandq_s32(xr, vdupq_n_s32(0xffff)), vshlq_n_s32(zr, 16));
+-		int32x4_t wyr = vorrq_s32(vandq_s32(wr, vdupq_n_s32(0xffff)), vshlq_n_s32(yr, 16));
+-
+-		// pack x/y/z/w using 16-bit unpacks; we pack wxyz by default (for qc=0)
+-		int32x4_t res_0 = vreinterpretq_s32_s16(vzipq_s16(vreinterpretq_s16_s32(wyr), vreinterpretq_s16_s32(xzr)).val[0]);
+-		int32x4_t res_1 = vreinterpretq_s32_s16(vzipq_s16(vreinterpretq_s16_s32(wyr), vreinterpretq_s16_s32(xzr)).val[1]);
+-
+-		// rotate and store
+-		uint64_t* out = (uint64_t*)&data[i * 4];
+-
+-		out[0] = rotateleft64(vgetq_lane_u64(vreinterpretq_u64_s32(res_0), 0), vgetq_lane_s32(cf, 0) << 4);
+-		out[1] = rotateleft64(vgetq_lane_u64(vreinterpretq_u64_s32(res_0), 1), vgetq_lane_s32(cf, 1) << 4);
+-		out[2] = rotateleft64(vgetq_lane_u64(vreinterpretq_u64_s32(res_1), 0), vgetq_lane_s32(cf, 2) << 4);
+-		out[3] = rotateleft64(vgetq_lane_u64(vreinterpretq_u64_s32(res_1), 1), vgetq_lane_s32(cf, 3) << 4);
+-	}
+-}
+-
+-static void decodeFilterExpSimd(unsigned int* data, size_t count)
+-{
+-	for (size_t i = 0; i < count; i += 4)
+-	{
+-		int32x4_t v = vld1q_s32(reinterpret_cast<int32_t*>(&data[i]));
+-
+-		// decode exponent into 2^x directly
+-		int32x4_t ef = vshrq_n_s32(v, 24);
+-		int32x4_t es = vshlq_n_s32(vaddq_s32(ef, vdupq_n_s32(127)), 23);
+-
+-		// decode 24-bit mantissa into floating-point value
+-		int32x4_t mf = vshrq_n_s32(vshlq_n_s32(v, 8), 8);
+-		float32x4_t m = vcvtq_f32_s32(mf);
+-
+-		float32x4_t r = vmulq_f32(vreinterpretq_f32_s32(es), m);
+-
+-		vst1q_f32(reinterpret_cast<float*>(&data[i]), r);
+-	}
+-}
+-#endif
+-
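The `fsnap` constant that recurs throughout the NEON and WASM paths deserves a note: adding `3 << 22` (that is, `1.5 * 2^23`, bit pattern 0x4B400000) to a float of small enough magnitude forces renormalization so the rounded integer value lands in the low mantissa bits; since only the low 8 or 16 bits are consumed, the offset never has to be subtracted. A scalar sketch (illustrative, not part of the patch; assumes the default round-to-nearest-even FP mode):

```cpp
#include <cstdint>
#include <cstring>
#include <cstdio>

static int16_t fast_round_low16(float v)
{
	float f = v + 12582912.f; // 3 << 22 == 1.5 * 2^23; sum stays in [2^23, 2^24)
	uint32_t bits;
	memcpy(&bits, &f, sizeof(bits)); // bits == 0x4B400000 + round(v)
	return int16_t(bits);            // the low 16 bits alone recover round(v)
}

int main()
{
	printf("%d %d %d\n", fast_round_low16(127.49f), fast_round_low16(-3.5f), fast_round_low16(32766.7f));
	// prints: 127 -4 32767 (-3.5 rounds to the nearest even integer)
}
```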
+-#ifdef SIMD_WASM
+-static void decodeFilterOctSimd(signed char* data, size_t count)
+-{
+-	const v128_t sign = wasm_f32x4_splat(-0.f);
+-
+-	for (size_t i = 0; i < count; i += 4)
+-	{
+-		v128_t n4 = wasm_v128_load(&data[i * 4]);
+-
+-		// sign-extends each of x,y in [x y ? ?] with arithmetic shifts
+-		v128_t xf = wasm_i32x4_shr(wasm_i32x4_shl(n4, 24), 24);
+-		v128_t yf = wasm_i32x4_shr(wasm_i32x4_shl(n4, 16), 24);
+-
+-		// unpack z; note that z is unsigned so we technically don't need to sign extend it
+-		v128_t zf = wasm_i32x4_shr(wasm_i32x4_shl(n4, 8), 24);
+-
+-		// convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count
+-		v128_t x = wasm_f32x4_convert_i32x4(xf);
+-		v128_t y = wasm_f32x4_convert_i32x4(yf);
+-		v128_t z = wasm_f32x4_sub(wasm_f32x4_convert_i32x4(zf), wasm_f32x4_add(wasm_f32x4_abs(x), wasm_f32x4_abs(y)));
+-
+-		// fixup octahedral coordinates for z<0
+-		// note: i32x4_min with 0 is equivalent to f32x4_min
+-		v128_t t = wasm_i32x4_min(z, wasm_i32x4_splat(0));
+-
+-		x = wasm_f32x4_add(x, wasm_v128_xor(t, wasm_v128_and(x, sign)));
+-		y = wasm_f32x4_add(y, wasm_v128_xor(t, wasm_v128_and(y, sign)));
+-
+-		// compute normal length & scale
+-		v128_t ll = wasm_f32x4_add(wasm_f32x4_mul(x, x), wasm_f32x4_add(wasm_f32x4_mul(y, y), wasm_f32x4_mul(z, z)));
+-		v128_t s = wasm_f32x4_div(wasm_f32x4_splat(127.f), wasm_f32x4_sqrt(ll));
+-
+-		// fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value
+-		// note: the result is offset by 0x4B40_0000, but we only need the low 8 bits so we can omit the subtraction
+-		const v128_t fsnap = wasm_f32x4_splat(3 << 22);
+-
+-		v128_t xr = wasm_f32x4_add(wasm_f32x4_mul(x, s), fsnap);
+-		v128_t yr = wasm_f32x4_add(wasm_f32x4_mul(y, s), fsnap);
+-		v128_t zr = wasm_f32x4_add(wasm_f32x4_mul(z, s), fsnap);
+-
+-		// combine xr/yr/zr into final value
+-		v128_t res = wasm_v128_and(n4, wasm_i32x4_splat(0xff000000));
+-		res = wasm_v128_or(res, wasm_v128_and(xr, wasm_i32x4_splat(0xff)));
+-		res = wasm_v128_or(res, wasm_i32x4_shl(wasm_v128_and(yr, wasm_i32x4_splat(0xff)), 8));
+-		res = wasm_v128_or(res, wasm_i32x4_shl(wasm_v128_and(zr, wasm_i32x4_splat(0xff)), 16));
+-
+-		wasm_v128_store(&data[i * 4], res);
+-	}
+-}
+-
+-static void decodeFilterOctSimd(short* data, size_t count)
+-{
+-	const v128_t sign = wasm_f32x4_splat(-0.f);
+-	const v128_t zmask = wasm_i32x4_splat(0x7fff);
+-
+-	for (size_t i = 0; i < count; i += 4)
+-	{
+-		v128_t n4_0 = wasm_v128_load(&data[(i + 0) * 4]);
+-		v128_t n4_1 = wasm_v128_load(&data[(i + 2) * 4]);
+-
+-		// gather both x/y 16-bit pairs in each 32-bit lane
+-		v128_t n4 = wasmx_unziplo_v32x4(n4_0, n4_1);
+-
+-		// sign-extends each of x,y in [x y] with arithmetic shifts
+-		v128_t xf = wasm_i32x4_shr(wasm_i32x4_shl(n4, 16), 16);
+-		v128_t yf = wasm_i32x4_shr(n4, 16);
+-
+-		// unpack z; note that z is unsigned so we don't need to sign extend it
+-		v128_t z4 = wasmx_unziphi_v32x4(n4_0, n4_1);
+-		v128_t zf = wasm_v128_and(z4, zmask);
+-
+-		// convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count
+-		v128_t x = wasm_f32x4_convert_i32x4(xf);
+-		v128_t y = wasm_f32x4_convert_i32x4(yf);
+-		v128_t z = wasm_f32x4_sub(wasm_f32x4_convert_i32x4(zf), wasm_f32x4_add(wasm_f32x4_abs(x), wasm_f32x4_abs(y)));
+-
+-		// fixup octahedral coordinates for z<0
+-		// note: i32x4_min with 0 is equivalent to f32x4_min
+-		v128_t t = wasm_i32x4_min(z, wasm_i32x4_splat(0));
+-
+-		x = wasm_f32x4_add(x, wasm_v128_xor(t, wasm_v128_and(x, sign)));
+-		y = wasm_f32x4_add(y, wasm_v128_xor(t, wasm_v128_and(y, sign)));
+-
+-		// compute normal length & scale
+-		v128_t ll = wasm_f32x4_add(wasm_f32x4_mul(x, x), wasm_f32x4_add(wasm_f32x4_mul(y, y), wasm_f32x4_mul(z, z)));
+-		v128_t s = wasm_f32x4_div(wasm_f32x4_splat(32767.f), wasm_f32x4_sqrt(ll));
+-
+-		// fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value
+-		// note: the result is offset by 0x4B40_0000, but we only need the low 16 bits so we can omit the subtraction
+-		const v128_t fsnap = wasm_f32x4_splat(3 << 22);
+-
+-		v128_t xr = wasm_f32x4_add(wasm_f32x4_mul(x, s), fsnap);
+-		v128_t yr = wasm_f32x4_add(wasm_f32x4_mul(y, s), fsnap);
+-		v128_t zr = wasm_f32x4_add(wasm_f32x4_mul(z, s), fsnap);
+-
+-		// mix x/z and y/0 to make 16-bit unpack easier
+-		v128_t xzr = wasm_v128_or(wasm_v128_and(xr, wasm_i32x4_splat(0xffff)), wasm_i32x4_shl(zr, 16));
+-		v128_t y0r = wasm_v128_and(yr, wasm_i32x4_splat(0xffff));
+-
+-		// pack x/y/z using 16-bit unpacks; note that this has 0 where we should have .w
+-		v128_t res_0 = wasmx_unpacklo_v16x8(xzr, y0r);
+-		v128_t res_1 = wasmx_unpackhi_v16x8(xzr, y0r);
+-
+-		// patch in .w
+-		res_0 = wasm_v128_or(res_0, wasm_v128_and(n4_0, wasm_i64x2_splat(0xffff000000000000)));
+-		res_1 = wasm_v128_or(res_1, wasm_v128_and(n4_1, wasm_i64x2_splat(0xffff000000000000)));
+-
+-		wasm_v128_store(&data[(i + 0) * 4], res_0);
+-		wasm_v128_store(&data[(i + 2) * 4], res_1);
+-	}
+-}
+-
+-static void decodeFilterQuatSimd(short* data, size_t count)
+-{
+-	const float scale = 1.f / sqrtf(2.f);
+-
+-	for (size_t i = 0; i < count; i += 4)
+-	{
+-		v128_t q4_0 = wasm_v128_load(&data[(i + 0) * 4]);
+-		v128_t q4_1 = wasm_v128_load(&data[(i + 2) * 4]);
+-
+-		// gather both x/y 16-bit pairs in each 32-bit lane
+-		v128_t q4_xy = wasmx_unziplo_v32x4(q4_0, q4_1);
+-		v128_t q4_zc = wasmx_unziphi_v32x4(q4_0, q4_1);
+-
+-		// sign-extends each of x,y in [x y] with arithmetic shifts
+-		v128_t xf = wasm_i32x4_shr(wasm_i32x4_shl(q4_xy, 16), 16);
+-		v128_t yf = wasm_i32x4_shr(q4_xy, 16);
+-		v128_t zf = wasm_i32x4_shr(wasm_i32x4_shl(q4_zc, 16), 16);
+-		v128_t cf = wasm_i32x4_shr(q4_zc, 16);
+-
+-		// get a floating-point scaler using zc with bottom 2 bits set to 1 (which represents 1.f)
+-		v128_t sf = wasm_v128_or(cf, wasm_i32x4_splat(3));
+-		v128_t ss = wasm_f32x4_div(wasm_f32x4_splat(scale), wasm_f32x4_convert_i32x4(sf));
+-
+-		// convert x/y/z to [-1..1] (scaled...)
+-		v128_t x = wasm_f32x4_mul(wasm_f32x4_convert_i32x4(xf), ss);
+-		v128_t y = wasm_f32x4_mul(wasm_f32x4_convert_i32x4(yf), ss);
+-		v128_t z = wasm_f32x4_mul(wasm_f32x4_convert_i32x4(zf), ss);
+-
+-		// reconstruct w as a square root; we clamp to 0.f to avoid NaN due to precision errors
+-		// note: i32x4_max with 0 is equivalent to f32x4_max
+-		v128_t ww = wasm_f32x4_sub(wasm_f32x4_splat(1.f), wasm_f32x4_add(wasm_f32x4_mul(x, x), wasm_f32x4_add(wasm_f32x4_mul(y, y), wasm_f32x4_mul(z, z))));
+-		v128_t w = wasm_f32x4_sqrt(wasm_i32x4_max(ww, wasm_i32x4_splat(0)));
+-
+-		v128_t s = wasm_f32x4_splat(32767.f);
+-
+-		// fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value
+-		// note: the result is offset by 0x4B40_0000, but we only need the low 16 bits so we can omit the subtraction
+-		const v128_t fsnap = wasm_f32x4_splat(3 << 22);
+-
+-		v128_t xr = wasm_f32x4_add(wasm_f32x4_mul(x, s), fsnap);
+-		v128_t yr = wasm_f32x4_add(wasm_f32x4_mul(y, s), fsnap);
+-		v128_t zr = wasm_f32x4_add(wasm_f32x4_mul(z, s), fsnap);
+-		v128_t wr = wasm_f32x4_add(wasm_f32x4_mul(w, s), fsnap);
+-
+-		// mix x/z and w/y to make 16-bit unpack easier
+-		v128_t xzr = wasm_v128_or(wasm_v128_and(xr, wasm_i32x4_splat(0xffff)), wasm_i32x4_shl(zr, 16));
+-		v128_t wyr = wasm_v128_or(wasm_v128_and(wr, wasm_i32x4_splat(0xffff)), wasm_i32x4_shl(yr, 16));
+-
+-		// pack x/y/z/w using 16-bit unpacks; we pack wxyz by default (for qc=0)
+-		v128_t res_0 = wasmx_unpacklo_v16x8(wyr, xzr);
+-		v128_t res_1 = wasmx_unpackhi_v16x8(wyr, xzr);
+-
+-		// compute component index shifted left by 4 (and moved into i32x4 slot)
+-		// TODO: volatile here works around LLVM mis-optimizing code; https://github.com/emscripten-core/emscripten/issues/11449
+-		volatile v128_t cm = wasm_i32x4_shl(cf, 4);
+-
+-		// rotate and store
+-		uint64_t* out = reinterpret_cast<uint64_t*>(&data[i * 4]);
+-
+-		out[0] = rotateleft64(wasm_i64x2_extract_lane(res_0, 0), wasm_i32x4_extract_lane(cm, 0));
+-		out[1] = rotateleft64(wasm_i64x2_extract_lane(res_0, 1), wasm_i32x4_extract_lane(cm, 1));
+-		out[2] = rotateleft64(wasm_i64x2_extract_lane(res_1, 0), wasm_i32x4_extract_lane(cm, 2));
+-		out[3] = rotateleft64(wasm_i64x2_extract_lane(res_1, 1), wasm_i32x4_extract_lane(cm, 3));
+-	}
+-}
+-
+-static void decodeFilterExpSimd(unsigned int* data, size_t count)
+-{
+-	for (size_t i = 0; i < count; i += 4)
+-	{
+-		v128_t v = wasm_v128_load(&data[i]);
+-
+-		// decode exponent into 2^x directly
+-		v128_t ef = wasm_i32x4_shr(v, 24);
+-		v128_t es = wasm_i32x4_shl(wasm_i32x4_add(ef, wasm_i32x4_splat(127)), 23);
+-
+-		// decode 24-bit mantissa into floating-point value
+-		v128_t mf = wasm_i32x4_shr(wasm_i32x4_shl(v, 8), 8);
+-		v128_t m = wasm_f32x4_convert_i32x4(mf);
+-
+-		v128_t r = wasm_f32x4_mul(es, m);
+-
+-		wasm_v128_store(&data[i], r);
+-	}
+-}
+-#endif
+-
+-} // namespace meshopt
+-
+-void meshopt_decodeFilterOct(void* buffer, size_t count, size_t stride)
+-{
+-	using namespace meshopt;
+-
+-	assert(stride == 4 || stride == 8);
+-
+-#if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM)
+-	if (stride == 4)
+-		dispatchSimd(decodeFilterOctSimd, static_cast<signed char*>(buffer), count, 4);
+-	else
+-		dispatchSimd(decodeFilterOctSimd, static_cast<short*>(buffer), count, 4);
+-#else
+-	if (stride == 4)
+-		decodeFilterOct(static_cast<signed char*>(buffer), count);
+-	else
+-		decodeFilterOct(static_cast<short*>(buffer), count);
+-#endif
+-}
+-
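For context, the entry point above and its encode counterpart further down are used in pairs. Assuming the public `meshoptimizer.h` header is available, a round trip through the octahedral filter looks roughly like this (illustrative usage sketch, not part of the patch):

```cpp
#include <cstdio>
#include "meshoptimizer.h"

int main()
{
	// two unit normals, padded to 4 floats each as the filter API expects
	const float normals[8] = {
	    0.f, 0.f, 1.f, 0.f,
	    0.70710678f, 0.70710678f, 0.f, 0.f};

	signed char packed[8];
	meshopt_encodeFilterOct(packed, 2, 4, 8, normals); // 8-bit components, 4-byte stride

	meshopt_decodeFilterOct(packed, 2, 4); // decodes in place back to snorm x/y/z
	for (int i = 0; i < 2; ++i)
		printf("%d %d %d\n", packed[i * 4 + 0], packed[i * 4 + 1], packed[i * 4 + 2]);
	// expected roughly: "0 0 127" and "90 90 0"
}
```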
+-void meshopt_decodeFilterQuat(void* buffer, size_t count, size_t stride)
+-{
+-	using namespace meshopt;
+-
+-	assert(stride == 8);
+-	(void)stride;
+-
+-#if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM)
+-	dispatchSimd(decodeFilterQuatSimd, static_cast<short*>(buffer), count, 4);
+-#else
+-	decodeFilterQuat(static_cast<short*>(buffer), count);
+-#endif
+-}
+-
+-void meshopt_decodeFilterExp(void* buffer, size_t count, size_t stride)
+-{
+-	using namespace meshopt;
+-
+-	assert(stride > 0 && stride % 4 == 0);
+-
+-#if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM)
+-	dispatchSimd(decodeFilterExpSimd, static_cast<unsigned int*>(buffer), count * (stride / 4), 1);
+-#else
+-	decodeFilterExp(static_cast<unsigned int*>(buffer), count * (stride / 4));
+-#endif
+-}
+-
+-void meshopt_encodeFilterOct(void* destination, size_t count, size_t stride, int bits, const float* data)
+-{
+-	assert(stride == 4 || stride == 8);
+-	assert(bits >= 1 && bits <= 16);
+-
+-	signed char* d8 = static_cast<signed char*>(destination);
+-	short* d16 = static_cast<short*>(destination);
+-
+-	int bytebits = int(stride * 2);
+-
+-	for (size_t i = 0; i < count; ++i)
+-	{
+-		const float* n = &data[i * 4];
+-
+-		// octahedral encoding of a unit vector
+-		float nx = n[0], ny = n[1], nz = n[2], nw = n[3];
+-		float nl = fabsf(nx) + fabsf(ny) + fabsf(nz);
+-		float ns = nl == 0.f ? 0.f : 1.f / nl;
+-
+-		nx *= ns;
+-		ny *= ns;
+-
+-		float u = (nz >= 0.f) ? nx : (1 - fabsf(ny)) * (nx >= 0.f ? 1.f : -1.f);
+-		float v = (nz >= 0.f) ? ny : (1 - fabsf(nx)) * (ny >= 0.f ? 1.f : -1.f);
+-
+-		int fu = meshopt_quantizeSnorm(u, bits);
+-		int fv = meshopt_quantizeSnorm(v, bits);
+-		int fo = meshopt_quantizeSnorm(1.f, bits);
+-		int fw = meshopt_quantizeSnorm(nw, bytebits);
+-
+-		if (stride == 4)
+-		{
+-			d8[i * 4 + 0] = (signed char)(fu);
+-			d8[i * 4 + 1] = (signed char)(fv);
+-			d8[i * 4 + 2] = (signed char)(fo);
+-			d8[i * 4 + 3] = (signed char)(fw);
+-		}
+-		else
+-		{
+-			d16[i * 4 + 0] = short(fu);
+-			d16[i * 4 + 1] = short(fv);
+-			d16[i * 4 + 2] = short(fo);
+-			d16[i * 4 + 3] = short(fw);
+-		}
+-	}
+-}
+-
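The heart of `meshopt_encodeFilterOct` is the octahedral mapping itself: project the unit vector onto the |x|+|y|+|z| = 1 octahedron, and for the lower hemisphere fold the result into the unit square. Stripped of quantization, the mapping and its inverse (as the decoders above implement it) can be sketched as follows (illustrative, not part of the patch):

```cpp
#include <cmath>
#include <cstdio>

static void oct_encode(const float n[3], float& u, float& v)
{
	float s = 1.f / (fabsf(n[0]) + fabsf(n[1]) + fabsf(n[2])); // project onto the octahedron
	float x = n[0] * s, y = n[1] * s;

	u = (n[2] >= 0.f) ? x : (1 - fabsf(y)) * (x >= 0.f ? 1.f : -1.f); // fold z < 0
	v = (n[2] >= 0.f) ? y : (1 - fabsf(x)) * (y >= 0.f ? 1.f : -1.f);
}

static void oct_decode(float u, float v, float n[3])
{
	float z = 1.f - fabsf(u) - fabsf(v);
	float t = z < 0.f ? z : 0.f; // unfold the lower hemisphere

	n[0] = u + (u >= 0.f ? t : -t);
	n[1] = v + (v >= 0.f ? t : -t);
	n[2] = z;

	float l = sqrtf(n[0] * n[0] + n[1] * n[1] + n[2] * n[2]);
	n[0] /= l, n[1] /= l, n[2] /= l; // renormalize, as the decoders do via 1/sqrt(ll)
}

int main()
{
	float n[3] = {0.267f, -0.535f, -0.802f}, u, v, r[3];
	oct_encode(n, u, v);
	oct_decode(u, v, r);
	printf("%f %f %f\n", r[0], r[1], r[2]); // ~0.267 -0.535 -0.802
}
```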
+-void meshopt_encodeFilterQuat(void* destination_, size_t count, size_t stride, int bits, const float* data)
+-{
+-	assert(stride == 8);
+-	assert(bits >= 4 && bits <= 16);
+-	(void)stride;
+-
+-	short* destination = static_cast<short*>(destination_);
+-
+-	const float scaler = sqrtf(2.f);
+-
+-	for (size_t i = 0; i < count; ++i)
+-	{
+-		const float* q = &data[i * 4];
+-		short* d = &destination[i * 4];
+-
+-		// establish maximum quaternion component
+-		int qc = 0;
+-		qc = fabsf(q[1]) > fabsf(q[qc]) ? 1 : qc;
+-		qc = fabsf(q[2]) > fabsf(q[qc]) ? 2 : qc;
+-		qc = fabsf(q[3]) > fabsf(q[qc]) ? 3 : qc;
+-
+-		// we use double-cover properties to discard the sign
+-		float sign = q[qc] < 0.f ? -1.f : 1.f;
+-
+-		// note: we always encode a cyclical swizzle to be able to recover the order via rotation
+-		d[0] = short(meshopt_quantizeSnorm(q[(qc + 1) & 3] * scaler * sign, bits));
+-		d[1] = short(meshopt_quantizeSnorm(q[(qc + 2) & 3] * scaler * sign, bits));
+-		d[2] = short(meshopt_quantizeSnorm(q[(qc + 3) & 3] * scaler * sign, bits));
+-		d[3] = short((meshopt_quantizeSnorm(1.f, bits) & ~3) | qc);
+-	}
+-}
+-
+-void meshopt_encodeFilterExp(void* destination_, size_t count, size_t stride, int bits, const float* data)
+-{
+-	assert(stride > 0 && stride % 4 == 0);
+-	assert(bits >= 1 && bits <= 24);
+-
+-	unsigned int* destination = static_cast<unsigned int*>(destination_);
+-	size_t stride_float = stride / sizeof(float);
+-
+-	for (size_t i = 0; i < count; ++i)
+-	{
+-		const float* v = &data[i * stride_float];
+-		unsigned int* d = &destination[i * stride_float];
+-
+-		// use maximum exponent to encode values; this guarantees that mantissa is [-1, 1]
+-		int exp = -100;
+-
+-		for (size_t j = 0; j < stride_float; ++j)
+-		{
+-			int e;
+-			frexp(v[j], &e);
+-
+-			exp = (exp < e) ? e : exp;
+-		}
+-
+-		// note that we additionally scale the mantissa to make it a K-bit signed integer (K-1 bits for magnitude)
+-		exp -= (bits - 1);
+-
+-		// compute renormalized rounded mantissa for each component
+-		int mmask = (1 << 24) - 1;
+-
+-		for (size_t j = 0; j < stride_float; ++j)
+-		{
+-			int m = int(ldexp(v[j], -exp) + (v[j] >= 0 ? 0.5f : -0.5f));
+-
+-			d[j] = (m & mmask) | (unsigned(exp) << 24);
+-		}
+-	}
+-}
+-
+-#undef SIMD_SSE
+-#undef SIMD_NEON
+-#undef SIMD_WASM
+diff --git a/src/3rdparty/meshoptimizer/src/vfetchanalyzer.cpp b/src/3rdparty/meshoptimizer/src/vfetchanalyzer.cpp
+deleted file mode 100644
+index 51dca87..0000000
+--- a/src/3rdparty/meshoptimizer/src/vfetchanalyzer.cpp
++++ /dev/null
+@@ -1,58 +0,0 @@
+-// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
+-#include "meshoptimizer.h"
+-
+-#include <assert.h>
+-#include <string.h>
+-
+-meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const unsigned int* indices, size_t index_count, size_t vertex_count, size_t vertex_size)
+-{
+-	assert(index_count % 3 == 0);
+-	assert(vertex_size > 0 && vertex_size <= 256);
+-
+-	meshopt_Allocator allocator;
+-
+-	meshopt_VertexFetchStatistics result = {};
+-
+-	unsigned char* vertex_visited = allocator.allocate<unsigned char>(vertex_count);
+-	memset(vertex_visited, 0, vertex_count);
+-
+-	const size_t kCacheLine = 64;
+-	const size_t kCacheSize = 128 * 1024;
+-
+-	// simple direct mapped cache; on typical mesh data this is close to 4-way cache, and this model is a gross approximation anyway
+-	size_t cache[kCacheSize / kCacheLine] = {};
+-
+-	for (size_t i = 0; i < index_count; ++i)
+-	{
+-		unsigned int index = indices[i];
+-		assert(index < vertex_count);
+-
+-		vertex_visited[index] = 1;
+-
+-		size_t start_address = index * vertex_size;
+-		size_t end_address = start_address + vertex_size;
+-
+-		size_t start_tag = start_address / kCacheLine;
+-		size_t end_tag = (end_address + kCacheLine - 1) / kCacheLine;
+-
+-		assert(start_tag < end_tag);
+-
+-		for (size_t tag = start_tag; tag < end_tag; ++tag)
+-		{
+-			size_t line = tag % (sizeof(cache) / sizeof(cache[0]));
+-
+-			// we store +1 since cache is filled with 0 by default
+-			result.bytes_fetched += (cache[line] != tag + 1) * kCacheLine;
+-			cache[line] = tag + 1;
+-		}
+-	}
+-
+-	size_t unique_vertex_count = 0;
+-
+-	for (size_t i = 0; i < vertex_count; ++i)
+-		unique_vertex_count += vertex_visited[i];
+-
+-	result.overfetch = unique_vertex_count == 0 ? 0 : float(result.bytes_fetched) / float(unique_vertex_count * vertex_size);
+-
+-	return result;
+-}
+diff --git a/src/3rdparty/meshoptimizer/src/vfetchoptimizer.cpp b/src/3rdparty/meshoptimizer/src/vfetchoptimizer.cpp
+deleted file mode 100644
+index 465d6df..0000000
+--- a/src/3rdparty/meshoptimizer/src/vfetchoptimizer.cpp
++++ /dev/null
+@@ -1,74 +0,0 @@
+-// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
+-#include "meshoptimizer.h"
+-
+-#include <assert.h>
+-#include <string.h>
+-
+-size_t meshopt_optimizeVertexFetchRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count)
+-{
+-	assert(index_count % 3 == 0);
+-
+-	memset(destination, -1, vertex_count * sizeof(unsigned int));
+-
+-	unsigned int next_vertex = 0;
+-
+-	for (size_t i = 0; i < index_count; ++i)
+-	{
+-		unsigned int index = indices[i];
+-		assert(index < vertex_count);
+-
+-		if (destination[index] == ~0u)
+-		{
+-			destination[index] = next_vertex++;
+-		}
+-	}
+-
+-	assert(next_vertex <= vertex_count);
+-
+-	return next_vertex;
+-}
+-
+-size_t meshopt_optimizeVertexFetch(void* destination, unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size)
+-{
+-	assert(index_count % 3 == 0);
+-	assert(vertex_size > 0 && vertex_size <= 256);
+-
+-	meshopt_Allocator allocator;
+-
+-	// support in-place optimization
+-	if (destination == vertices)
+-	{
+-		unsigned char* vertices_copy = allocator.allocate<unsigned char>(vertex_count * vertex_size);
+-		memcpy(vertices_copy, vertices, vertex_count * vertex_size);
+-		vertices = vertices_copy;
+-	}
+-
+-	// build vertex remap table
+-	unsigned int* vertex_remap = allocator.allocate<unsigned int>(vertex_count);
+-	memset(vertex_remap, -1, vertex_count * sizeof(unsigned int));
+-
+-	unsigned int next_vertex = 0;
+-
+-	for (size_t i = 0; i < index_count; ++i)
+-	{
+-		unsigned int index = indices[i];
+-		assert(index < vertex_count);
+-
+-		unsigned int& remap = vertex_remap[index];
+-
+-		if (remap == ~0u) // vertex was not added to destination VB
+-		{
+-			// add vertex
+-			memcpy(static_cast<unsigned char*>(destination) + next_vertex * vertex_size, static_cast<const unsigned char*>(vertices) + index * vertex_size, vertex_size);
+-
+-			remap = next_vertex++;
+-		}
+-
+-		// modify indices in place
+-		indices[i] = remap;
+-	}
+-
+-	assert(next_vertex <= vertex_count);
+-
+-	return next_vertex;
+-}
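What `meshopt_optimizeVertexFetch` achieves is easy to see in miniature: vertices are re-emitted in first-use order while the index buffer is rewritten through the same remap, so index-driven fetches walk vertex memory mostly forward. A dependency-free sketch of the core loop (illustrative, not part of the patch):

```cpp
#include <cstdio>
#include <vector>

int main()
{
	std::vector<unsigned int> indices = {2, 0, 3, 2, 3, 1}; // references out of storage order
	std::vector<float> vertices = {/*v0*/ 10.f, /*v1*/ 11.f, /*v2*/ 12.f, /*v3*/ 13.f};

	std::vector<float> reordered(vertices.size());
	std::vector<unsigned int> remap(vertices.size(), ~0u);
	unsigned int next_vertex = 0;

	for (unsigned int& index : indices)
	{
		if (remap[index] == ~0u) // first use: append the vertex to the destination
		{
			reordered[next_vertex] = vertices[index];
			remap[index] = next_vertex++;
		}
		index = remap[index]; // modify indices in place, as the real code does
	}

	for (unsigned int i : indices)
		printf("%u ", i); // 0 1 2 0 2 3
	printf("| ");
	for (float v : reordered)
		printf("%g ", v); // 12 10 13 11
}
```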
+diff --git a/src/utils/CMakeLists.txt b/src/utils/CMakeLists.txt
+index 78ad8af..bee1ca6 100644
+--- a/src/utils/CMakeLists.txt
++++ b/src/utils/CMakeLists.txt
+@@ -7,6 +7,8 @@
+ ## Quick3DUtils Module:
+ #####################################################################
+ 
++qt_find_package(meshoptimizer PROVIDED_TARGETS meshoptimizer::meshoptimizer)
++
+ qt_internal_add_module(Quick3DUtils
+     SOURCES
+         qqsbcollection.cpp qqsbcollection_p.h
+@@ -26,49 +28,17 @@ qt_internal_add_module(Quick3DUtils
+         qquick3dprofiler_p.h
+         ../3rdparty/xatlas/xatlas.cpp ../3rdparty/xatlas/xatlas.h
+         qssglightmapuvgenerator.cpp qssglightmapuvgenerator_p.h
+-        ../3rdparty/meshoptimizer/src/allocator.cpp
+-        ../3rdparty/meshoptimizer/src/clusterizer.cpp
+-        ../3rdparty/meshoptimizer/src/indexcodec.cpp
+-        ../3rdparty/meshoptimizer/src/indexgenerator.cpp
+-        ../3rdparty/meshoptimizer/src/meshoptimizer.h
+-        ../3rdparty/meshoptimizer/src/overdrawanalyzer.cpp
+-        ../3rdparty/meshoptimizer/src/overdrawoptimizer.cpp
+-        ../3rdparty/meshoptimizer/src/simplifier.cpp
+-        ../3rdparty/meshoptimizer/src/spatialorder.cpp
+-        ../3rdparty/meshoptimizer/src/stripifier.cpp
+-        ../3rdparty/meshoptimizer/src/vcacheanalyzer.cpp
+-        ../3rdparty/meshoptimizer/src/vcacheoptimizer.cpp
+-        ../3rdparty/meshoptimizer/src/vertexcodec.cpp
+-        ../3rdparty/meshoptimizer/src/vertexfilter.cpp
+-        ../3rdparty/meshoptimizer/src/vfetchanalyzer.cpp
+-        ../3rdparty/meshoptimizer/src/vfetchoptimizer.cpp
+     NO_UNITY_BUILD_SOURCES
+         ../3rdparty/xatlas/xatlas.cpp ../3rdparty/xatlas/xatlas.h
+-        ../3rdparty/meshoptimizer/src/allocator.cpp
+-        ../3rdparty/meshoptimizer/src/clusterizer.cpp
+-        ../3rdparty/meshoptimizer/src/indexcodec.cpp
+-        ../3rdparty/meshoptimizer/src/indexgenerator.cpp
+-        ../3rdparty/meshoptimizer/src/meshoptimizer.h
+-        ../3rdparty/meshoptimizer/src/overdrawanalyzer.cpp
+-        ../3rdparty/meshoptimizer/src/overdrawoptimizer.cpp
+-        ../3rdparty/meshoptimizer/src/simplifier.cpp
+-        ../3rdparty/meshoptimizer/src/spatialorder.cpp
+-        ../3rdparty/meshoptimizer/src/stripifier.cpp
+-        ../3rdparty/meshoptimizer/src/vcacheanalyzer.cpp
+-        ../3rdparty/meshoptimizer/src/vcacheoptimizer.cpp
+-        ../3rdparty/meshoptimizer/src/vertexcodec.cpp
+-        ../3rdparty/meshoptimizer/src/vertexfilter.cpp
+-        ../3rdparty/meshoptimizer/src/vfetchanalyzer.cpp
+-        ../3rdparty/meshoptimizer/src/vfetchoptimizer.cpp
+     DEFINES
+         QT_BUILD_QUICK3DUTILS_LIB
+     INCLUDE_DIRECTORIES
+         ../3rdparty/xatlas
+-        ../3rdparty/meshoptimizer/src/
+     LIBRARIES
+         Qt::CorePrivate
+         Qt::GuiPrivate
+         Qt::QuickPrivate
++        meshoptimizer::meshoptimizer
+     PUBLIC_LIBRARIES
+         Qt::Core
+         Qt::Gui
+-- 
+2.39.3 (Apple Git-146)
+
diff --git a/vcpkg/ports/qtquick3d/portfile.cmake b/vcpkg/ports/qtquick3d/portfile.cmake
new file mode 100644
index 0000000000..8efdcf7893
--- /dev/null
+++ b/vcpkg/ports/qtquick3d/portfile.cmake
@@ -0,0 +1,30 @@
+set(SCRIPT_PATH "${CURRENT_INSTALLED_DIR}/share/qtbase")
+set(${PORT}_PATCHES 0001-devendor-meshoptimizer.patch)
+
+include("${SCRIPT_PATH}/qt_install_submodule.cmake")
+
+# General features:
+vcpkg_check_features(OUT_FEATURE_OPTIONS FEATURE_OPTIONS
+FEATURES
+    "assimp"    FEATURE_quick3d_assimp
+    #"assimp"   CMAKE_REQUIRE_FIND_PACKAGE_WrapQuick3DAssimp
+INVERTED_FEATURES
+    "assimp"    CMAKE_DISABLE_FIND_PACKAGE_WrapQuick3DAssimp
+    )
+
+if("assimp" IN_LIST FEATURES)
+    list(APPEND FEATURE_OPTIONS -DINPUT_quick3d_assimp=system -DTEST_quick3d_assimp=ON -DHAVE_Assimp=ON)
+else()
+    list(APPEND FEATURE_OPTIONS -DINPUT_quick3d_assimp=no)
+endif()
+
+set(TOOL_NAMES balsam balsamui meshdebug shadergen instancer materialeditor shapegen)
+
+qt_install_submodule(PATCHES ${${PORT}_PATCHES}
+                     TOOL_NAMES ${TOOL_NAMES}
+                     CONFIGURE_OPTIONS
+                         ${FEATURE_OPTIONS}
+                         -DCMAKE_FIND_PACKAGE_TARGETS_GLOBAL=ON
+                     CONFIGURE_OPTIONS_RELEASE
+                     CONFIGURE_OPTIONS_DEBUG
+                    )
diff --git a/vcpkg/ports/qtquick3d/vcpkg.json b/vcpkg/ports/qtquick3d/vcpkg.json
new file mode 100644
index 0000000000..96f373f641
--- /dev/null
+++ b/vcpkg/ports/qtquick3d/vcpkg.json
@@ -0,0 +1,39 @@
+{
+  "name": "qtquick3d",
+  "version": "6.7.3",
+  "description": "Qt Quick 3D provides a high-level API for creating 3D content and 3D user interfaces based on Qt Quick.",
+  "homepage": "https://www.qt.io/",
+  "license": null,
+  "dependencies": [
+    "meshoptimizer",
+    {
+      "name": "qtbase",
+      "default-features": false,
+      "features": [
+        "gui"
+      ]
+    },
+    "qtdeclarative",
+    {
+      "name": "qtquick3d",
+      "host": true,
+      "default-features": false
+    },
+    "qtquicktimeline",
+    "qtshadertools"
+  ],
+  "default-features": [
+    "default-features"
+  ],
+  "features": {
+    "assimp": {
+      "description": "assimp",
+      "dependencies": [
+        "assimp"
+      ]
+    },
+    "default-features": {
+      "description": "Platform-dependent default features"
+    }
+  }
+}