From 39a78d42e105d19b3ae687dbde3ac3035584f1aa Mon Sep 17 00:00:00 2001 From: rsp4jack Date: Wed, 24 Jan 2024 22:18:49 +0800 Subject: [PATCH] =?UTF-8?q?Use=20FetchContent=20instead=20including=20subt?= =?UTF-8?q?ree=20=E5=9B=A0=E4=B8=BA=E7=99=BD=E9=85=B1=E4=B8=8D=E5=96=9C?= =?UTF-8?q?=E6=AC=A2=E6=9C=89=E8=BF=99=E4=B9=88=E5=A4=9A=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../workflows/encodingcheck/CMakeLists.txt | 25 +- .../encodingcheck/cmake/xdgmime.cmake | 25 + .../.github/workflows/ubuntu22-sanitize.yml | 25 - .../is_utf8/.github/workflows/ubuntu22.yml | 25 - .../is_utf8/.github/workflows/vs17.yml | 35 - .../encodingcheck/is_utf8/.gitignore | 2 - .../encodingcheck/is_utf8/CMakeLists.txt | 106 - .../encodingcheck/is_utf8/LICENSE-APACHE | 204 - .../encodingcheck/is_utf8/LICENSE-BOOST | 23 - .../encodingcheck/is_utf8/LICENSE-MIT | 18 - .../workflows/encodingcheck/is_utf8/README.md | 109 - .../is_utf8/benchmarks/CMakeLists.txt | 13 - .../is_utf8/benchmarks/bench.cpp | 251 - .../is_utf8/cmake/add_cpp_test.cmake | 59 - .../encodingcheck/is_utf8/cmake/import.cmake | 48 - .../is_utf8/cmake/is_utf8-config.cmake.in | 2 - .../encodingcheck/is_utf8/include/is_utf8.h | 10 - .../encodingcheck/is_utf8/src/CMakeLists.txt | 38 - .../encodingcheck/is_utf8/src/is_utf8.cpp | 6734 ----------------- .../is_utf8/tests/CMakeLists.txt | 8 - .../encodingcheck/is_utf8/tests/unit.cpp | 338 - .../encodingcheck/xdgmime/CMakeLists.txt | 16 - .../workflows/encodingcheck/xdgmime/README | 8 - .../encodingcheck/xdgmime/print-mime-data.c | 169 - .../encodingcheck/xdgmime/test-mime-data.c | 312 - .../encodingcheck/xdgmime/test-mime.c | 186 - .../workflows/encodingcheck/xdgmime/xdgmime.c | 1028 --- .../workflows/encodingcheck/xdgmime/xdgmime.h | 120 - .../encodingcheck/xdgmime/xdgmimealias.c | 168 - .../encodingcheck/xdgmime/xdgmimealias.h | 35 - .../encodingcheck/xdgmime/xdgmimecache.c | 1216 --- 
.../encodingcheck/xdgmime/xdgmimecache.h | 66 - .../encodingcheck/xdgmime/xdgmimeglob.c | 675 -- .../encodingcheck/xdgmime/xdgmimeglob.h | 54 - .../encodingcheck/xdgmime/xdgmimeicon.c | 167 - .../encodingcheck/xdgmime/xdgmimeicon.h | 34 - .../encodingcheck/xdgmime/xdgmimeint.c | 190 - .../encodingcheck/xdgmime/xdgmimeint.h | 62 - .../encodingcheck/xdgmime/xdgmimemagic.c | 819 -- .../encodingcheck/xdgmime/xdgmimemagic.h | 41 - .../encodingcheck/xdgmime/xdgmimeparent.c | 204 - .../encodingcheck/xdgmime/xdgmimeparent.h | 35 - 42 files changed, 47 insertions(+), 13656 deletions(-) create mode 100644 .github/workflows/encodingcheck/cmake/xdgmime.cmake delete mode 100644 .github/workflows/encodingcheck/is_utf8/.github/workflows/ubuntu22-sanitize.yml delete mode 100644 .github/workflows/encodingcheck/is_utf8/.github/workflows/ubuntu22.yml delete mode 100644 .github/workflows/encodingcheck/is_utf8/.github/workflows/vs17.yml delete mode 100644 .github/workflows/encodingcheck/is_utf8/.gitignore delete mode 100644 .github/workflows/encodingcheck/is_utf8/CMakeLists.txt delete mode 100644 .github/workflows/encodingcheck/is_utf8/LICENSE-APACHE delete mode 100644 .github/workflows/encodingcheck/is_utf8/LICENSE-BOOST delete mode 100644 .github/workflows/encodingcheck/is_utf8/LICENSE-MIT delete mode 100644 .github/workflows/encodingcheck/is_utf8/README.md delete mode 100644 .github/workflows/encodingcheck/is_utf8/benchmarks/CMakeLists.txt delete mode 100644 .github/workflows/encodingcheck/is_utf8/benchmarks/bench.cpp delete mode 100644 .github/workflows/encodingcheck/is_utf8/cmake/add_cpp_test.cmake delete mode 100644 .github/workflows/encodingcheck/is_utf8/cmake/import.cmake delete mode 100644 .github/workflows/encodingcheck/is_utf8/cmake/is_utf8-config.cmake.in delete mode 100644 .github/workflows/encodingcheck/is_utf8/include/is_utf8.h delete mode 100644 .github/workflows/encodingcheck/is_utf8/src/CMakeLists.txt delete mode 100644 
.github/workflows/encodingcheck/is_utf8/src/is_utf8.cpp delete mode 100644 .github/workflows/encodingcheck/is_utf8/tests/CMakeLists.txt delete mode 100644 .github/workflows/encodingcheck/is_utf8/tests/unit.cpp delete mode 100644 .github/workflows/encodingcheck/xdgmime/CMakeLists.txt delete mode 100644 .github/workflows/encodingcheck/xdgmime/README delete mode 100644 .github/workflows/encodingcheck/xdgmime/print-mime-data.c delete mode 100644 .github/workflows/encodingcheck/xdgmime/test-mime-data.c delete mode 100644 .github/workflows/encodingcheck/xdgmime/test-mime.c delete mode 100644 .github/workflows/encodingcheck/xdgmime/xdgmime.c delete mode 100644 .github/workflows/encodingcheck/xdgmime/xdgmime.h delete mode 100644 .github/workflows/encodingcheck/xdgmime/xdgmimealias.c delete mode 100644 .github/workflows/encodingcheck/xdgmime/xdgmimealias.h delete mode 100644 .github/workflows/encodingcheck/xdgmime/xdgmimecache.c delete mode 100644 .github/workflows/encodingcheck/xdgmime/xdgmimecache.h delete mode 100644 .github/workflows/encodingcheck/xdgmime/xdgmimeglob.c delete mode 100644 .github/workflows/encodingcheck/xdgmime/xdgmimeglob.h delete mode 100644 .github/workflows/encodingcheck/xdgmime/xdgmimeicon.c delete mode 100644 .github/workflows/encodingcheck/xdgmime/xdgmimeicon.h delete mode 100644 .github/workflows/encodingcheck/xdgmime/xdgmimeint.c delete mode 100644 .github/workflows/encodingcheck/xdgmime/xdgmimeint.h delete mode 100644 .github/workflows/encodingcheck/xdgmime/xdgmimemagic.c delete mode 100644 .github/workflows/encodingcheck/xdgmime/xdgmimemagic.h delete mode 100644 .github/workflows/encodingcheck/xdgmime/xdgmimeparent.c delete mode 100644 .github/workflows/encodingcheck/xdgmime/xdgmimeparent.h diff --git a/.github/workflows/encodingcheck/CMakeLists.txt b/.github/workflows/encodingcheck/CMakeLists.txt index b1a78952..2746dea2 100644 --- a/.github/workflows/encodingcheck/CMakeLists.txt +++ b/.github/workflows/encodingcheck/CMakeLists.txt @@ -2,8 
+2,28 @@ cmake_minimum_required(VERSION 3.17)
 project(encodingcheck LANGUAGES CXX)
 set(CMAKE_CXX_STANDARD 20)
 
-add_subdirectory(xdgmime)
-add_subdirectory(is_utf8)
+include(FetchContent)
+
+# The libxdgmime target is built by cmake/xdgmime.cmake (included below)
+# from the xdgmime sources fetched here.
+# TODO: pin GIT_TAG to a commit hash instead of a moving branch.
+FetchContent_Declare(
+  xdgmime
+  GIT_REPOSITORY https://gitlab.freedesktop.org/xdg/xdgmime.git
+  GIT_TAG master
+)
+
+FetchContent_Declare(
+  is_utf8
+  GIT_REPOSITORY https://github.com/simdutf/is_utf8.git
+  GIT_TAG v1.3.1
+)
+
+FetchContent_MakeAvailable(xdgmime is_utf8)
+
+message(STATUS "xdgmime_SOURCE_DIR=${xdgmime_SOURCE_DIR}")
+
+include("${CMAKE_CURRENT_LIST_DIR}/cmake/xdgmime.cmake")
 
 add_executable(encodingcheck encodingcheck.cpp)
-target_link_libraries(encodingcheck PRIVATE libxdgmime is_utf8)
+target_link_libraries(encodingcheck PRIVATE libxdgmime is_utf8::is_utf8)
diff --git a/.github/workflows/encodingcheck/cmake/xdgmime.cmake b/.github/workflows/encodingcheck/cmake/xdgmime.cmake
new file mode 100644
index 00000000..d3173c3a
--- /dev/null
+++ b/.github/workflows/encodingcheck/cmake/xdgmime.cmake
@@ -0,0 +1,31 @@
+# Defines the libxdgmime static library plus the test-mime, test-mime-data and
+# print-mime-data executables from the sources under ${xdgmime_SOURCE_DIR}/src.
+if(NOT xdgmime_SOURCE_DIR)
+  message(FATAL_ERROR "xdgmime_SOURCE_DIR is not set; include this file after xdgmime has been fetched")
+endif()
+
+# Every source below is C, so the C language must be enabled even when the
+# including project() lists only CXX.
+enable_language(C)
+
+add_library(libxdgmime STATIC
+  "${xdgmime_SOURCE_DIR}/src/xdgmime.c"
+  "${xdgmime_SOURCE_DIR}/src/xdgmimeglob.c"
+  "${xdgmime_SOURCE_DIR}/src/xdgmimeint.c"
+  "${xdgmime_SOURCE_DIR}/src/xdgmimemagic.c"
+  "${xdgmime_SOURCE_DIR}/src/xdgmimealias.c"
+  "${xdgmime_SOURCE_DIR}/src/xdgmimeparent.c"
+  "${xdgmime_SOURCE_DIR}/src/xdgmimecache.c"
+  "${xdgmime_SOURCE_DIR}/src/xdgmimeicon.c"
+)
+target_include_directories(libxdgmime PUBLIC "${xdgmime_SOURCE_DIR}/src")
+target_compile_definitions(libxdgmime PRIVATE HAVE_MMAP)
+
+add_executable(test-mime "${xdgmime_SOURCE_DIR}/src/test-mime.c")
+target_link_libraries(test-mime PRIVATE libxdgmime)
+
+add_executable(test-mime-data "${xdgmime_SOURCE_DIR}/src/test-mime-data.c")
+target_link_libraries(test-mime-data PRIVATE libxdgmime)
+
+add_executable(print-mime-data "${xdgmime_SOURCE_DIR}/src/print-mime-data.c")
+target_link_libraries(print-mime-data PRIVATE libxdgmime)
diff --git a/.github/workflows/encodingcheck/is_utf8/.github/workflows/ubuntu22-sanitize.yml b/.github/workflows/encodingcheck/is_utf8/.github/workflows/ubuntu22-sanitize.yml deleted file mode 100644 index f40174c3..00000000 --- a/.github/workflows/encodingcheck/is_utf8/.github/workflows/ubuntu22-sanitize.yml +++ /dev/null @@ -1,25 +0,0 @@ -name: Ubuntu 22.04 CI (GCC 11) with Sanitizers - -on: [push, pull_request] - -jobs: - ubuntu-build: - if: >- - ! contains(toJSON(github.event.commits.*.message), '[skip ci]') && - ! contains(toJSON(github.event.commits.*.message), '[skip github]') - runs-on: ubuntu-22.04 - steps: - - uses: actions/checkout@v3 - - name: Use cmake - run: | - mkdir builddebug && - cd builddebug && - cmake -DIS_UTF8_SANITIZE=ON -DCMAKE_BUILD_TYPE=Debug .. && - cmake --build . && - ctest -j --output-on-failure -LE explicitonly && - cd .. && - mkdir build && - cd build && - cmake -DIS_UTF8_SANITIZE=ON .. && - cmake --build . && - ctest -j --output-on-failure -LE explicitonly \ No newline at end of file diff --git a/.github/workflows/encodingcheck/is_utf8/.github/workflows/ubuntu22.yml b/.github/workflows/encodingcheck/is_utf8/.github/workflows/ubuntu22.yml deleted file mode 100644 index d67d0675..00000000 --- a/.github/workflows/encodingcheck/is_utf8/.github/workflows/ubuntu22.yml +++ /dev/null @@ -1,25 +0,0 @@ -name: Ubuntu 22.04 CI (GCC 11) - -on: [push, pull_request] - -jobs: - ubuntu-build: - if: >- - ! contains(toJSON(github.event.commits.*.message), '[skip ci]') && - ! contains(toJSON(github.event.commits.*.message), '[skip github]') - runs-on: ubuntu-22.04 - steps: - - uses: actions/checkout@v3 - - name: Use cmake - run: | - mkdir builddebug && - cd builddebug && - cmake -DCMAKE_BUILD_TYPE=Debug .. && - cmake --build . && - ctest -j --output-on-failure -LE explicitonly && - cd .. && - mkdir build && - cd build && - cmake .. && - cmake --build . 
&& - ctest -j --output-on-failure -LE explicitonly \ No newline at end of file diff --git a/.github/workflows/encodingcheck/is_utf8/.github/workflows/vs17.yml b/.github/workflows/encodingcheck/is_utf8/.github/workflows/vs17.yml deleted file mode 100644 index ea846abe..00000000 --- a/.github/workflows/encodingcheck/is_utf8/.github/workflows/vs17.yml +++ /dev/null @@ -1,35 +0,0 @@ -name: VS17-CI - -on: [push, pull_request] - -jobs: - ci: - if: >- - ! contains(toJSON(github.event.commits.*.message), '[skip ci]') && - ! contains(toJSON(github.event.commits.*.message), '[skip github]') - name: windows-vs17 - runs-on: windows-latest - strategy: - fail-fast: false - matrix: - include: - - {gen: Visual Studio 17 2022, arch: x64, shared: ON} - - {gen: Visual Studio 17 2022, arch: x64, shared: OFF} - steps: - - name: checkout - uses: actions/checkout@v3 - - name: Configure - run: | - cmake -G "${{matrix.gen}}" -A ${{matrix.arch}} -DBUILD_SHARED_LIBS=${{matrix.shared}} -B build - - name: Build Debug - run: cmake --build build --config Debug --verbose - - name: Build Release - run: cmake --build build --config Release --verbose - - name: Run Release tests - run: | - cd build - ctest -C Release -LE explicitonly --output-on-failure - - name: Run Debug tests - run: | - cd build - ctest -C Debug -LE explicitonly --output-on-failure \ No newline at end of file diff --git a/.github/workflows/encodingcheck/is_utf8/.gitignore b/.github/workflows/encodingcheck/is_utf8/.gitignore deleted file mode 100644 index 91151f24..00000000 --- a/.github/workflows/encodingcheck/is_utf8/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -build/ -src/dependencies/ diff --git a/.github/workflows/encodingcheck/is_utf8/CMakeLists.txt b/.github/workflows/encodingcheck/is_utf8/CMakeLists.txt deleted file mode 100644 index 97c20a78..00000000 --- a/.github/workflows/encodingcheck/is_utf8/CMakeLists.txt +++ /dev/null @@ -1,106 +0,0 @@ -cmake_minimum_required(VERSION 3.15) - -project(is_utf8 - DESCRIPTION "Fast UTF-8 
Validation" - LANGUAGES CXX - VERSION 1.3.1 -) - -include(GNUInstallDirs) -include(CTest) - -option(IS_UTF8_SANITIZE "Sanitize addresses" OFF) - -if (NOT CMAKE_BUILD_TYPE) - message(STATUS "No build type selected, default to Release") - if(IS_UTF8_SANITIZE) - set(CMAKE_BUILD_TYPE Debug CACHE STRING "Choose the type of build." FORCE) - else() - set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE) - endif() -endif() - -set(CMAKE_CXX_STANDARD 14) -set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(CMAKE_CXX_EXTENSIONS OFF) -set(CMAKE_MACOSX_RPATH OFF) - -set(IS_UTF8_LIB_VERSION "1.3.1" CACHE STRING "is_utf8 library version") -set(IS_UTF8_LIB_SOVERSION "1" CACHE STRING "is_utf8 library soversion") - -set(IS_UTF8_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src) -add_subdirectory(src) - -if (BUILD_TESTING) - message(STATUS "The tests are enabled.") - add_subdirectory(tests) -else() - message(STATUS "The tests are disabled.") -endif(BUILD_TESTING) - - -add_subdirectory(benchmarks) -# ---- Install rules ---- -add_library(is_utf8::is_utf8 ALIAS is_utf8) - -set_target_properties( - is_utf8 PROPERTIES - VERSION "${IS_UTF8_LIB_VERSION}" - SOVERSION "${IS_UTF8_LIB_SOVERSION}" - WINDOWS_EXPORT_ALL_SYMBOLS YES -) - -include(CMakePackageConfigHelpers) -include(GNUInstallDirs) - -install( - FILES include/is_utf8.h - DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" - COMPONENT is_utf8_Development -) - -install( - TARGETS is_utf8 - EXPORT is_utf8Targets - RUNTIME COMPONENT is_utf8_Runtime - LIBRARY COMPONENT is_utf8_Runtime - NAMELINK_COMPONENT is_utf8_Development - ARCHIVE COMPONENT is_utf8_Development - INCLUDES DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" -) - -configure_file(cmake/is_utf8-config.cmake.in is_utf8-config.cmake @ONLY) - -write_basic_package_version_file( - is_utf8-config-version.cmake - COMPATIBILITY SameMinorVersion -) - -set( - IS_UTF8_INSTALL_CMAKEDIR "${CMAKE_INSTALL_LIBDIR}/cmake/is_utf8" - CACHE STRING "CMake package config location relative to the install 
prefix" -) -mark_as_advanced(IS_UTF8_INSTALL_CMAKEDIR) - -install( - FILES - "${PROJECT_BINARY_DIR}/is_utf8-config.cmake" - "${PROJECT_BINARY_DIR}/is_utf8-config-version.cmake" - DESTINATION "${IS_UTF8_INSTALL_CMAKEDIR}" - COMPONENT is_utf8_Development -) - -# -# CPack -# -if(is_top_project) - set(CPACK_PACKAGE_VENDOR "Daniel Lemire") - set(CPACK_PACKAGE_CONTACT "lemire@gmail.com") - set(CPACK_RESOURCE_FILE_LICENSE "${PROJECT_SOURCE_DIR}/LICENSE-MIT") - set(CPACK_RPM_PACKAGE_LICENSE "${PROJECT_SOURCE_DIR}/LICENSE-MIT") - set(CPACK_RESOURCE_FILE_README "${PROJECT_SOURCE_DIR}/README.md") - set(CPACK_SOURCE_GENERATOR "TGZ;ZIP") - include(CPack) -endif() - -# ---- diff --git a/.github/workflows/encodingcheck/is_utf8/LICENSE-APACHE b/.github/workflows/encodingcheck/is_utf8/LICENSE-APACHE deleted file mode 100644 index db016623..00000000 --- a/.github/workflows/encodingcheck/is_utf8/LICENSE-APACHE +++ /dev/null @@ -1,204 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. 
- - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - -NOTE: This software may be used as part of software released under the -GNU General Public License (GPL) version 2. diff --git a/.github/workflows/encodingcheck/is_utf8/LICENSE-BOOST b/.github/workflows/encodingcheck/is_utf8/LICENSE-BOOST deleted file mode 100644 index 36b7cd93..00000000 --- a/.github/workflows/encodingcheck/is_utf8/LICENSE-BOOST +++ /dev/null @@ -1,23 +0,0 @@ -Boost Software License - Version 1.0 - August 17th, 2003 - -Permission is hereby granted, free of charge, to any person or organization -obtaining a copy of the software and accompanying documentation covered by -this license (the "Software") to use, reproduce, display, distribute, -execute, and transmit the Software, and to prepare derivative works of the -Software, and to permit third-parties to whom the Software is furnished to -do so, all subject to the following: - -The copyright notices in the Software and this entire statement, including -the above license grant, this restriction and the following disclaimer, -must be included in all copies of the Software, in whole or in part, and -all derivative works of the Software, unless such copies or derivative -works are solely in the form of machine-executable object code generated by -a source language processor. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT -SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE -FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, -ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. diff --git a/.github/workflows/encodingcheck/is_utf8/LICENSE-MIT b/.github/workflows/encodingcheck/is_utf8/LICENSE-MIT deleted file mode 100644 index 5efe84c9..00000000 --- a/.github/workflows/encodingcheck/is_utf8/LICENSE-MIT +++ /dev/null @@ -1,18 +0,0 @@ -Copyright 2022 The is_utf8 authors - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/.github/workflows/encodingcheck/is_utf8/README.md b/.github/workflows/encodingcheck/is_utf8/README.md deleted file mode 100644 index fcce9354..00000000 --- a/.github/workflows/encodingcheck/is_utf8/README.md +++ /dev/null @@ -1,109 +0,0 @@ -# is_utf8 - -Most strings online are in unicode using the UTF-8 encoding. Validating strings -quickly before accepting them is important. - -## How to use is_utf8 - -This is a simple one-source file library to validate UTF-8 strings at high -speeds using SIMD instructions. It works on all platforms (ARM, x64). - -Build and link `is_utf8.cpp` with your project. Code usage: - -```C++ - #include "is_utf8.h" - - char * mystring = ... - bool is_it_valid = is_utf8(mystring, thestringlength); -``` - -It should be able to validate strings using less than 1 cycle per input byte. - -## Requirements - -- C++11 compatible compiler. We support LLVM clang, GCC, Visual Studio. (Our - optional benchmark tool requires C++17.) -- For high speed, you should have a recent 64-bit system (e.g., ARM or x64). -- If you rely on CMake, you should use a recent CMake (at least 3.15). -- AVX-512 support require a processor with AVX512-VBMI2 (Ice Lake or better) and - a recent compiler (GCC 8 or better, Visual Studio 2019 or better, LLVM clang 6 - or better). You need a correspondingly recent assembler such as gas (2.30+) or - nasm (2.14+): recent compilers usually come with recent assemblers. If you mix - a recent compiler with an incompatible/old assembler (e.g., when using a - recent compiler with an old Linux distribution), you may get errors at build - time because the compiler produces instructions that the assembler does not - recognize: you should update your assembler to match your compiler (e.g., - upgrade binutils to version 2.30 or better under Linux) or use an older - compiler matching the capabilities of your assembler. - -## Build with CMake - -``` -cmake -B build -cmake --build build -cd build -ctest . 
-``` - -Visual Studio users must specify whether they want to build the Release or Debug -version. - -To run benchmarks, build and execute the `bench` command. - -``` -cmake -B build -cmake --build build -./build/benchmarks/bench -``` - -Instructions are similar for Visual Studio users. - -## Real-word usage - -This C++ library is part of the JavaScript package -[utf-8-validate](https://github.com/websockets/utf-8-validate). The -utf-8-validate package is routinely downloaded more than -[a million times per week](https://www.npmjs.com/package/utf-8-validate). - -If you are using Node JS (19.4.0 or better), you already have access to this -function as -[`buffer.isUtf8(input)`](https://nodejs.org/api/buffer.html#bufferisutf8input). - -## Reference - -- John Keiser, Daniel Lemire, - [Validating UTF-8 In Less Than One Instruction Per Byte](https://arxiv.org/abs/2010.03090), - Software: Practice & Experience 51 (5), 2021 - -## Want more? - -If you want a wide range of fast Unicode function for production use, you can -rely on the simdutf library. It is as simple as the following: - -```C++ -#include "simdutf.cpp" -#include "simdutf.h" - -int main(int argc, char *argv[]) { - const char *source = "1234"; - // 4 == strlen(source) - bool validutf8 = simdutf::validate_utf8(source, 4); - if (validutf8) { - std::cout << "valid UTF-8" << std::endl; - } else { - std::cerr << "invalid UTF-8" << std::endl; - return EXIT_FAILURE; - } -} -``` - -See https://github.com/simdutf/ - -## License - -This library is distributed under the terms of any of the following licenses, at -your option: - -- Apache License (Version 2.0) [LICENSE-APACHE](LICENSE-APACHE), -- Boost Software License [LICENSE-BOOST](LICENSE-BOOST), or -- MIT License [LICENSE-MIT](LICENSE-MIT). 
diff --git a/.github/workflows/encodingcheck/is_utf8/benchmarks/CMakeLists.txt b/.github/workflows/encodingcheck/is_utf8/benchmarks/CMakeLists.txt deleted file mode 100644 index 0bceec72..00000000 --- a/.github/workflows/encodingcheck/is_utf8/benchmarks/CMakeLists.txt +++ /dev/null @@ -1,13 +0,0 @@ - -include(${PROJECT_SOURCE_DIR}/cmake/import.cmake) -set_off(SIMDUTF_TOOLS) -set_off(SIMDUTF_BENCHMARKS) - -import_dependency(simdutf simdutf/simdutf v2.0.9) -set(BUILD_TESTING OFF) -add_dependency(simdutf) -unset(BUILD_TESTING) - -add_executable(bench bench.cpp) -target_link_libraries(bench PRIVATE is_utf8) -target_link_libraries(bench PRIVATE simdutf) diff --git a/.github/workflows/encodingcheck/is_utf8/benchmarks/bench.cpp b/.github/workflows/encodingcheck/is_utf8/benchmarks/bench.cpp deleted file mode 100644 index 44928940..00000000 --- a/.github/workflows/encodingcheck/is_utf8/benchmarks/bench.cpp +++ /dev/null @@ -1,251 +0,0 @@ -#include "is_utf8.h" -#include "simdutf.h" -#include -#include -#include -#include -#include -#include -#include -#include - -uint64_t nano() { - return std::chrono::duration_cast<::std::chrono::nanoseconds>( - std::chrono::steady_clock::now().time_since_epoch()) - .count(); -} - -#ifdef _MSC_VER -#define never_inline __declspec(noinline) -#else -#define never_inline __attribute__((noinline)) -#endif - -// generate a string having at least length N -// can exceed by up to 3 chars, returns the actual length -size_t populate_utf8(char *data, size_t N) { - size_t i = 0; - for (; i < N;) { - int w = rand() & 0xFF; - if (w < 0x80) { - data[i++] = 0x20; // w; - } else if (w < 0xE0) { - data[i++] = 0xC2 + rand() % (0xDF - 0xC2 + 1); - data[i++] = 0x80 + rand() % (0xBF - 0x80 + 1); - } else if (w == 0xE0) { - data[i++] = w; - data[i++] = 0xA0 + rand() % (0xBF - 0xA0 + 1); - data[i++] = 0x80 + rand() % (0xBF - 0x80 + 1); - } else if (w <= 0xEC) { - data[i++] = w; - data[i++] = 0x80 + rand() % (0xBF - 0x80 + 1); - data[i++] = 0x80 + rand() % (0xBF - 
0x80 + 1); - } else if (w == 0xED) { - data[i++] = w; - data[i++] = 0x80 + rand() % (0x9F - 0x80 + 1); - data[i++] = 0x80 + rand() % (0xBF - 0x80 + 1); - } else if (w <= 0xEF) { - data[i++] = w; - data[i++] = 0x80 + rand() % (0xBF - 0x80 + 1); - data[i++] = 0x80 + rand() % (0xBF - 0x80 + 1); - } else if (w < 0xF0) { - data[i++] = 0xF1 + rand() % (0xF3 - 0xF1 + 1); - data[i++] = 0x80 + rand() % (0xBF - 0x80 + 1); - data[i++] = 0x80 + rand() % (0xBF - 0x80 + 1); - data[i++] = 0x80 + rand() % (0xBF - 0x80 + 1); - } else if (w == 0xF0) { - data[i++] = w; - data[i++] = 0x90 + rand() % (0xBF - 0x90 + 1); - data[i++] = 0x80 + rand() % (0xBF - 0x80 + 1); - data[i++] = 0x80 + rand() % (0xBF - 0x80 + 1); - } else if (w <= 0xF3) { - data[i++] = 0xF1 + rand() % (0xF3 - 0xF1 + 1); - data[i++] = 0x80 + rand() % (0xBF - 0x80 + 1); - data[i++] = 0x80 + rand() % (0xBF - 0x80 + 1); - data[i++] = 0x80 + rand() % (0xBF - 0x80 + 1); - } else if (w == 0xF4) { - data[i++] = w; - data[i++] = 0x80 + rand() % (0x8F - 0x80 + 1); - data[i++] = 0x80 + rand() % (0xBF - 0x80 + 1); - data[i++] = 0x80 + rand() % (0xBF - 0x80 + 1); - } - } - data[i] = '\0'; - return i; -} - -// copied in part from Guava -static never_inline bool basic_validate_utf8(const char *b, size_t length) { - const unsigned char *bytes = (const unsigned char *)b; - for (size_t index = 0;;) { - unsigned char byte1; - - do { // fast ASCII Path - if (index >= length) { - return true; - } - byte1 = bytes[index++]; - } while (byte1 < 0x80); - if (byte1 < 0xE0) { - // Two-byte form. - if (index == length) { - return false; - } - if (byte1 < 0xC2 || bytes[index++] > 0xBF) { - return false; - } - } else if (byte1 < 0xF0) { - // Three-byte form. - if (index + 1 >= length) { - return false; - } - unsigned char byte2 = bytes[index++]; - if (byte2 > 0xBF - // Overlong? 5 most significant bits must not all be zero. - || (byte1 == 0xE0 && byte2 < 0xA0) - // Check for illegal surrogate codepoints. 
- || (byte1 == 0xED && 0xA0 <= byte2) - // Third byte trailing-byte test. - || bytes[index++] > 0xBF) { - return false; - } - } else { - - // Four-byte form. - if (index + 2 >= length) { - return false; - } - int byte2 = bytes[index++]; - if (byte2 > 0xBF - // Check that 1 <= plane <= 16. Tricky optimized form of: - // if (byte1 > (byte) 0xF4 - // || byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 - // || byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F) - || (((byte1 << 28) + (byte2 - 0x90)) >> 30) != 0 - // Third byte trailing-byte test - || bytes[index++] > 0xBF - // Fourth byte trailing-byte test - || bytes[index++] > 0xBF) { - return false; - } - } - } -} - - -bool zerobuffer_bench(size_t N) { - printf("zero buffer \n"); - printf("string size = %zu \n", N); - char *input = new char[N]{}; - volatile bool isgood{true}; - - { - uint64_t start = nano(); - uint64_t finish = start; - size_t count{0}; - uint64_t threshold = 500000000; - for (; finish - start < threshold;) { - count++; - isgood &= basic_validate_utf8(input, N); - finish = nano(); - } - double t = (N * count) / double(finish - start); - - printf("basic_validate_utf8 %f GB/s\n", t); - } - - { - uint64_t start = nano(); - uint64_t finish = start; - size_t count{0}; - uint64_t threshold = 500000000; - for (; finish - start < threshold;) { - count++; - isgood &= simdutf::validate_utf8(input, N); - finish = nano(); - } - double t = (N * count) / double(finish - start); - - printf("simdutf %f GB/s\n", t); - } - - { - uint64_t start = nano(); - uint64_t finish = start; - size_t count{0}; - uint64_t threshold = 500000000; - for (; finish - start < threshold;) { - count++; - isgood &= is_utf8(input, N); - finish = nano(); - } - double t = (N * count) / double(finish - start); - - printf("is_utf8 %f GB/s\n", t); - } - delete[] input; - printf("\n"); - return isgood; -} - -bool bench(size_t N) { - printf("random UTF-8\n"); - printf("string size = %zu \n", N); - char *input = new char[N]; - populate_utf8(input, N); - 
volatile bool isgood{true}; - - { - uint64_t start = nano(); - uint64_t finish = start; - size_t count{0}; - uint64_t threshold = 500000000; - for (; finish - start < threshold;) { - count++; - isgood &= basic_validate_utf8(input, N); - finish = nano(); - } - double t = (N * count) / double(finish - start); - - printf("basic_validate_utf8 %f GB/s\n", t); - } - - { - uint64_t start = nano(); - uint64_t finish = start; - size_t count{0}; - uint64_t threshold = 500000000; - for (; finish - start < threshold;) { - count++; - isgood &= simdutf::validate_utf8(input, N); - finish = nano(); - } - double t = (N * count) / double(finish - start); - - printf("simdutf %f GB/s\n", t); - } - - { - uint64_t start = nano(); - uint64_t finish = start; - size_t count{0}; - uint64_t threshold = 500000000; - for (; finish - start < threshold;) { - count++; - isgood &= is_utf8(input, N); - finish = nano(); - } - double t = (N * count) / double(finish - start); - - printf("is_utf8 %f GB/s\n", t); - } - delete[] input; - printf("\n"); - return isgood; -} - -int main() { - return (bench(40096) & bench(100000) & bench(50000)) - & (zerobuffer_bench(40096) & zerobuffer_bench(100000) & zerobuffer_bench(50000)) - ? EXIT_SUCCESS : EXIT_FAILURE; -} \ No newline at end of file diff --git a/.github/workflows/encodingcheck/is_utf8/cmake/add_cpp_test.cmake b/.github/workflows/encodingcheck/is_utf8/cmake/add_cpp_test.cmake deleted file mode 100644 index c4ce6a53..00000000 --- a/.github/workflows/encodingcheck/is_utf8/cmake/add_cpp_test.cmake +++ /dev/null @@ -1,59 +0,0 @@ -# Helper so we don't have to repeat ourselves so much -# Usage: add_cpp_test(testname [COMPILE_ONLY] [SOURCES a.cpp b.cpp ...] [LABELS acceptance per_implementation ...]) -# SOURCES defaults to testname.cpp if not specified. 
-function(add_cpp_test TEST_NAME) - # Parse arguments - cmake_parse_arguments(PARSE_ARGV 1 ARGS "COMPILE_ONLY;LIBRARY;WILL_FAIL" "" "SOURCES;LABELS;DEPENDENCY_OF") - if (NOT ARGS_SOURCES) - list(APPEND ARGS_SOURCES ${TEST_NAME}.cpp) - endif() - if (ARGS_COMPILE_ONLY) - list(APPEND ${ARGS_LABELS} compile_only) - endif() - if (IS_UTF8_SANITIZE) - add_compile_options(-fsanitize=address -fno-omit-frame-pointer -fno-sanitize-recover=all) - add_compile_definitions(ASAN_OPTIONS=detect_leaks=1) - endif() - # Add the compile target - if (ARGS_LIBRARY) - add_library(${TEST_NAME} STATIC ${ARGS_SOURCES}) - else(ARGS_LIBRARY) - add_executable(${TEST_NAME} ${ARGS_SOURCES}) - endif(ARGS_LIBRARY) - - # Add test - if (ARGS_COMPILE_ONLY OR ARGS_LIBRARY) - add_test( - NAME ${TEST_NAME} - COMMAND ${CMAKE_COMMAND} --build . --target ${TEST_NAME} --config $ - WORKING_DIRECTORY ${PROJECT_BINARY_DIR} - ) - set_target_properties(${TEST_NAME} PROPERTIES EXCLUDE_FROM_ALL TRUE EXCLUDE_FROM_DEFAULT_BUILD TRUE) - else() - add_test(${TEST_NAME} ${TEST_NAME}) - - # Add to