diff --git a/THIRD-PARTY-NOTICES.TXT b/THIRD-PARTY-NOTICES.TXT index 18e149ccfd170..3dc0d7aa85979 100644 --- a/THIRD-PARTY-NOTICES.TXT +++ b/THIRD-PARTY-NOTICES.TXT @@ -69,19 +69,27 @@ written authorization of the copyright holder. License notice for zlib-ng ----------------------- -https://github.com/zlib-ng/zlib-ng/blob/develop/LICENSE.md +https://github.com/zlib-ng/zlib-ng/blob/d54e3769be0c522015b784eca2af258b1c026107/LICENSE.md (C) 1995-2024 Jean-loup Gailly and Mark Adler -This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software. +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any damages +arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions: +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: -The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. -Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. 
-This notice may not be removed or altered from any source distribution. +3. This notice may not be removed or altered from any source distribution. License notice for LinuxTracepoints ----------------------------------- diff --git a/src/libraries/Common/src/System/IO/Compression/ZLibNative.CompressionLevel.cs b/src/libraries/Common/src/System/IO/Compression/ZLibNative.CompressionLevel.cs new file mode 100644 index 0000000000000..4d55f6373ef6f --- /dev/null +++ b/src/libraries/Common/src/System/IO/Compression/ZLibNative.CompressionLevel.cs @@ -0,0 +1,49 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +namespace System.IO.Compression; + +internal static partial class ZLibNative +{ + /// + ///

ZLib can accept any integer value between 0 and 9 (inclusive) as a valid compression level parameter: + /// 1 gives best speed, 9 gives best compression, 0 gives no compression at all (the input data is simply copied a block at a time). + /// CompressionLevel.DefaultCompression = -1 requests a default compromise between speed and compression + /// (currently equivalent to level 6).

+ /// + ///

How to choose a compression level:

+ /// + ///

The names NoCompression, BestSpeed, DefaultCompression, BestCompression are taken over from + /// the corresponding ZLib definitions, which map to our public NoCompression, Fastest, Optimal, and SmallestSize respectively.

+ ///

Optimal Compression:

+ ///

ZLibNative.CompressionLevel compressionLevel = ZLibNative.CompressionLevel.DefaultCompression;
+ /// int windowBits = 15; // or -15 if no headers required
+ /// int memLevel = 8;
+ /// ZLibNative.CompressionStrategy strategy = ZLibNative.CompressionStrategy.DefaultStrategy;

+ /// + ///

Fastest compression:

+ ///

ZLibNative.CompressionLevel compressionLevel = ZLibNative.CompressionLevel.BestSpeed;
+ /// int windowBits = 15; // or -15 if no headers required
+ /// int memLevel = 8;
+ /// ZLibNative.CompressionStrategy strategy = ZLibNative.CompressionStrategy.DefaultStrategy;

+ /// + ///

No compression (even faster, useful for data that cannot be compressed, such as some image formats):

+ ///

ZLibNative.CompressionLevel compressionLevel = ZLibNative.CompressionLevel.NoCompression;
+ /// int windowBits = 15; // or -15 if no headers required
+ /// int memLevel = 7;
+ /// ZLibNative.CompressionStrategy strategy = ZLibNative.CompressionStrategy.DefaultStrategy;

+ /// + ///

Smallest Size Compression:

+ ///

ZLibNative.CompressionLevel compressionLevel = ZLibNative.CompressionLevel.BestCompression;
+ /// int windowBits = 15; // or -15 if no headers required
+ /// int memLevel = 8;
+ /// ZLibNative.CompressionStrategy strategy = ZLibNative.CompressionStrategy.DefaultStrategy;

+ ///
+ public enum CompressionLevel : int + { + NoCompression = 0, + BestSpeed = 1, + DefaultCompression = -1, + BestCompression = 9 + } +} diff --git a/src/libraries/Common/src/System/IO/Compression/ZLibNative.cs b/src/libraries/Common/src/System/IO/Compression/ZLibNative.cs index 0113334b964e1..f5c7cf8077380 100644 --- a/src/libraries/Common/src/System/IO/Compression/ZLibNative.cs +++ b/src/libraries/Common/src/System/IO/Compression/ZLibNative.cs @@ -37,48 +37,6 @@ public enum ErrorCode : int VersionError = -6 } - /// - ///

ZLib can accept any integer value between 0 and 9 (inclusive) as a valid compression level parameter: - /// 1 gives best speed, 9 gives best compression, 0 gives no compression at all (the input data is simply copied a block at a time). - /// CompressionLevel.DefaultCompression = -1 requests a default compromise between speed and compression - /// (currently equivalent to level 6).

- /// - ///

How to choose a compression level:

- /// - ///

The names NoCompression, BestSpeed, DefaultCompression, BestCompression are taken over from - /// the corresponding ZLib definitions, which map to our public NoCompression, Fastest, Optimal, and SmallestSize respectively.

- ///

Optimal Compression:

- ///

ZLibNative.CompressionLevel compressionLevel = ZLibNative.CompressionLevel.DefaultCompression;
- /// int windowBits = 15; // or -15 if no headers required
- /// int memLevel = 8;
- /// ZLibNative.CompressionStrategy strategy = ZLibNative.CompressionStrategy.DefaultStrategy;

- /// - ///

Fastest compression:

- ///

ZLibNative.CompressionLevel compressionLevel = ZLibNative.CompressionLevel.BestSpeed;
- /// int windowBits = 15; // or -15 if no headers required
- /// int memLevel = 8;
- /// ZLibNative.CompressionStrategy strategy = ZLibNative.CompressionStrategy.DefaultStrategy;

- /// - ///

No compression (even faster, useful for data that cannot be compressed such some image formats):

- ///

ZLibNative.CompressionLevel compressionLevel = ZLibNative.CompressionLevel.NoCompression;
- /// int windowBits = 15; // or -15 if no headers required
- /// int memLevel = 7;
- /// ZLibNative.CompressionStrategy strategy = ZLibNative.CompressionStrategy.DefaultStrategy;

- /// - ///

Smallest Size Compression:

- ///

ZLibNative.CompressionLevel compressionLevel = ZLibNative.CompressionLevel.BestCompression;
- /// int windowBits = 15; // or -15 if no headers required
- /// int memLevel = 8;
- /// ZLibNative.CompressionStrategy strategy = ZLibNative.CompressionStrategy.DefaultStrategy;

- ///
- public enum CompressionLevel : int - { - NoCompression = 0, - BestSpeed = 1, - DefaultCompression = -1, - BestCompression = 9 - } - /// ///

From the ZLib manual:

///

CompressionStrategy is used to tune the compression algorithm.
diff --git a/src/libraries/Common/tests/System/IO/Compression/CompressionStreamUnitTestBase.cs b/src/libraries/Common/tests/System/IO/Compression/CompressionStreamUnitTestBase.cs index ed6f78c56d2cd..a6635777f8583 100644 --- a/src/libraries/Common/tests/System/IO/Compression/CompressionStreamUnitTestBase.cs +++ b/src/libraries/Common/tests/System/IO/Compression/CompressionStreamUnitTestBase.cs @@ -497,6 +497,30 @@ async Task GetLengthAsync(CompressionLevel compressionLevel) Assert.True(optimalLength >= smallestLength); } + [Theory] + [MemberData(nameof(UncompressedTestFilesZLib))] + public async Task ZLibCompressionOptions_SizeInOrder(string testFile) + { + using var uncompressedStream = await LocalMemoryStream.readAppFileAsync(testFile); + + async Task GetLengthAsync(int compressionLevel) + { + uncompressedStream.Position = 0; + using var mms = new MemoryStream(); + using var compressor = CreateStream(mms, new ZLibCompressionOptions() { CompressionLevel = compressionLevel, CompressionStrategy = ZLibCompressionStrategy.Default }, leaveOpen: false); + await uncompressedStream.CopyToAsync(compressor); + await compressor.FlushAsync(); + return mms.Length; + } + + long fastestLength = await GetLengthAsync(1); + long optimalLength = await GetLengthAsync(5); + long smallestLength = await GetLengthAsync(9); + + Assert.True(fastestLength >= optimalLength); + Assert.True(optimalLength >= smallestLength); + } + [Theory] [MemberData(nameof(ZLibOptionsRoundTripTestData))] public async Task RoundTripWithZLibCompressionOptions(string testFile, ZLibCompressionOptions options) @@ -537,28 +561,6 @@ private async Task CompressTestFile(LocalMemoryStream testStream, return compressorOutput; } - protected async Task CompressionLevel_SizeInOrderBase(string testFile) - { - using var uncompressedStream = await LocalMemoryStream.readAppFileAsync(testFile); - - async Task GetLengthAsync(int compressionLevel) - { - uncompressedStream.Position = 0; - using var mms = new MemoryStream(); - 
using var compressor = CreateStream(mms, new ZLibCompressionOptions() { CompressionLevel = compressionLevel, CompressionStrategy = ZLibCompressionStrategy.Default }, leaveOpen: false); - await uncompressedStream.CopyToAsync(compressor); - await compressor.FlushAsync(); - return mms.Length; - } - - long prev = await GetLengthAsync(0); - for (int i = 1; i < 10; i++) - { - long cur = await GetLengthAsync(i); - Assert.True(cur <= prev, $"Expected {cur} <= {prev} for quality {i}"); - prev = cur; - } - } } public enum TestScenario diff --git a/src/libraries/System.IO.Compression.Brotli/tests/System.IO.Compression.Brotli.Tests.csproj b/src/libraries/System.IO.Compression.Brotli/tests/System.IO.Compression.Brotli.Tests.csproj index 167caaaa6bc84..936e3768a0e4e 100644 --- a/src/libraries/System.IO.Compression.Brotli/tests/System.IO.Compression.Brotli.Tests.csproj +++ b/src/libraries/System.IO.Compression.Brotli/tests/System.IO.Compression.Brotli.Tests.csproj @@ -27,6 +27,8 @@ + diff --git a/src/libraries/System.IO.Compression/src/System.IO.Compression.csproj b/src/libraries/System.IO.Compression/src/System.IO.Compression.csproj index 208844b5d67aa..9292ec1f46d83 100644 --- a/src/libraries/System.IO.Compression/src/System.IO.Compression.csproj +++ b/src/libraries/System.IO.Compression/src/System.IO.Compression.csproj @@ -35,6 +35,8 @@ + diff --git a/src/libraries/System.IO.Compression/tests/CompressionStreamUnitTests.Deflate.cs b/src/libraries/System.IO.Compression/tests/CompressionStreamUnitTests.Deflate.cs index 9c25d5e5483fb..d91a243b907cc 100644 --- a/src/libraries/System.IO.Compression/tests/CompressionStreamUnitTests.Deflate.cs +++ b/src/libraries/System.IO.Compression/tests/CompressionStreamUnitTests.Deflate.cs @@ -219,12 +219,5 @@ public override Task WriteAsync(byte[] buffer, int offset, int count, Cancellati return base.WriteAsync(buffer, offset, count, cancellationToken); } } - - [Theory] - [MemberData(nameof(UncompressedTestFilesZLib))] - public async Task 
ZLibCompressionLevel_SizeInOrder(string testFile) - { - await CompressionLevel_SizeInOrderBase(testFile); - } } } diff --git a/src/libraries/System.IO.Compression/tests/CompressionStreamUnitTests.Gzip.cs b/src/libraries/System.IO.Compression/tests/CompressionStreamUnitTests.Gzip.cs index 9fabd3dc31a26..9919b2c819ae3 100644 --- a/src/libraries/System.IO.Compression/tests/CompressionStreamUnitTests.Gzip.cs +++ b/src/libraries/System.IO.Compression/tests/CompressionStreamUnitTests.Gzip.cs @@ -441,12 +441,5 @@ public override Task WriteAsync(byte[] buffer, int offset, int count, Cancellati return base.WriteAsync(buffer, offset, count, cancellationToken); } } - - [Theory] - [MemberData(nameof(UncompressedTestFilesZLib))] - public async Task ZLibCompressionLevel_SizeInOrder(string testFile) - { - await CompressionLevel_SizeInOrderBase(testFile); - } } } diff --git a/src/libraries/System.IO.Compression/tests/CompressionStreamUnitTests.ZLib.cs b/src/libraries/System.IO.Compression/tests/CompressionStreamUnitTests.ZLib.cs index ea868cdcf1f44..bed459833e182 100644 --- a/src/libraries/System.IO.Compression/tests/CompressionStreamUnitTests.ZLib.cs +++ b/src/libraries/System.IO.Compression/tests/CompressionStreamUnitTests.ZLib.cs @@ -150,12 +150,5 @@ public void StreamTruncation_IsDetected(TestScenario testScenario) } }, testScenario.ToString()).Dispose(); } - - [Theory] - [MemberData(nameof(UncompressedTestFilesZLib))] - public async Task ZLibCompressionLevel_SizeInOrder(string testFile) - { - await CompressionLevel_SizeInOrderBase(testFile); - } } } diff --git a/src/libraries/System.IO.Compression/tests/System.IO.Compression.Tests.csproj b/src/libraries/System.IO.Compression/tests/System.IO.Compression.Tests.csproj index be6e981939a7f..286ffea285025 100644 --- a/src/libraries/System.IO.Compression/tests/System.IO.Compression.Tests.csproj +++ b/src/libraries/System.IO.Compression/tests/System.IO.Compression.Tests.csproj @@ -35,6 +35,8 @@ + diff --git 
a/src/libraries/System.Net.WebSockets/src/System.Net.WebSockets.csproj b/src/libraries/System.Net.WebSockets/src/System.Net.WebSockets.csproj index 177e95dacee0a..b60a0bf8cc463 100644 --- a/src/libraries/System.Net.WebSockets/src/System.Net.WebSockets.csproj +++ b/src/libraries/System.Net.WebSockets/src/System.Net.WebSockets.csproj @@ -37,6 +37,8 @@ Link="Common\System\Net\WebSockets\WebSocketValidate.cs" /> + " + "$${CMAKE_CURRENT_SOURCE_DIR}>" "$") endforeach() @@ -1124,7 +1235,7 @@ if(NOT DEFINED BUILD_SHARED_LIBS OR BUILD_SHARED_LIBS) if(HAVE_NO_INTERPOSITION) set_target_properties(zlib PROPERTIES COMPILE_FLAGS "-fno-semantic-interposition") endif() - if(NOT APPLE) + if(NOT APPLE AND NOT CMAKE_SYSTEM_NAME STREQUAL AIX) if(NOT ZLIB_COMPAT) add_definitions(-DHAVE_SYMVER) endif() @@ -1183,6 +1294,7 @@ endif() if(NOT SKIP_INSTALL_LIBRARIES AND NOT SKIP_INSTALL_ALL) install(TARGETS ${ZLIB_INSTALL_LIBRARIES} + EXPORT ${EXPORT_NAME} RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}" LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}") @@ -1197,22 +1309,30 @@ if(NOT SKIP_INSTALL_HEADERS AND NOT SKIP_INSTALL_ALL) endif() if(NOT SKIP_INSTALL_FILES AND NOT SKIP_INSTALL_ALL) install(FILES ${ZLIB_PC} DESTINATION "${PKGCONFIG_INSTALL_DIR}") - # INFO: Mimics official zlib CMake target - # Generates ZLIB.cmake in case ZLIB_COMPAT=ON and always exports the CMake target ZLIB::ZLIB - # In case ZLIB_COMPAT=OFF, the CMake target and file follows zlib-ng naming convention - if (ZLIB_COMPAT) - if (TARGET zlib) - set_target_properties(zlib PROPERTIES EXPORT_NAME ZLIB) - else() - set_target_properties(zlibstatic PROPERTIES EXPORT_NAME ZLIB) - endif() - endif() - install(TARGETS ${ZLIB_INSTALL_LIBRARIES} - EXPORT ${EXPORT_NAME} - DESTINATION "${CMAKE_INSTALL_LIBDIR}") install(EXPORT ${EXPORT_NAME} DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${EXPORT_NAME}" NAMESPACE ${EXPORT_NAME}::) + # Use GNU-style variable names + set(INCLUDE_INSTALL_DIR 
${CMAKE_INSTALL_INCLUDEDIR}) + set(LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR}) + if (ZLIB_COMPAT) + set(PACKAGE_CONFIGNAME zlib) + set(PACKAGE_VERSION ${ZLIB_HEADER_VERSION}) + else() + set(PACKAGE_CONFIGNAME zlib-ng) + set(PACKAGE_VERSION ${ZLIBNG_HEADER_VERSION}) + endif() + configure_package_config_file(${PACKAGE_CONFIGNAME}-config.cmake.in + ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_CONFIGNAME}-config.cmake + INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${EXPORT_NAME} + PATH_VARS INCLUDE_INSTALL_DIR LIB_INSTALL_DIR) + write_basic_package_version_file( + ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_CONFIGNAME}-config-version.cmake + VERSION ${PACKAGE_VERSION} + COMPATIBILITY AnyNewerVersion) + install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_CONFIGNAME}-config.cmake + ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_CONFIGNAME}-config-version.cmake + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${EXPORT_NAME}) endif() #============================================================================ @@ -1251,6 +1371,7 @@ add_feature_info(WITH_OPTIM WITH_OPTIM "Build with optimisation") add_feature_info(WITH_NEW_STRATEGIES WITH_NEW_STRATEGIES "Use new strategies") add_feature_info(WITH_NATIVE_INSTRUCTIONS WITH_NATIVE_INSTRUCTIONS "Instruct the compiler to use the full instruction set on this host (gcc/clang -march=native)") +add_feature_info(WITH_RUNTIME_CPU_DETECTION WITH_RUNTIME_CPU_DETECTION "Build with runtime CPU detection") add_feature_info(WITH_MAINTAINER_WARNINGS WITH_MAINTAINER_WARNINGS "Build with project maintainer warnings") add_feature_info(WITH_CODE_COVERAGE WITH_CODE_COVERAGE "Enable code coverage reporting") add_feature_info(WITH_INFLATE_STRICT WITH_INFLATE_STRICT "Build with strict inflate distance checking") diff --git a/src/native/external/zlib-ng/FAQ.zlib b/src/native/external/zlib-ng/FAQ.zlib index 163160c1032bf..698a4f83f0fbe 100644 --- a/src/native/external/zlib-ng/FAQ.zlib +++ b/src/native/external/zlib-ng/FAQ.zlib @@ -20,8 +20,7 @@ The latest zlib FAQ is at 
https://zlib.net/zlib_faq.html 2. Where can I get a Windows DLL version? The zlib sources can be compiled without change to produce a DLL. See the - file win32/DLL_FAQ.txt in the zlib distribution. Pointers to the - precompiled DLL are found in the zlib web site at https://zlib.net/ . + file win32/DLL_FAQ.txt in the zlib distribution. 3. Where can I get a Visual Basic interface to zlib? diff --git a/src/native/external/zlib-ng/LICENSE.md b/src/native/external/zlib-ng/LICENSE.md index adb48d47296b4..e866d7ac18620 100644 --- a/src/native/external/zlib-ng/LICENSE.md +++ b/src/native/external/zlib-ng/LICENSE.md @@ -1,4 +1,4 @@ -(C) 1995-2013 Jean-loup Gailly and Mark Adler +(C) 1995-2024 Jean-loup Gailly and Mark Adler This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages diff --git a/src/native/external/zlib-ng/Makefile.in b/src/native/external/zlib-ng/Makefile.in index 3859eb5325c3a..41f3e924553a3 100644 --- a/src/native/external/zlib-ng/Makefile.in +++ b/src/native/external/zlib-ng/Makefile.in @@ -1,5 +1,5 @@ # Makefile for zlib -# Copyright (C) 1995-2016 Jean-loup Gailly, Mark Adler +# Copyright (C) 1995-2024 Jean-loup Gailly, Mark Adler # For conditions of distribution and use, see copyright notice in zlib.h # To compile and test, type: @@ -28,8 +28,9 @@ SUFFIX=-ng TEST_LIBS=$(LIBNAME1).a LDSHARED=$(CC) LDSHAREDFLAGS=-shared +LDVERSIONSCRIPT= -VER=2.1.6 +VER=2.2.1 VER1=2 STATICLIB=$(LIBNAME1).a @@ -74,15 +75,17 @@ man3dir = ${mandir}/man3 pkgconfigdir = ${libdir}/pkgconfig OBJZ = \ + arch/generic/adler32_c.o \ + arch/generic/adler32_fold_c.o \ + arch/generic/chunkset_c.o \ + arch/generic/compare256_c.o \ + arch/generic/crc32_braid_c.o \ + arch/generic/crc32_fold_c.o \ + arch/generic/slide_hash_c.o \ adler32.o \ - adler32_fold.o \ - chunkset.o \ - compare256.o \ compress.o \ - cpu_features.o \ - crc32_braid.o \ + crc32.o \ crc32_braid_comb.o \ - crc32_fold.o \ deflate.o \ 
deflate_fast.o \ deflate_huff.o \ @@ -97,10 +100,10 @@ OBJZ = \ inftrees.o \ insert_string.o \ insert_string_roll.o \ - slide_hash.o \ trees.o \ uncompr.o \ zutil.o \ + cpu_features.o \ $(ARCH_STATIC_OBJS) OBJG = \ @@ -112,15 +115,17 @@ TESTOBJG = OBJC = $(OBJZ) $(OBJG) PIC_OBJZ = \ + arch/generic/adler32_c.lo \ + arch/generic/adler32_fold_c.lo \ + arch/generic/chunkset_c.lo \ + arch/generic/compare256_c.lo \ + arch/generic/crc32_braid_c.lo \ + arch/generic/crc32_fold_c.lo \ + arch/generic/slide_hash_c.lo \ adler32.lo \ - adler32_fold.lo \ - chunkset.lo \ - compare256.lo \ compress.lo \ - cpu_features.lo \ - crc32_braid.lo \ + crc32.lo \ crc32_braid_comb.lo \ - crc32_fold.lo \ deflate.lo \ deflate_fast.lo \ deflate_huff.lo \ @@ -135,10 +140,10 @@ PIC_OBJZ = \ inftrees.lo \ insert_string.lo \ insert_string_roll.lo \ - slide_hash.lo \ trees.lo \ uncompr.lo \ zutil.lo \ + cpu_features.lo \ $(ARCH_SHARED_OBJS) PIC_OBJG = \ @@ -169,6 +174,12 @@ $(ARCHDIR)/%.o: $(SRCDIR)/$(ARCHDIR)/%.c $(ARCHDIR)/%.lo: $(SRCDIR)/$(ARCHDIR)/%.c $(MAKE) -C $(ARCHDIR) $(notdir $@) +arch/generic/%.o: $(SRCDIR)/arch/generic/%.c + $(MAKE) -C arch/generic $(notdir $@) + +arch/generic/%.lo: $(SRCDIR)/arch/generic/%.c + $(MAKE) -C arch/generic $(notdir $@) + %.o: $(ARCHDIR)/%.o -cp $< $@ @@ -242,7 +253,7 @@ gzwrite.lo: $(SRCDIR)/gzwrite.c $(SHAREDTARGET): $(PIC_OBJS) $(DEFFILE) $(RCOBJS) ifneq ($(SHAREDTARGET),) - $(LDSHARED) $(CFLAGS) $(LDSHAREDFLAGS) $(LDFLAGS) -o $@ $(DEFFILE) $(PIC_OBJS) $(RCOBJS) $(LDSHAREDLIBC) + $(LDSHARED) $(CFLAGS) $(LDSHAREDFLAGS) $(LDVERSIONSCRIPT) $(LDFLAGS) -o $@ $(DEFFILE) $(PIC_OBJS) $(RCOBJS) $(LDSHAREDLIBC) ifneq ($(STRIP),) $(STRIP) $@ endif @@ -356,6 +367,7 @@ uninstall: uninstall-static uninstall-shared mostlyclean: clean clean: @if [ -f $(ARCHDIR)/Makefile ]; then $(MAKE) -C $(ARCHDIR) clean; fi + @if [ -f arch/generic/Makefile ]; then $(MAKE) -C arch/generic clean; fi @if [ -f test/Makefile ]; then $(MAKE) -C test clean; fi rm -f *.o *.lo *~ \ example$(EXE) 
minigzip$(EXE) minigzipsh$(EXE) \ diff --git a/src/native/external/zlib-ng/PORTING.md b/src/native/external/zlib-ng/PORTING.md index c48522e3a02f7..208f6ee9e7c0f 100644 --- a/src/native/external/zlib-ng/PORTING.md +++ b/src/native/external/zlib-ng/PORTING.md @@ -4,6 +4,38 @@ Porting applications to use zlib-ng Zlib-ng can be used/compiled in two different modes, that require some consideration by the application developer. +Changes from zlib affecting native and compat modes +--------------------------------------------------- +Zlib-ng is not as conservative with memory allocation as Zlib is. + +Where Zlib's inflate will allocate a lower amount of memory depending on +compression level and window size, zlib-ng will always allocate the maximum +amount of memory and possibly leave parts of it unused. +Zlib-ng's deflate will however allocate a lower amount of memory depending +on compression level and window size. + +Zlib-ng also allocates one "big" buffer instead of doing multiple smaller +allocations. This is faster, can lead to better cache locality and reduces +space lost to alignment padding. + +At the time of writing, by default zlib-ng allocates the following amounts +of memory on a 64-bit system (except on S390x that requires ~4KiB more): +- Deflate: 350.272 Bytes +- Inflate: 42.112 Bytes + +**Advantages:** +- All memory is allocated during DeflateInit or InflateInit functions, + leaving the actual deflate/inflate functions free from allocations. +- Zlib-ng can only fail from memory allocation errors during init. +- Time spent doing memory allocation systemcalls is all done during init, + allowing applications to do prepare this before doing latency-sensitive + deflate/inflate later. +- Can reduce wasted memory due to buffer alignment padding both by OS and zlib-ng. +- Potentially improved memory locality. + +**Disadvantages:** +- Zlib-ng allocates a little more memory than zlib does. 
+ zlib-compat mode ---------------- Zlib-ng can be compiled in zlib-compat mode, suitable for zlib-replacement diff --git a/src/native/external/zlib-ng/README.md b/src/native/external/zlib-ng/README.md index 4f9fe09c69115..411621b52ffbc 100644 --- a/src/native/external/zlib-ng/README.md +++ b/src/native/external/zlib-ng/README.md @@ -21,7 +21,6 @@ Features * Support for CPU intrinsics when available * Adler32 implementation using SSSE3, AVX2, AVX512, AVX512-VNNI, Neon, VMX & VSX * CRC32-B implementation using PCLMULQDQ, VPCLMULQDQ, ACLE, & IBM Z - * Hash table implementation using CRC32-C intrinsics on x86 and ARM * Slide hash implementations using SSE2, AVX2, ARMv6, Neon, VMX & VSX * Compare256 implementations using SSE2, AVX2, Neon, POWER9 & RVV * Inflate chunk copying using SSE2, SSSE3, AVX, Neon & VSX @@ -95,20 +94,21 @@ make test Build Options ------------- -| CMake | configure | Description | Default | -|:-------------------------|:-------------------------|:--------------------------------------------------------------------------------------|---------| -| ZLIB_COMPAT | --zlib-compat | Compile with zlib compatible API | OFF | -| ZLIB_ENABLE_TESTS | | Build test binaries | ON | -| WITH_GZFILEOP | --without-gzfileops | Compile with support for gzFile related functions | ON | -| WITH_OPTIM | --without-optimizations | Build with optimisations | ON | -| WITH_NEW_STRATEGIES | --without-new-strategies | Use new strategies | ON | -| WITH_NATIVE_INSTRUCTIONS | | Compiles with full instruction set supported on this host (gcc/clang -march=native) | OFF | -| WITH_SANITIZER | | Build with sanitizer (memory, address, undefined) | OFF | -| WITH_GTEST | | Build gtest_zlib | ON | -| WITH_FUZZERS | | Build test/fuzz | OFF | -| WITH_BENCHMARKS | | Build test/benchmarks | OFF | -| WITH_MAINTAINER_WARNINGS | | Build with project maintainer warnings | OFF | -| WITH_CODE_COVERAGE | | Enable code coverage reporting | OFF | +| CMake | configure | Description | Default | 
+|:---------------------------|:-------------------------|:------------------------------------------------------------------------------------|---------| +| ZLIB_COMPAT | --zlib-compat | Compile with zlib compatible API | OFF | +| ZLIB_ENABLE_TESTS | | Build test binaries | ON | +| WITH_GZFILEOP | --without-gzfileops | Compile with support for gzFile related functions | ON | +| WITH_OPTIM | --without-optimizations | Build with optimisations | ON | +| WITH_NEW_STRATEGIES | --without-new-strategies | Use new strategies | ON | +| WITH_NATIVE_INSTRUCTIONS | | Compiles with full instruction set supported on this host (gcc/clang -march=native) | OFF | +| WITH_RUNTIME_CPU_DETECTION | | Compiles with runtime CPU detection | ON | +| WITH_SANITIZER | | Build with sanitizer (memory, address, undefined) | OFF | +| WITH_GTEST | | Build gtest_zlib | ON | +| WITH_FUZZERS | | Build test/fuzz | OFF | +| WITH_BENCHMARKS | | Build test/benchmarks | OFF | +| WITH_MAINTAINER_WARNINGS | | Build with project maintainer warnings | OFF | +| WITH_CODE_COVERAGE | | Enable code coverage reporting | OFF | Install diff --git a/src/native/external/zlib-ng/adler32.c b/src/native/external/zlib-ng/adler32.c index 95ac13c3046b3..1a643ed53b2c0 100644 --- a/src/native/external/zlib-ng/adler32.c +++ b/src/native/external/zlib-ng/adler32.c @@ -7,70 +7,24 @@ #include "functable.h" #include "adler32_p.h" -/* ========================================================================= */ -Z_INTERNAL uint32_t adler32_c(uint32_t adler, const uint8_t *buf, size_t len) { - uint32_t sum2; - unsigned n; - - /* split Adler-32 into component sums */ - sum2 = (adler >> 16) & 0xffff; - adler &= 0xffff; - - /* in case user likes doing a byte at a time, keep it fast */ - if (UNLIKELY(len == 1)) - return adler32_len_1(adler, buf, sum2); - - /* initial Adler-32 value (deferred check for len == 1 speed) */ - if (UNLIKELY(buf == NULL)) - return 1L; - - /* in case short lengths are provided, keep it somewhat fast */ - if 
(UNLIKELY(len < 16)) - return adler32_len_16(adler, buf, len, sum2); - - /* do length NMAX blocks -- requires just one modulo operation */ - while (len >= NMAX) { - len -= NMAX; -#ifdef UNROLL_MORE - n = NMAX / 16; /* NMAX is divisible by 16 */ -#else - n = NMAX / 8; /* NMAX is divisible by 8 */ -#endif - do { -#ifdef UNROLL_MORE - DO16(adler, sum2, buf); /* 16 sums unrolled */ - buf += 16; -#else - DO8(adler, sum2, buf, 0); /* 8 sums unrolled */ - buf += 8; -#endif - } while (--n); - adler %= BASE; - sum2 %= BASE; - } - - /* do remaining bytes (less than NMAX, still just one modulo) */ - return adler32_len_64(adler, buf, len, sum2); -} - #ifdef ZLIB_COMPAT unsigned long Z_EXPORT PREFIX(adler32_z)(unsigned long adler, const unsigned char *buf, size_t len) { - return (unsigned long)functable.adler32((uint32_t)adler, buf, len); + return (unsigned long)FUNCTABLE_CALL(adler32)((uint32_t)adler, buf, len); } #else uint32_t Z_EXPORT PREFIX(adler32_z)(uint32_t adler, const unsigned char *buf, size_t len) { - return functable.adler32(adler, buf, len); + return FUNCTABLE_CALL(adler32)(adler, buf, len); } #endif /* ========================================================================= */ #ifdef ZLIB_COMPAT unsigned long Z_EXPORT PREFIX(adler32)(unsigned long adler, const unsigned char *buf, unsigned int len) { - return (unsigned long)functable.adler32((uint32_t)adler, buf, len); + return (unsigned long)FUNCTABLE_CALL(adler32)((uint32_t)adler, buf, len); } #else uint32_t Z_EXPORT PREFIX(adler32)(uint32_t adler, const unsigned char *buf, uint32_t len) { - return functable.adler32(adler, buf, len); + return FUNCTABLE_CALL(adler32)(adler, buf, len); } #endif diff --git a/src/native/external/zlib-ng/arch/arm/Makefile.in b/src/native/external/zlib-ng/arch/arm/Makefile.in index 9d05b00b54ede..b6f0aaf211452 100644 --- a/src/native/external/zlib-ng/arch/arm/Makefile.in +++ b/src/native/external/zlib-ng/arch/arm/Makefile.in @@ -25,7 +25,6 @@ all: \ crc32_acle.o crc32_acle.lo \ 
slide_hash_neon.o slide_hash_neon.lo \ slide_hash_armv6.o slide_hash_armv6.lo \ - insert_string_acle.o insert_string_acle.lo adler32_neon.o: $(CC) $(CFLAGS) $(NEONFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_neon.c @@ -69,12 +68,6 @@ slide_hash_armv6.o: slide_hash_armv6.lo: $(CC) $(SFLAGS) $(ARMV6FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_armv6.c -insert_string_acle.o: - $(CC) $(CFLAGS) $(ACLEFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_acle.c - -insert_string_acle.lo: - $(CC) $(SFLAGS) $(ACLEFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_acle.c - mostlyclean: clean clean: rm -f *.o *.lo *~ diff --git a/src/native/external/zlib-ng/arch/arm/adler32_neon.c b/src/native/external/zlib-ng/arch/arm/adler32_neon.c index f1c43ff047497..8e46b380170b8 100644 --- a/src/native/external/zlib-ng/arch/arm/adler32_neon.c +++ b/src/native/external/zlib-ng/arch/arm/adler32_neon.c @@ -7,8 +7,8 @@ */ #ifdef ARM_NEON #include "neon_intrins.h" -#include "../../zbuild.h" -#include "../../adler32_p.h" +#include "zbuild.h" +#include "adler32_p.h" static void NEON_accum32(uint32_t *s, const uint8_t *buf, size_t len) { static const uint16_t ALIGNED_(16) taps[64] = { diff --git a/src/native/external/zlib-ng/arch/arm/arm_features.c b/src/native/external/zlib-ng/arch/arm/arm_features.c index a0e070ba95611..d0d49764f4e5d 100644 --- a/src/native/external/zlib-ng/arch/arm/arm_features.c +++ b/src/native/external/zlib-ng/arch/arm/arm_features.c @@ -1,4 +1,4 @@ -#include "../../zbuild.h" +#include "zbuild.h" #include "arm_features.h" #if defined(__linux__) && defined(HAVE_SYS_AUXV_H) @@ -11,6 +11,11 @@ # ifndef ID_AA64ISAR0_CRC32_VAL # define ID_AA64ISAR0_CRC32_VAL ID_AA64ISAR0_CRC32 # endif +#elif defined(__OpenBSD__) && defined(__aarch64__) +# include +# include +# include +# include #elif defined(__APPLE__) # if !defined(_DARWIN_C_SOURCE) # define _DARWIN_C_SOURCE /* enable types aliases (eg u_int) */ @@ -30,6 +35,16 @@ static int 
arm_has_crc32() { #elif defined(__FreeBSD__) && defined(__aarch64__) return getenv("QEMU_EMULATING") == NULL && ID_AA64ISAR0_CRC32_VAL(READ_SPECIALREG(id_aa64isar0_el1)) >= ID_AA64ISAR0_CRC32_BASE; +#elif defined(__OpenBSD__) && defined(__aarch64__) + int hascrc32 = 0; + int isar0_mib[] = { CTL_MACHDEP, CPU_ID_AA64ISAR0 }; + uint64_t isar0 = 0; + size_t len = sizeof(isar0); + if (sysctl(isar0_mib, 2, &isar0, &len, NULL, 0) != -1) { + if (ID_AA64ISAR0_CRC32(isar0) >= ID_AA64ISAR0_CRC32_BASE) + hascrc32 = 1; + } + return hascrc32; #elif defined(__APPLE__) int hascrc32; size_t size = sizeof(hascrc32); diff --git a/src/native/external/zlib-ng/arch/arm/arm_features.h b/src/native/external/zlib-ng/arch/arm/arm_features.h index eca078e310ead..d968e02fbb6cf 100644 --- a/src/native/external/zlib-ng/arch/arm/arm_features.h +++ b/src/native/external/zlib-ng/arch/arm/arm_features.h @@ -2,8 +2,8 @@ * For conditions of distribution and use, see copyright notice in zlib.h */ -#ifndef ARM_H_ -#define ARM_H_ +#ifndef ARM_FEATURES_H_ +#define ARM_FEATURES_H_ struct arm_cpu_features { int has_simd; @@ -13,4 +13,4 @@ struct arm_cpu_features { void Z_INTERNAL arm_check_features(struct arm_cpu_features *features); -#endif /* ARM_H_ */ +#endif /* ARM_FEATURES_H_ */ diff --git a/src/native/external/zlib-ng/arch/arm/arm_functions.h b/src/native/external/zlib-ng/arch/arm/arm_functions.h new file mode 100644 index 0000000000000..61c682710a551 --- /dev/null +++ b/src/native/external/zlib-ng/arch/arm/arm_functions.h @@ -0,0 +1,65 @@ +/* arm_functions.h -- ARM implementations for arch-specific functions. 
+ * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifndef ARM_FUNCTIONS_H_ +#define ARM_FUNCTIONS_H_ + +#ifdef ARM_NEON +uint32_t adler32_neon(uint32_t adler, const uint8_t *buf, size_t len); +uint32_t chunksize_neon(void); +uint8_t* chunkmemset_safe_neon(uint8_t *out, unsigned dist, unsigned len, unsigned left); + +# ifdef HAVE_BUILTIN_CTZLL +uint32_t compare256_neon(const uint8_t *src0, const uint8_t *src1); +uint32_t longest_match_neon(deflate_state *const s, Pos cur_match); +uint32_t longest_match_slow_neon(deflate_state *const s, Pos cur_match); +# endif +void slide_hash_neon(deflate_state *s); +void inflate_fast_neon(PREFIX3(stream) *strm, uint32_t start); +#endif + +#ifdef ARM_ACLE +uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, size_t len); +#endif + +#ifdef ARM_SIMD +void slide_hash_armv6(deflate_state *s); +#endif + + +#ifdef DISABLE_RUNTIME_CPU_DETECTION +// ARM - SIMD +# if (defined(ARM_SIMD) && defined(__ARM_FEATURE_SIMD32)) || defined(ARM_NOCHECK_SIMD) +# undef native_slide_hash +# define native_slide_hash slide_hash_armv6 +# endif +// ARM - NEON +# if (defined(ARM_NEON) && (defined(__ARM_NEON__) || defined(__ARM_NEON))) || ARM_NOCHECK_NEON +# undef native_adler32 +# define native_adler32 adler32_neon +# undef native_chunkmemset_safe +# define native_chunkmemset_safe chunkmemset_safe_neon +# undef native_chunksize +# define native_chunksize chunksize_neon +# undef native_inflate_fast +# define native_inflate_fast inflate_fast_neon +# undef native_slide_hash +# define native_slide_hash slide_hash_neon +# ifdef HAVE_BUILTIN_CTZLL +# undef native_compare256 +# define native_compare256 compare256_neon +# undef native_longest_match +# define native_longest_match longest_match_neon +# undef native_longest_match_slow +# define native_longest_match_slow longest_match_slow_neon +# endif +# endif +// ARM - ACLE +# if defined(ARM_ACLE) && defined(__ARM_ACLE) && defined(__ARM_FEATURE_CRC32) +# undef native_crc32 +# define 
native_crc32 crc32_acle +# endif +#endif + +#endif /* ARM_FUNCTIONS_H_ */ diff --git a/src/native/external/zlib-ng/arch/arm/chunkset_neon.c b/src/native/external/zlib-ng/arch/arm/chunkset_neon.c index f9a444b0681fc..1c49ef5612355 100644 --- a/src/native/external/zlib-ng/arch/arm/chunkset_neon.c +++ b/src/native/external/zlib-ng/arch/arm/chunkset_neon.c @@ -4,8 +4,8 @@ #ifdef ARM_NEON #include "neon_intrins.h" -#include "../../zbuild.h" -#include "../generic/chunk_permute_table.h" +#include "zbuild.h" +#include "arch/generic/chunk_permute_table.h" typedef uint8x16_t chunk_t; diff --git a/src/native/external/zlib-ng/arch/arm/compare256_neon.c b/src/native/external/zlib-ng/arch/arm/compare256_neon.c index 7daeba411ecea..87d14c89c09bc 100644 --- a/src/native/external/zlib-ng/arch/arm/compare256_neon.c +++ b/src/native/external/zlib-ng/arch/arm/compare256_neon.c @@ -3,8 +3,9 @@ * For conditions of distribution and use, see copyright notice in zlib.h */ -#include "../../zbuild.h" - +#include "zbuild.h" +#include "zutil_p.h" +#include "deflate.h" #include "fallback_builtins.h" #if defined(ARM_NEON) && defined(HAVE_BUILTIN_CTZLL) diff --git a/src/native/external/zlib-ng/arch/arm/crc32_acle.c b/src/native/external/zlib-ng/arch/arm/crc32_acle.c index ac7d6ff66b3ea..116bcab1c23f1 100644 --- a/src/native/external/zlib-ng/arch/arm/crc32_acle.c +++ b/src/native/external/zlib-ng/arch/arm/crc32_acle.c @@ -7,7 +7,7 @@ #ifdef ARM_ACLE #include "acle_intrins.h" -#include "../../zbuild.h" +#include "zbuild.h" Z_INTERNAL Z_TARGET_CRC uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, size_t len) { Z_REGISTER uint32_t c; diff --git a/src/native/external/zlib-ng/arch/arm/neon_intrins.h b/src/native/external/zlib-ng/arch/arm/neon_intrins.h index 51df77dbe6855..a9e99ec88a9ce 100644 --- a/src/native/external/zlib-ng/arch/arm/neon_intrins.h +++ b/src/native/external/zlib-ng/arch/arm/neon_intrins.h @@ -25,6 +25,13 @@ out.val[3] = vqsubq_u16(a.val[3], b); \ } while (0) +# if 
defined(__clang__) && defined(__arm__) && defined(__ANDROID__) +/* Clang for 32-bit Android has too strict alignment requirement (:256) for x4 NEON intrinsics */ +# undef ARM_NEON_HASLD4 +# undef vld1q_u16_x4 +# undef vld1q_u8_x4 +# undef vst1q_u16_x4 +# endif # ifndef ARM_NEON_HASLD4 diff --git a/src/native/external/zlib-ng/arch/arm/slide_hash_armv6.c b/src/native/external/zlib-ng/arch/arm/slide_hash_armv6.c index 3a715a1551df6..da4f51e0afc82 100644 --- a/src/native/external/zlib-ng/arch/arm/slide_hash_armv6.c +++ b/src/native/external/zlib-ng/arch/arm/slide_hash_armv6.c @@ -5,8 +5,8 @@ #if defined(ARM_SIMD) #include "acle_intrins.h" -#include "../../zbuild.h" -#include "../../deflate.h" +#include "zbuild.h" +#include "deflate.h" /* SIMD version of hash_chain rebase */ static inline void slide_hash_chain(Pos *table, uint32_t entries, uint16_t wsize) { diff --git a/src/native/external/zlib-ng/arch/arm/slide_hash_neon.c b/src/native/external/zlib-ng/arch/arm/slide_hash_neon.c index 2889e9c046027..f319f987908a2 100644 --- a/src/native/external/zlib-ng/arch/arm/slide_hash_neon.c +++ b/src/native/external/zlib-ng/arch/arm/slide_hash_neon.c @@ -10,8 +10,8 @@ #ifdef ARM_NEON #include "neon_intrins.h" -#include "../../zbuild.h" -#include "../../deflate.h" +#include "zbuild.h" +#include "deflate.h" /* SIMD version of hash_chain rebase */ static inline void slide_hash_chain(Pos *table, uint32_t entries, uint16_t wsize) { diff --git a/src/native/external/zlib-ng/arch/generic/Makefile.in b/src/native/external/zlib-ng/arch/generic/Makefile.in index c717026f86e4a..32c8242d026f1 100644 --- a/src/native/external/zlib-ng/arch/generic/Makefile.in +++ b/src/native/external/zlib-ng/arch/generic/Makefile.in @@ -1,5 +1,6 @@ -# Makefile for zlib +# Makefile for zlib-ng # Copyright (C) 1995-2013 Jean-loup Gailly, Mark Adler +# Copyright (C) 2024 Hans Kristian Rosbach # For conditions of distribution and use, see copyright notice in zlib.h CC= @@ -11,12 +12,62 @@ SRCDIR=. SRCTOP=../.. 
TOPDIR=$(SRCTOP) -all: +all: \ + adler32_c.o adler32_c.lo \ + adler32_fold_c.o adler32_fold_c.lo \ + chunkset_c.o chunkset_c.lo \ + compare256_c.o compare256_c.lo \ + crc32_braid_c.o crc32_braid_c.lo \ + crc32_fold_c.o crc32_fold_c.lo \ + slide_hash_c.o slide_hash_c.lo + + +adler32_c.o: $(SRCDIR)/adler32_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/adler32_p.h + $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_c.c + +adler32_c.lo: $(SRCDIR)/adler32_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/adler32_p.h + $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_c.c + +adler32_fold_c.o: $(SRCDIR)/adler32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h + $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_fold_c.c + +adler32_fold_c.lo: $(SRCDIR)/adler32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h + $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_fold_c.c + +chunkset_c.o: $(SRCDIR)/chunkset_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/chunkset_tpl.h $(SRCTOP)/inffast_tpl.h + $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_c.c + +chunkset_c.lo: $(SRCDIR)/chunkset_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/chunkset_tpl.h $(SRCTOP)/inffast_tpl.h + $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_c.c + +compare256_c.o: $(SRCDIR)/compare256_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/zutil_p.h $(SRCTOP)/deflate.h $(SRCTOP)/fallback_builtins.h + $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/compare256_c.c + +compare256_c.lo: $(SRCDIR)/compare256_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/zutil_p.h $(SRCTOP)/deflate.h $(SRCTOP)/fallback_builtins.h + $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/compare256_c.c + +crc32_braid_c.o: $(SRCDIR)/crc32_braid_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/crc32_braid_p.h $(SRCTOP)/crc32_braid_tbl.h + $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_braid_c.c + +crc32_braid_c.lo: $(SRCDIR)/crc32_braid_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/crc32_braid_p.h $(SRCTOP)/crc32_braid_tbl.h + $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_braid_c.c + +crc32_fold_c.o: 
$(SRCDIR)/crc32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h + $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_fold_c.c + +crc32_fold_c.lo: $(SRCDIR)/crc32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h + $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_fold_c.c + +slide_hash_c.o: $(SRCDIR)/slide_hash_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/deflate.h + $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_c.c + +slide_hash_c.lo: $(SRCDIR)/slide_hash_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/deflate.h + $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_c.c mostlyclean: clean clean: - rm -f *.o *.lo *~ \ + rm -f *.o *.lo *~ rm -rf objs rm -f *.gcda *.gcno *.gcov diff --git a/src/native/external/zlib-ng/arch/generic/adler32_c.c b/src/native/external/zlib-ng/arch/generic/adler32_c.c new file mode 100644 index 0000000000000..64258c89b4940 --- /dev/null +++ b/src/native/external/zlib-ng/arch/generic/adler32_c.c @@ -0,0 +1,54 @@ +/* adler32.c -- compute the Adler-32 checksum of a data stream + * Copyright (C) 1995-2011, 2016 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "zbuild.h" +#include "functable.h" +#include "adler32_p.h" + +/* ========================================================================= */ +Z_INTERNAL uint32_t adler32_c(uint32_t adler, const uint8_t *buf, size_t len) { + uint32_t sum2; + unsigned n; + + /* split Adler-32 into component sums */ + sum2 = (adler >> 16) & 0xffff; + adler &= 0xffff; + + /* in case user likes doing a byte at a time, keep it fast */ + if (UNLIKELY(len == 1)) + return adler32_len_1(adler, buf, sum2); + + /* initial Adler-32 value (deferred check for len == 1 speed) */ + if (UNLIKELY(buf == NULL)) + return 1L; + + /* in case short lengths are provided, keep it somewhat fast */ + if (UNLIKELY(len < 16)) + return adler32_len_16(adler, buf, len, sum2); + + /* do length NMAX blocks -- requires just one modulo operation */ + while (len >= NMAX) { + len -= NMAX; 
+#ifdef UNROLL_MORE + n = NMAX / 16; /* NMAX is divisible by 16 */ +#else + n = NMAX / 8; /* NMAX is divisible by 8 */ +#endif + do { +#ifdef UNROLL_MORE + DO16(adler, sum2, buf); /* 16 sums unrolled */ + buf += 16; +#else + DO8(adler, sum2, buf, 0); /* 8 sums unrolled */ + buf += 8; +#endif + } while (--n); + adler %= BASE; + sum2 %= BASE; + } + + /* do remaining bytes (less than NMAX, still just one modulo) */ + return adler32_len_64(adler, buf, len, sum2); +} diff --git a/src/native/external/zlib-ng/arch/generic/adler32_fold_c.c b/src/native/external/zlib-ng/arch/generic/adler32_fold_c.c new file mode 100644 index 0000000000000..397dd104002cd --- /dev/null +++ b/src/native/external/zlib-ng/arch/generic/adler32_fold_c.c @@ -0,0 +1,15 @@ +/* adler32_fold.c -- adler32 folding interface + * Copyright (C) 2022 Adam Stylinski + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "zbuild.h" +#include "functable.h" + +#include + +Z_INTERNAL uint32_t adler32_fold_copy_c(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) { + adler = FUNCTABLE_CALL(adler32)(adler, src, len); + memcpy(dst, src, len); + return adler; +} diff --git a/src/native/external/zlib-ng/arch/generic/chunkset_c.c b/src/native/external/zlib-ng/arch/generic/chunkset_c.c new file mode 100644 index 0000000000000..7b2bb7ba3676a --- /dev/null +++ b/src/native/external/zlib-ng/arch/generic/chunkset_c.c @@ -0,0 +1,42 @@ +/* chunkset.c -- inline functions to copy small data chunks. 
+ * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "zbuild.h" + +typedef uint64_t chunk_t; + +#define CHUNK_SIZE 8 + +#define HAVE_CHUNKMEMSET_4 +#define HAVE_CHUNKMEMSET_8 + +static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) { + uint8_t *dest = (uint8_t *)chunk; + memcpy(dest, from, sizeof(uint32_t)); + memcpy(dest+4, from, sizeof(uint32_t)); +} + +static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) { + memcpy(chunk, from, sizeof(uint64_t)); +} + +static inline void loadchunk(uint8_t const *s, chunk_t *chunk) { + memcpy(chunk, (uint8_t *)s, sizeof(uint64_t)); +} + +static inline void storechunk(uint8_t *out, chunk_t *chunk) { + memcpy(out, chunk, sizeof(uint64_t)); +} + +#define CHUNKSIZE chunksize_c +#define CHUNKCOPY chunkcopy_c +#define CHUNKUNROLL chunkunroll_c +#define CHUNKMEMSET chunkmemset_c +#define CHUNKMEMSET_SAFE chunkmemset_safe_c + +#include "chunkset_tpl.h" + +#define INFLATE_FAST inflate_fast_c + +#include "inffast_tpl.h" diff --git a/src/native/external/zlib-ng/arch/generic/compare256_c.c b/src/native/external/zlib-ng/arch/generic/compare256_c.c new file mode 100644 index 0000000000000..0c12cb3a4ec4b --- /dev/null +++ b/src/native/external/zlib-ng/arch/generic/compare256_c.c @@ -0,0 +1,181 @@ +/* compare256.c -- 256 byte memory comparison with match length return + * Copyright (C) 2020 Nathan Moinvaziri + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "zbuild.h" +#include "zutil_p.h" +#include "deflate.h" +#include "fallback_builtins.h" + +/* ALIGNED, byte comparison */ +static inline uint32_t compare256_c_static(const uint8_t *src0, const uint8_t *src1) { + uint32_t len = 0; + + do { + if (*src0 != *src1) + return len; + src0 += 1, src1 += 1, len += 1; + if (*src0 != *src1) + return len; + src0 += 1, src1 += 1, len += 1; + if (*src0 != *src1) + return len; + src0 += 1, src1 += 1, len += 1; + if (*src0 != *src1) + return len; + src0 += 1, 
src1 += 1, len += 1; + if (*src0 != *src1) + return len; + src0 += 1, src1 += 1, len += 1; + if (*src0 != *src1) + return len; + src0 += 1, src1 += 1, len += 1; + if (*src0 != *src1) + return len; + src0 += 1, src1 += 1, len += 1; + if (*src0 != *src1) + return len; + src0 += 1, src1 += 1, len += 1; + } while (len < 256); + + return 256; +} + +Z_INTERNAL uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1) { + return compare256_c_static(src0, src1); +} + +#define LONGEST_MATCH longest_match_c +#define COMPARE256 compare256_c_static + +#include "match_tpl.h" + +#define LONGEST_MATCH_SLOW +#define LONGEST_MATCH longest_match_slow_c +#define COMPARE256 compare256_c_static + +#include "match_tpl.h" + +#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN +/* 16-bit unaligned integer comparison */ +static inline uint32_t compare256_unaligned_16_static(const uint8_t *src0, const uint8_t *src1) { + uint32_t len = 0; + + do { + if (zng_memcmp_2(src0, src1) != 0) + return len + (*src0 == *src1); + src0 += 2, src1 += 2, len += 2; + + if (zng_memcmp_2(src0, src1) != 0) + return len + (*src0 == *src1); + src0 += 2, src1 += 2, len += 2; + + if (zng_memcmp_2(src0, src1) != 0) + return len + (*src0 == *src1); + src0 += 2, src1 += 2, len += 2; + + if (zng_memcmp_2(src0, src1) != 0) + return len + (*src0 == *src1); + src0 += 2, src1 += 2, len += 2; + } while (len < 256); + + return 256; +} + +Z_INTERNAL uint32_t compare256_unaligned_16(const uint8_t *src0, const uint8_t *src1) { + return compare256_unaligned_16_static(src0, src1); +} + +#define LONGEST_MATCH longest_match_unaligned_16 +#define COMPARE256 compare256_unaligned_16_static + +#include "match_tpl.h" + +#define LONGEST_MATCH_SLOW +#define LONGEST_MATCH longest_match_slow_unaligned_16 +#define COMPARE256 compare256_unaligned_16_static + +#include "match_tpl.h" + +#ifdef HAVE_BUILTIN_CTZ +/* 32-bit unaligned integer comparison */ +static inline uint32_t compare256_unaligned_32_static(const uint8_t *src0, const 
uint8_t *src1) { + uint32_t len = 0; + + do { + uint32_t sv, mv, diff; + + memcpy(&sv, src0, sizeof(sv)); + memcpy(&mv, src1, sizeof(mv)); + + diff = sv ^ mv; + if (diff) { + uint32_t match_byte = __builtin_ctz(diff) / 8; + return len + match_byte; + } + + src0 += 4, src1 += 4, len += 4; + } while (len < 256); + + return 256; +} + +Z_INTERNAL uint32_t compare256_unaligned_32(const uint8_t *src0, const uint8_t *src1) { + return compare256_unaligned_32_static(src0, src1); +} + +#define LONGEST_MATCH longest_match_unaligned_32 +#define COMPARE256 compare256_unaligned_32_static + +#include "match_tpl.h" + +#define LONGEST_MATCH_SLOW +#define LONGEST_MATCH longest_match_slow_unaligned_32 +#define COMPARE256 compare256_unaligned_32_static + +#include "match_tpl.h" + +#endif + +#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL) +/* UNALIGNED64_OK, 64-bit integer comparison */ +static inline uint32_t compare256_unaligned_64_static(const uint8_t *src0, const uint8_t *src1) { + uint32_t len = 0; + + do { + uint64_t sv, mv, diff; + + memcpy(&sv, src0, sizeof(sv)); + memcpy(&mv, src1, sizeof(mv)); + + diff = sv ^ mv; + if (diff) { + uint64_t match_byte = __builtin_ctzll(diff) / 8; + return len + (uint32_t)match_byte; + } + + src0 += 8, src1 += 8, len += 8; + } while (len < 256); + + return 256; +} + +Z_INTERNAL uint32_t compare256_unaligned_64(const uint8_t *src0, const uint8_t *src1) { + return compare256_unaligned_64_static(src0, src1); +} + +#define LONGEST_MATCH longest_match_unaligned_64 +#define COMPARE256 compare256_unaligned_64_static + +#include "match_tpl.h" + +#define LONGEST_MATCH_SLOW +#define LONGEST_MATCH longest_match_slow_unaligned_64 +#define COMPARE256 compare256_unaligned_64_static + +#include "match_tpl.h" + +#endif + +#endif diff --git a/src/native/external/zlib-ng/arch/generic/crc32_braid_c.c b/src/native/external/zlib-ng/arch/generic/crc32_braid_c.c new file mode 100644 index 0000000000000..f80071042d77c --- /dev/null +++ 
b/src/native/external/zlib-ng/arch/generic/crc32_braid_c.c @@ -0,0 +1,216 @@ +/* crc32_braid.c -- compute the CRC-32 of a data stream + * Copyright (C) 1995-2022 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + * + * This interleaved implementation of a CRC makes use of pipelined multiple + * arithmetic-logic units, commonly found in modern CPU cores. It is due to + * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution. + */ + +#include "zbuild.h" +#include "crc32_braid_p.h" +#include "crc32_braid_tbl.h" + +/* + A CRC of a message is computed on N braids of words in the message, where + each word consists of W bytes (4 or 8). If N is 3, for example, then three + running sparse CRCs are calculated respectively on each braid, at these + indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ... + This is done starting at a word boundary, and continues until as many blocks + of N * W bytes as are available have been processed. The results are combined + into a single CRC at the end. For this code, N must be in the range 1..6 and + W must be 4 or 8. The upper limit on N can be increased if desired by adding + more #if blocks, extending the patterns apparent in the code. In addition, + crc32 tables would need to be regenerated, if the maximum N value is increased. + + N and W are chosen empirically by benchmarking the execution time on a given + processor. The choices for N and W below were based on testing on Intel Kaby + Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64 + Octeon II processors. The Intel, AMD, and ARM processors were all fastest + with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4. + They were all tested with either gcc or clang, all using the -O3 optimization + level. Your mileage may vary. 
+*/ + +/* ========================================================================= */ +#ifdef W +/* + Return the CRC of the W bytes in the word_t data, taking the + least-significant byte of the word as the first byte of data, without any pre + or post conditioning. This is used to combine the CRCs of each braid. + */ +#if BYTE_ORDER == LITTLE_ENDIAN +static uint32_t crc_word(z_word_t data) { + int k; + for (k = 0; k < W; k++) + data = (data >> 8) ^ crc_table[data & 0xff]; + return (uint32_t)data; +} +#elif BYTE_ORDER == BIG_ENDIAN +static z_word_t crc_word(z_word_t data) { + int k; + for (k = 0; k < W; k++) + data = (data << 8) ^ + crc_big_table[(data >> ((W - 1) << 3)) & 0xff]; + return data; +} +#endif /* BYTE_ORDER */ + +#endif /* W */ + +/* ========================================================================= */ +Z_INTERNAL uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len) { + uint32_t c; + + /* Pre-condition the CRC */ + c = (~crc) & 0xffffffff; + +#ifdef W + /* If provided enough bytes, do a braided CRC calculation. */ + if (len >= N * W + W - 1) { + size_t blks; + z_word_t const *words; + int k; + + /* Compute the CRC up to a z_word_t boundary. */ + while (len && ((uintptr_t)buf & (W - 1)) != 0) { + len--; + DO1; + } + + /* Compute the CRC on as many N z_word_t blocks as are available. */ + blks = len / (N * W); + len -= blks * N * W; + words = (z_word_t const *)buf; + + z_word_t crc0, word0, comb; +#if N > 1 + z_word_t crc1, word1; +#if N > 2 + z_word_t crc2, word2; +#if N > 3 + z_word_t crc3, word3; +#if N > 4 + z_word_t crc4, word4; +#if N > 5 + z_word_t crc5, word5; +#endif +#endif +#endif +#endif +#endif + /* Initialize the CRC for each braid. */ + crc0 = ZSWAPWORD(c); +#if N > 1 + crc1 = 0; +#if N > 2 + crc2 = 0; +#if N > 3 + crc3 = 0; +#if N > 4 + crc4 = 0; +#if N > 5 + crc5 = 0; +#endif +#endif +#endif +#endif +#endif + /* Process the first blks-1 blocks, computing the CRCs on each braid independently. 
*/ + while (--blks) { + /* Load the word for each braid into registers. */ + word0 = crc0 ^ words[0]; +#if N > 1 + word1 = crc1 ^ words[1]; +#if N > 2 + word2 = crc2 ^ words[2]; +#if N > 3 + word3 = crc3 ^ words[3]; +#if N > 4 + word4 = crc4 ^ words[4]; +#if N > 5 + word5 = crc5 ^ words[5]; +#endif +#endif +#endif +#endif +#endif + words += N; + + /* Compute and update the CRC for each word. The loop should get unrolled. */ + crc0 = BRAID_TABLE[0][word0 & 0xff]; +#if N > 1 + crc1 = BRAID_TABLE[0][word1 & 0xff]; +#if N > 2 + crc2 = BRAID_TABLE[0][word2 & 0xff]; +#if N > 3 + crc3 = BRAID_TABLE[0][word3 & 0xff]; +#if N > 4 + crc4 = BRAID_TABLE[0][word4 & 0xff]; +#if N > 5 + crc5 = BRAID_TABLE[0][word5 & 0xff]; +#endif +#endif +#endif +#endif +#endif + for (k = 1; k < W; k++) { + crc0 ^= BRAID_TABLE[k][(word0 >> (k << 3)) & 0xff]; +#if N > 1 + crc1 ^= BRAID_TABLE[k][(word1 >> (k << 3)) & 0xff]; +#if N > 2 + crc2 ^= BRAID_TABLE[k][(word2 >> (k << 3)) & 0xff]; +#if N > 3 + crc3 ^= BRAID_TABLE[k][(word3 >> (k << 3)) & 0xff]; +#if N > 4 + crc4 ^= BRAID_TABLE[k][(word4 >> (k << 3)) & 0xff]; +#if N > 5 + crc5 ^= BRAID_TABLE[k][(word5 >> (k << 3)) & 0xff]; +#endif +#endif +#endif +#endif +#endif + } + } + + /* Process the last block, combining the CRCs of the N braids at the same time. */ + comb = crc_word(crc0 ^ words[0]); +#if N > 1 + comb = crc_word(crc1 ^ words[1] ^ comb); +#if N > 2 + comb = crc_word(crc2 ^ words[2] ^ comb); +#if N > 3 + comb = crc_word(crc3 ^ words[3] ^ comb); +#if N > 4 + comb = crc_word(crc4 ^ words[4] ^ comb); +#if N > 5 + comb = crc_word(crc5 ^ words[5] ^ comb); +#endif +#endif +#endif +#endif +#endif + words += N; + Assert(comb <= UINT32_MAX, "comb should fit in uint32_t"); + c = (uint32_t)ZSWAPWORD(comb); + + /* Update the pointer to the remaining bytes to process. */ + buf = (const unsigned char *)words; + } + +#endif /* W */ + + /* Complete the computation of the CRC on any remaining bytes. 
*/ + while (len >= 8) { + len -= 8; + DO8; + } + while (len) { + len--; + DO1; + } + + /* Return the CRC, post-conditioned. */ + return c ^ 0xffffffff; +} diff --git a/src/native/external/zlib-ng/arch/generic/crc32_fold_c.c b/src/native/external/zlib-ng/arch/generic/crc32_fold_c.c new file mode 100644 index 0000000000000..43930e97c61fe --- /dev/null +++ b/src/native/external/zlib-ng/arch/generic/crc32_fold_c.c @@ -0,0 +1,31 @@ +/* crc32_fold.c -- crc32 folding interface + * Copyright (C) 2021 Nathan Moinvaziri + * For conditions of distribution and use, see copyright notice in zlib.h + */ +#include "zbuild.h" +#include "zutil.h" +#include "functable.h" +#include "crc32.h" + +Z_INTERNAL uint32_t crc32_fold_reset_c(crc32_fold *crc) { + crc->value = CRC32_INITIAL_VALUE; + return crc->value; +} + +Z_INTERNAL void crc32_fold_copy_c(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len) { + crc->value = FUNCTABLE_CALL(crc32)(crc->value, src, len); + memcpy(dst, src, len); +} + +Z_INTERNAL void crc32_fold_c(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc) { + /* Note: while this is basically the same thing as the vanilla CRC function, we still need + * a functable entry for it so that we can generically dispatch to this function with the + * same arguments for the versions that _do_ do a folding CRC but we don't want a copy. The + * init_crc is an unused argument in this context */ + Z_UNUSED(init_crc); + crc->value = FUNCTABLE_CALL(crc32)(crc->value, src, len); +} + +Z_INTERNAL uint32_t crc32_fold_final_c(crc32_fold *crc) { + return crc->value; +} diff --git a/src/native/external/zlib-ng/arch/generic/generic_functions.h b/src/native/external/zlib-ng/arch/generic/generic_functions.h new file mode 100644 index 0000000000000..997dd4d01eeea --- /dev/null +++ b/src/native/external/zlib-ng/arch/generic/generic_functions.h @@ -0,0 +1,106 @@ +/* generic_functions.h -- generic C implementations for arch-specific functions. 
+ * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifndef GENERIC_FUNCTIONS_H_ +#define GENERIC_FUNCTIONS_H_ + +#include "zendian.h" + +Z_INTERNAL uint32_t crc32_fold_reset_c(crc32_fold *crc); +Z_INTERNAL void crc32_fold_copy_c(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len); +Z_INTERNAL void crc32_fold_c(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc); +Z_INTERNAL uint32_t crc32_fold_final_c(crc32_fold *crc); + +Z_INTERNAL uint32_t adler32_fold_copy_c(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); + + +typedef uint32_t (*adler32_func)(uint32_t adler, const uint8_t *buf, size_t len); +typedef uint32_t (*compare256_func)(const uint8_t *src0, const uint8_t *src1); +typedef uint32_t (*crc32_func)(uint32_t crc32, const uint8_t *buf, size_t len); + +uint32_t adler32_c(uint32_t adler, const uint8_t *buf, size_t len); + +uint32_t chunksize_c(void); +uint8_t* chunkmemset_safe_c(uint8_t *out, unsigned dist, unsigned len, unsigned left); +void inflate_fast_c(PREFIX3(stream) *strm, uint32_t start); + +uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len); + +uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1); +#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN +uint32_t compare256_unaligned_16(const uint8_t *src0, const uint8_t *src1); +# ifdef HAVE_BUILTIN_CTZ + uint32_t compare256_unaligned_32(const uint8_t *src0, const uint8_t *src1); +# endif +# if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL) + uint32_t compare256_unaligned_64(const uint8_t *src0, const uint8_t *src1); +# endif +#endif + +typedef void (*slide_hash_func)(deflate_state *s); + +void slide_hash_c(deflate_state *s); + +uint32_t longest_match_c(deflate_state *const s, Pos cur_match); +# if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN + uint32_t longest_match_unaligned_16(deflate_state *const s, Pos cur_match); +# ifdef HAVE_BUILTIN_CTZ + uint32_t 
longest_match_unaligned_32(deflate_state *const s, Pos cur_match); +# endif +# if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL) + uint32_t longest_match_unaligned_64(deflate_state *const s, Pos cur_match); +# endif +# endif + +uint32_t longest_match_slow_c(deflate_state *const s, Pos cur_match); +# if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN + uint32_t longest_match_slow_unaligned_16(deflate_state *const s, Pos cur_match); + uint32_t longest_match_slow_unaligned_32(deflate_state *const s, Pos cur_match); +# ifdef UNALIGNED64_OK + uint32_t longest_match_slow_unaligned_64(deflate_state *const s, Pos cur_match); +# endif +# endif + + +// Select generic implementation for longest_match, longest_match_slow, longest_match_slow functions. +#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN +# if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL) +# define longest_match_generic longest_match_unaligned_64 +# define longest_match_slow_generic longest_match_slow_unaligned_64 +# define compare256_generic compare256_unaligned_64 +# elif defined(HAVE_BUILTIN_CTZ) +# define longest_match_generic longest_match_unaligned_32 +# define longest_match_slow_generic longest_match_slow_unaligned_32 +# define compare256_generic compare256_unaligned_32 +# else +# define longest_match_generic longest_match_unaligned_16 +# define longest_match_slow_generic longest_match_slow_unaligned_16 +# define compare256_generic compare256_unaligned_16 +# endif +#else +# define longest_match_generic longest_match_c +# define longest_match_slow_generic longest_match_slow_c +# define compare256_generic compare256_c +#endif + + +#ifdef DISABLE_RUNTIME_CPU_DETECTION +// Generic code +# define native_adler32 adler32_c +# define native_adler32_fold_copy adler32_fold_copy_c +# define native_chunkmemset_safe chunkmemset_safe_c +# define native_chunksize chunksize_c +# define native_crc32 PREFIX(crc32_braid) +# define native_crc32_fold crc32_fold_c +# define native_crc32_fold_copy 
crc32_fold_copy_c +# define native_crc32_fold_final crc32_fold_final_c +# define native_crc32_fold_reset crc32_fold_reset_c +# define native_inflate_fast inflate_fast_c +# define native_slide_hash slide_hash_c +# define native_longest_match longest_match_generic +# define native_longest_match_slow longest_match_slow_generic +# define native_compare256 compare256_generic +#endif + +#endif diff --git a/src/native/external/zlib-ng/arch/generic/slide_hash_c.c b/src/native/external/zlib-ng/arch/generic/slide_hash_c.c new file mode 100644 index 0000000000000..8345b9e36b852 --- /dev/null +++ b/src/native/external/zlib-ng/arch/generic/slide_hash_c.c @@ -0,0 +1,52 @@ +/* slide_hash.c -- slide hash table C implementation + * + * Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "zbuild.h" +#include "deflate.h" + +/* =========================================================================== + * Slide the hash table when sliding the window down (could be avoided with 32 + * bit values at the expense of memory usage). We slide even when level == 0 to + * keep the hash table consistent if we switch back to level > 0 later. + */ +static inline void slide_hash_c_chain(Pos *table, uint32_t entries, uint16_t wsize) { +#ifdef NOT_TWEAK_COMPILER + table += entries; + do { + unsigned m; + m = *--table; + *table = (Pos)(m >= wsize ? m-wsize : 0); + /* If entries is not on any hash chain, prev[entries] is garbage but + * its value will never be used. + */ + } while (--entries); +#else + { + /* As of I make this change, gcc (4.8.*) isn't able to vectorize + * this hot loop using saturated-subtraction on x86-64 architecture. + * To avoid this defect, we can change the loop such that + * o. the pointer advance forward, and + * o. demote the variable 'm' to be local to the loop, and + * choose type "Pos" (instead of 'unsigned int') for the + * variable to avoid unnecessary zero-extension. 
+ */ + unsigned int i; + Pos *q = table; + for (i = 0; i < entries; i++) { + Pos m = *q; + Pos t = (Pos)wsize; + *q++ = (Pos)(m >= t ? m-t: 0); + } + } +#endif /* NOT_TWEAK_COMPILER */ +} + +Z_INTERNAL void slide_hash_c(deflate_state *s) { + uint16_t wsize = (uint16_t)s->w_size; + + slide_hash_c_chain(s->head, HASH_SIZE, wsize); + slide_hash_c_chain(s->prev, wsize, wsize); +} diff --git a/src/native/external/zlib-ng/arch/power/chunkset_power8.c b/src/native/external/zlib-ng/arch/power/chunkset_power8.c index 7cbb8029b3b14..aef19732736f5 100644 --- a/src/native/external/zlib-ng/arch/power/chunkset_power8.c +++ b/src/native/external/zlib-ng/arch/power/chunkset_power8.c @@ -4,7 +4,7 @@ #ifdef POWER8_VSX #include -#include "../../zbuild.h" +#include "zbuild.h" typedef vector unsigned char chunk_t; diff --git a/src/native/external/zlib-ng/arch/power/compare256_power9.c b/src/native/external/zlib-ng/arch/power/compare256_power9.c index 9b0ddaf800450..c8be498e4f60b 100644 --- a/src/native/external/zlib-ng/arch/power/compare256_power9.c +++ b/src/native/external/zlib-ng/arch/power/compare256_power9.c @@ -5,8 +5,10 @@ #ifdef POWER9 #include -#include "../../zbuild.h" -#include "../../zendian.h" +#include "zbuild.h" +#include "zutil_p.h" +#include "deflate.h" +#include "zendian.h" /* Older versions of GCC misimplemented semantics for these bit counting builtins. * https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=3f30f2d1dbb3228b8468b26239fe60c2974ce2ac */ diff --git a/src/native/external/zlib-ng/arch/power/power_features.c b/src/native/external/zlib-ng/arch/power/power_features.c index f73503734b13d..4939d1c18f342 100644 --- a/src/native/external/zlib-ng/arch/power/power_features.c +++ b/src/native/external/zlib-ng/arch/power/power_features.c @@ -1,16 +1,19 @@ /* power_features.c - POWER feature check * Copyright (C) 2020 Matheus Castanho , IBM - * Copyright (C) 2021-2022 Mika T. Lindqvist + * Copyright (C) 2021-2024 Mika T. 
Lindqvist * For conditions of distribution and use, see copyright notice in zlib.h */ #ifdef HAVE_SYS_AUXV_H # include #endif +#ifdef POWER_NEED_AUXVEC_H +# include +#endif #ifdef __FreeBSD__ # include #endif -#include "../../zbuild.h" +#include "zbuild.h" #include "power_features.h" void Z_INTERNAL power_check_features(struct power_cpu_features *features) { diff --git a/src/native/external/zlib-ng/arch/power/power_features.h b/src/native/external/zlib-ng/arch/power/power_features.h index 9252364cc48d2..1ff51de5ddc58 100644 --- a/src/native/external/zlib-ng/arch/power/power_features.h +++ b/src/native/external/zlib-ng/arch/power/power_features.h @@ -4,8 +4,8 @@ * For conditions of distribution and use, see copyright notice in zlib.h */ -#ifndef POWER_H_ -#define POWER_H_ +#ifndef POWER_FEATURES_H_ +#define POWER_FEATURES_H_ struct power_cpu_features { int has_altivec; @@ -15,4 +15,4 @@ struct power_cpu_features { void Z_INTERNAL power_check_features(struct power_cpu_features *features); -#endif /* POWER_H_ */ +#endif /* POWER_FEATURES_H_ */ diff --git a/src/native/external/zlib-ng/arch/power/power_functions.h b/src/native/external/zlib-ng/arch/power/power_functions.h new file mode 100644 index 0000000000000..cb6b7650ecafa --- /dev/null +++ b/src/native/external/zlib-ng/arch/power/power_functions.h @@ -0,0 +1,67 @@ +/* power_functions.h -- POWER implementations for arch-specific functions. + * Copyright (C) 2020 Matheus Castanho , IBM + * Copyright (C) 2021 Mika T. 
Lindqvist + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifndef POWER_FUNCTIONS_H_ +#define POWER_FUNCTIONS_H_ + +#ifdef PPC_VMX +uint32_t adler32_vmx(uint32_t adler, const uint8_t *buf, size_t len); +void slide_hash_vmx(deflate_state *s); +#endif + +#ifdef POWER8_VSX +uint32_t adler32_power8(uint32_t adler, const uint8_t *buf, size_t len); +uint32_t chunksize_power8(void); +uint8_t* chunkmemset_safe_power8(uint8_t *out, unsigned dist, unsigned len, unsigned left); +uint32_t crc32_power8(uint32_t crc, const uint8_t *buf, size_t len); +void slide_hash_power8(deflate_state *s); +void inflate_fast_power8(PREFIX3(stream) *strm, uint32_t start); +#endif + +#ifdef POWER9 +uint32_t compare256_power9(const uint8_t *src0, const uint8_t *src1); +uint32_t longest_match_power9(deflate_state *const s, Pos cur_match); +uint32_t longest_match_slow_power9(deflate_state *const s, Pos cur_match); +#endif + + +#ifdef DISABLE_RUNTIME_CPU_DETECTION +// Power - VMX +# if defined(PPC_VMX) && defined(__ALTIVEC__) +# undef native_adler32 +# define native_adler32 adler32_vmx +# undef native_slide_hash +# define native_slide_hash slide_hash_vmx +# endif +// Power8 - VSX +# if defined(POWER8_VSX) && defined(_ARCH_PWR8) && defined(__VSX__) +# undef native_adler32 +# define native_adler32 adler32_power8 +# undef native_chunkmemset_safe +# define native_chunkmemset_safe chunkmemset_safe_power8 +# undef native_chunksize +# define native_chunksize chunksize_power8 +# undef native_inflate_fast +# define native_inflate_fast inflate_fast_power8 +# undef native_slide_hash +# define native_slide_hash slide_hash_power8 +# endif +# if defined(POWER8_VSX_CRC32) && defined(_ARCH_PWR8) && defined(__VSX__) +# undef native_crc32 +# define native_crc32 crc32_power8 +# endif +// Power9 +# if defined(POWER9) && defined(_ARCH_PWR9) +# undef native_compare256 +# define native_compare256 compare256_power9 +# undef native_longest_match +# define native_longest_match 
longest_match_power9 +# undef native_longest_match_slow +# define native_longest_match_slow longest_match_slow_power9 +# endif +#endif + +#endif /* POWER_FUNCTIONS_H_ */ diff --git a/src/native/external/zlib-ng/arch/riscv/adler32_rvv.c b/src/native/external/zlib-ng/arch/riscv/adler32_rvv.c index da46f37e73c18..d0f9aaa567b38 100644 --- a/src/native/external/zlib-ng/arch/riscv/adler32_rvv.c +++ b/src/native/external/zlib-ng/arch/riscv/adler32_rvv.c @@ -9,8 +9,8 @@ #include #include -#include "../../zbuild.h" -#include "../../adler32_p.h" +#include "zbuild.h" +#include "adler32_p.h" static inline uint32_t adler32_rvv_impl(uint32_t adler, uint8_t* restrict dst, const uint8_t *src, size_t len, int COPY) { /* split Adler-32 into component sums */ diff --git a/src/native/external/zlib-ng/arch/riscv/compare256_rvv.c b/src/native/external/zlib-ng/arch/riscv/compare256_rvv.c index 0fd6082c44d0a..3d6c3e3aa5b1e 100644 --- a/src/native/external/zlib-ng/arch/riscv/compare256_rvv.c +++ b/src/native/external/zlib-ng/arch/riscv/compare256_rvv.c @@ -6,7 +6,9 @@ #ifdef RISCV_RVV -#include "../../zbuild.h" +#include "zbuild.h" +#include "zutil_p.h" +#include "deflate.h" #include "fallback_builtins.h" #include diff --git a/src/native/external/zlib-ng/arch/riscv/riscv_features.c b/src/native/external/zlib-ng/arch/riscv/riscv_features.c index b066f427e0fc3..1e3f45e0a73a5 100644 --- a/src/native/external/zlib-ng/arch/riscv/riscv_features.c +++ b/src/native/external/zlib-ng/arch/riscv/riscv_features.c @@ -1,10 +1,13 @@ #include #include #include -#include #include -#include "../../zbuild.h" +#if defined(__linux__) && defined(HAVE_SYS_AUXV_H) +# include +#endif + +#include "zbuild.h" #include "riscv_features.h" #define ISA_V_HWCAP (1 << ('v' - 'a')) @@ -33,7 +36,11 @@ void Z_INTERNAL riscv_check_features_compile_time(struct riscv_cpu_features *fea } void Z_INTERNAL riscv_check_features_runtime(struct riscv_cpu_features *features) { +#if defined(__linux__) && defined(HAVE_SYS_AUXV_H) 
unsigned long hw_cap = getauxval(AT_HWCAP); +#else + unsigned long hw_cap = 0; +#endif features->has_rvv = hw_cap & ISA_V_HWCAP; } diff --git a/src/native/external/zlib-ng/arch/riscv/riscv_features.h b/src/native/external/zlib-ng/arch/riscv/riscv_features.h index c76e967c36cec..b1593acc25670 100644 --- a/src/native/external/zlib-ng/arch/riscv/riscv_features.h +++ b/src/native/external/zlib-ng/arch/riscv/riscv_features.h @@ -6,8 +6,8 @@ * For conditions of distribution and use, see copyright notice in zlib.h */ -#ifndef RISCV_H_ -#define RISCV_H_ +#ifndef RISCV_FEATURES_H_ +#define RISCV_FEATURES_H_ struct riscv_cpu_features { int has_rvv; @@ -15,4 +15,4 @@ struct riscv_cpu_features { void Z_INTERNAL riscv_check_features(struct riscv_cpu_features *features); -#endif /* RISCV_H_ */ +#endif /* RISCV_FEATURES_H_ */ diff --git a/src/native/external/zlib-ng/arch/riscv/riscv_functions.h b/src/native/external/zlib-ng/arch/riscv/riscv_functions.h new file mode 100644 index 0000000000000..015b2fbd75c42 --- /dev/null +++ b/src/native/external/zlib-ng/arch/riscv/riscv_functions.h @@ -0,0 +1,49 @@ +/* riscv_functions.h -- RISCV implementations for arch-specific functions. + * + * Copyright (C) 2023 SiFive, Inc. All rights reserved. 
+ * Contributed by Alex Chiang + * + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifndef RISCV_FUNCTIONS_H_ +#define RISCV_FUNCTIONS_H_ + +#ifdef RISCV_RVV +uint32_t adler32_rvv(uint32_t adler, const uint8_t *buf, size_t len); +uint32_t adler32_fold_copy_rvv(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); +uint32_t chunksize_rvv(void); +uint8_t* chunkmemset_safe_rvv(uint8_t *out, unsigned dist, unsigned len, unsigned left); +uint32_t compare256_rvv(const uint8_t *src0, const uint8_t *src1); + +uint32_t longest_match_rvv(deflate_state *const s, Pos cur_match); +uint32_t longest_match_slow_rvv(deflate_state *const s, Pos cur_match); +void slide_hash_rvv(deflate_state *s); +void inflate_fast_rvv(PREFIX3(stream) *strm, uint32_t start); +#endif + +#ifdef DISABLE_RUNTIME_CPU_DETECTION +// RISCV - RVV +# if defined(RISCV_RVV) && defined(__riscv_v) && defined(__linux__) +# undef native_adler32 +# define native_adler32 adler32_rvv +# undef native_adler32_fold_copy +# define native_adler32_fold_copy adler32_fold_copy_rvv +# undef native_chunkmemset_safe +# define native_chunkmemset_safe chunkmemset_safe_rvv +# undef native_chunksize +# define native_chunksize chunksize_rvv +# undef native_compare256 +# define native_compare256 compare256_rvv +# undef native_inflate_fast +# define native_inflate_fast inflate_fast_rvv +# undef native_longest_match +# define native_longest_match longest_match_rvv +# undef native_longest_match_slow +# define native_longest_match_slow longest_match_slow_rvv +# undef native_slide_hash +# define native_slide_hash slide_hash_rvv +# endif +#endif + +#endif /* RISCV_FUNCTIONS_H_ */ diff --git a/src/native/external/zlib-ng/arch/riscv/slide_hash_rvv.c b/src/native/external/zlib-ng/arch/riscv/slide_hash_rvv.c index b70a44b63e0e7..6f53d7a13ad81 100644 --- a/src/native/external/zlib-ng/arch/riscv/slide_hash_rvv.c +++ b/src/native/external/zlib-ng/arch/riscv/slide_hash_rvv.c @@ -8,18 +8,16 @@ #include 
-#include "../../zbuild.h" -#include "../../deflate.h" +#include "zbuild.h" +#include "deflate.h" static inline void slide_hash_chain(Pos *table, uint32_t entries, uint16_t wsize) { size_t vl; while (entries > 0) { vl = __riscv_vsetvl_e16m4(entries); vuint16m4_t v_tab = __riscv_vle16_v_u16m4(table, vl); - vuint16m4_t v_diff = __riscv_vsub_vx_u16m4(v_tab, wsize, vl); - vbool4_t mask = __riscv_vmsltu_vx_u16m4_b4(v_tab, wsize, vl); - v_tab = __riscv_vmerge_vxm_u16m4(v_diff, 0, mask, vl); - __riscv_vse16_v_u16m4(table, v_tab, vl); + vuint16m4_t v_diff = __riscv_vssubu_vx_u16m4(v_tab, wsize, vl); + __riscv_vse16_v_u16m4(table, v_diff, vl); table += vl, entries -= vl; } } diff --git a/src/native/external/zlib-ng/arch/s390/Makefile.in b/src/native/external/zlib-ng/arch/s390/Makefile.in index 6b4fba7775c4a..e994157df2d34 100644 --- a/src/native/external/zlib-ng/arch/s390/Makefile.in +++ b/src/native/external/zlib-ng/arch/s390/Makefile.in @@ -20,12 +20,6 @@ s390_features.o: s390_features.lo: $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/s390_features.c -dfltcc_common.o: - $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/dfltcc_common.c - -dfltcc_common.lo: - $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/dfltcc_common.c - dfltcc_deflate.o: $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/dfltcc_deflate.c diff --git a/src/native/external/zlib-ng/arch/s390/README.md b/src/native/external/zlib-ng/arch/s390/README.md index 2c3165412c004..7b383cc9981aa 100644 --- a/src/native/external/zlib-ng/arch/s390/README.md +++ b/src/native/external/zlib-ng/arch/s390/README.md @@ -61,11 +61,11 @@ integrated with the rest of zlib-ng using hook macros. ## Hook macros DFLTCC takes as arguments a parameter block, an input buffer, an output -buffer and a window. 
`ZALLOC_DEFLATE_STATE()`, `ZALLOC_INFLATE_STATE()`, -`ZFREE_STATE()`, `ZCOPY_DEFLATE_STATE()`, `ZCOPY_INFLATE_STATE()`, -`ZALLOC_WINDOW()`, `ZCOPY_WINDOW()` and `TRY_FREE_WINDOW()` macros encapsulate -allocation details for the parameter block (which is allocated alongside -zlib-ng state) and the window (which must be page-aligned and large enough). +buffer, and a window. Parameter blocks are stored alongside zlib states; +buffers are forwarded from the caller; and window - which must be +4k-aligned and is always 64k large, is managed using the `PAD_WINDOW()`, +`WINDOW_PAD_SIZE`, `HINT_ALIGNED_WINDOW` and `DEFLATE_ADJUST_WINDOW_SIZE()` +and `INFLATE_ADJUST_WINDOW_SIZE()` hooks. Software and hardware window formats do not match, therefore, `deflateSetDictionary()`, `deflateGetDictionary()`, `inflateSetDictionary()` @@ -117,8 +117,7 @@ converted to calls to functions, which are implemented in `arch/s390/dfltcc_*` files. The functions can be grouped in three broad categories: -* Base DFLTCC support, e.g. wrapping the machine instruction - - `dfltcc()` and allocating aligned memory - `dfltcc_alloc_state()`. +* Base DFLTCC support, e.g. wrapping the machine instruction - `dfltcc()`. * Translating between software and hardware data formats, e.g. `dfltcc_deflate_set_dictionary()`. * Translating between software and hardware state machines, e.g. @@ -214,29 +213,31 @@ DFLTCC is a non-privileged instruction, neither special VM/LPAR configuration nor root are required. zlib-ng CI uses an IBM-provided z15 self-hosted builder for the DFLTCC -testing. There are no IBM Z builds of GitHub Actions runner, and -stable qemu-user has problems with .NET apps, so the builder runs the -x86_64 runner version with qemu-user built from the master branch. +testing. There is no official IBM Z GitHub Actions runner, so we build +one inspired by `anup-kodlekere/gaplib`. +Future updates to actions-runner might need an updated patch. 
The .net +version number patch has been separated into a separate file to avoid a +need for constantly changing the patch. ## Configuring the builder. ### Install prerequisites. ``` -$ sudo dnf install docker +sudo dnf install podman ``` -### Add services. +### Add actions-runner service. ``` -$ sudo cp self-hosted-builder/*.service /etc/systemd/system/ -$ sudo systemctl daemon-reload +sudo cp self-hosted-builder/actions-runner.service /etc/systemd/system/ +sudo systemctl daemon-reload ``` -### Create a config file. +### Create a config file, needs github personal access token. ``` -$ sudo tee /etc/actions-runner +# Create file /etc/actions-runner repo=/ access_token= ``` @@ -245,40 +246,32 @@ Access token should have the repo scope, consult https://docs.github.com/en/rest/reference/actions#create-a-registration-token-for-a-repository for details. -### Autostart the x86_64 emulation support. +### Autostart actions-runner. ``` -$ sudo systemctl enable --now qemu-user-static +$ sudo systemctl enable --now actions-runner ``` -### Autostart the runner. +## Rebuilding the container +In order to update the `gaplib-actions-runner` podman container, e.g. to get the +latest OS security fixes, follow these steps: ``` -$ sudo systemctl enable --now actions-runner -``` +# Stop actions-runner service +sudo systemctl stop actions-runner -## Rebuilding the image +# Delete old container +sudo podman container rm gaplib-actions-runner -In order to update the `iiilinuxibmcom/actions-runner` image, e.g. to get the -latest OS security fixes, use the following commands: +# Delete old image +sudo podman image rm localhost/zlib-ng/actions-runner -``` -$ sudo docker build \ - --pull \ - -f self-hosted-builder/actions-runner.Dockerfile \ - -t iiilinuxibmcom/actions-runner -$ sudo systemctl restart actions-runner -``` - -## Removing persistent data +# Build image +sudo podman build --squash -f Dockerfile.zlib-ng --tag zlib-ng/actions-runner --build-arg . 
-The `actions-runner` service stores various temporary data, such as runner -registration information, work directories and logs, in the `actions-runner` -volume. In order to remove it and start from scratch, e.g. when switching the -runner to a different repository, use the following commands: +# Build container +sudo podman create --name=gaplib-actions-runner --env-file=/etc/actions-runner --init --interactive --volume=actions-runner-temp:/home/actions-runner zlib-ng/actions-runner -``` -$ sudo systemctl stop actions-runner -$ sudo docker rm -f actions-runner -$ sudo docker volume rm actions-runner +# Start actions-runner service +sudo systemctl start actions-runner ``` diff --git a/src/native/external/zlib-ng/arch/s390/crc32-vx.c b/src/native/external/zlib-ng/arch/s390/crc32-vx.c index acfa21887e975..b3dcbf70305ce 100644 --- a/src/native/external/zlib-ng/arch/s390/crc32-vx.c +++ b/src/native/external/zlib-ng/arch/s390/crc32-vx.c @@ -12,8 +12,8 @@ * relicensed under the zlib license. */ -#include "../../zbuild.h" -#include "crc32_braid_p.h" +#include "zbuild.h" +#include "arch_functions.h" #include diff --git a/src/native/external/zlib-ng/arch/s390/dfltcc_common.h b/src/native/external/zlib-ng/arch/s390/dfltcc_common.h index b73437411b8e5..a6527ab5df126 100644 --- a/src/native/external/zlib-ng/arch/s390/dfltcc_common.h +++ b/src/native/external/zlib-ng/arch/s390/dfltcc_common.h @@ -3,20 +3,95 @@ #include "zutil.h" -void Z_INTERNAL *PREFIX(dfltcc_alloc_window)(PREFIX3(streamp) strm, uInt items, uInt size); -void Z_INTERNAL PREFIX(dfltcc_copy_window)(void *dest, const void *src, size_t n); -void Z_INTERNAL PREFIX(dfltcc_free_window)(PREFIX3(streamp) strm, void *w); +/* + Parameter Block for Query Available Functions. + */ +struct dfltcc_qaf_param { + char fns[16]; + char reserved1[8]; + char fmts[2]; + char reserved2[6]; +} ALIGNED_(8); -#define ZFREE_STATE ZFREE +/* + Parameter Block for Generate Dynamic-Huffman Table, Compress and Expand. 
+ */ +struct dfltcc_param_v0 { + uint16_t pbvn; /* Parameter-Block-Version Number */ + uint8_t mvn; /* Model-Version Number */ + uint8_t ribm; /* Reserved for IBM use */ + uint32_t reserved32 : 31; + uint32_t cf : 1; /* Continuation Flag */ + uint8_t reserved64[8]; + uint32_t nt : 1; /* New Task */ + uint32_t reserved129 : 1; + uint32_t cvt : 1; /* Check Value Type */ + uint32_t reserved131 : 1; + uint32_t htt : 1; /* Huffman-Table Type */ + uint32_t bcf : 1; /* Block-Continuation Flag */ + uint32_t bcc : 1; /* Block Closing Control */ + uint32_t bhf : 1; /* Block Header Final */ + uint32_t reserved136 : 1; + uint32_t reserved137 : 1; + uint32_t dhtgc : 1; /* DHT Generation Control */ + uint32_t reserved139 : 5; + uint32_t reserved144 : 5; + uint32_t sbb : 3; /* Sub-Byte Boundary */ + uint8_t oesc; /* Operation-Ending-Supplemental Code */ + uint32_t reserved160 : 12; + uint32_t ifs : 4; /* Incomplete-Function Status */ + uint16_t ifl; /* Incomplete-Function Length */ + uint8_t reserved192[8]; + uint8_t reserved256[8]; + uint8_t reserved320[4]; + uint16_t hl; /* History Length */ + uint32_t reserved368 : 1; + uint16_t ho : 15; /* History Offset */ + uint32_t cv; /* Check Value */ + uint32_t eobs : 15; /* End-of-block Symbol */ + uint32_t reserved431: 1; + uint8_t eobl : 4; /* End-of-block Length */ + uint32_t reserved436 : 12; + uint32_t reserved448 : 4; + uint16_t cdhtl : 12; /* Compressed-Dynamic-Huffman Table + Length */ + uint8_t reserved464[6]; + uint8_t cdht[288]; /* Compressed-Dynamic-Huffman Table */ + uint8_t reserved[24]; + uint8_t ribm2[8]; /* Reserved for IBM use */ + uint8_t csb[1152]; /* Continuation-State Buffer */ +} ALIGNED_(8); -#define ZALLOC_WINDOW PREFIX(dfltcc_alloc_window) +/* + Extension of inflate_state and deflate_state. + */ +struct dfltcc_state { + struct dfltcc_param_v0 param; /* Parameter block. */ + struct dfltcc_qaf_param af; /* Available functions. 
*/ + char msg[64]; /* Buffer for strm->msg */ +}; -#define ZCOPY_WINDOW PREFIX(dfltcc_copy_window) +typedef struct { + struct dfltcc_state common; + uint16_t level_mask; /* Levels on which to use DFLTCC */ + uint32_t block_size; /* New block each X bytes */ + size_t block_threshold; /* New block after total_in > X */ + uint32_t dht_threshold; /* New block only if avail_in >= X */ +} arch_deflate_state; -#define ZFREE_WINDOW PREFIX(dfltcc_free_window) +typedef struct { + struct dfltcc_state common; +} arch_inflate_state; -#define TRY_FREE_WINDOW PREFIX(dfltcc_free_window) +/* + History buffer size. + */ +#define HB_BITS 15 +#define HB_SIZE (1 << HB_BITS) +/* + Sizes of deflate block parts. + */ #define DFLTCC_BLOCK_HEADER_BITS 3 #define DFLTCC_HLITS_COUNT_BITS 5 #define DFLTCC_HDISTS_COUNT_BITS 5 diff --git a/src/native/external/zlib-ng/arch/s390/dfltcc_deflate.c b/src/native/external/zlib-ng/arch/s390/dfltcc_deflate.c index 3ad988afc7bb9..90b4b96e9ce33 100644 --- a/src/native/external/zlib-ng/arch/s390/dfltcc_deflate.c +++ b/src/native/external/zlib-ng/arch/s390/dfltcc_deflate.c @@ -19,23 +19,9 @@ #include "dfltcc_deflate.h" #include "dfltcc_detail.h" -struct dfltcc_deflate_state { - struct dfltcc_state common; - uint16_t level_mask; /* Levels on which to use DFLTCC */ - uint32_t block_size; /* New block each X bytes */ - size_t block_threshold; /* New block after total_in > X */ - uint32_t dht_threshold; /* New block only if avail_in >= X */ -}; - -#define GET_DFLTCC_DEFLATE_STATE(state) ((struct dfltcc_deflate_state *)GET_DFLTCC_STATE(state)) - -void Z_INTERNAL *PREFIX(dfltcc_alloc_deflate_state)(PREFIX3(streamp) strm) { - return dfltcc_alloc_state(strm, sizeof(deflate_state), sizeof(struct dfltcc_deflate_state)); -} - void Z_INTERNAL PREFIX(dfltcc_reset_deflate_state)(PREFIX3(streamp) strm) { deflate_state *state = (deflate_state *)strm->state; - struct dfltcc_deflate_state *dfltcc_state = GET_DFLTCC_DEFLATE_STATE(state); + arch_deflate_state *dfltcc_state = 
&state->arch; dfltcc_reset_state(&dfltcc_state->common); @@ -46,14 +32,10 @@ void Z_INTERNAL PREFIX(dfltcc_reset_deflate_state)(PREFIX3(streamp) strm) { dfltcc_state->dht_threshold = DFLTCC_DHT_MIN_SAMPLE_SIZE; } -void Z_INTERNAL PREFIX(dfltcc_copy_deflate_state)(void *dst, const void *src) { - dfltcc_copy_state(dst, src, sizeof(deflate_state), sizeof(struct dfltcc_deflate_state)); -} - static inline int dfltcc_can_deflate_with_params(PREFIX3(streamp) strm, int level, uInt window_bits, int strategy, int reproducible) { deflate_state *state = (deflate_state *)strm->state; - struct dfltcc_deflate_state *dfltcc_state = GET_DFLTCC_DEFLATE_STATE(state); + arch_deflate_state *dfltcc_state = &state->arch; /* Unsupported compression settings */ if ((dfltcc_state->level_mask & (1 << level)) == 0) @@ -82,7 +64,7 @@ int Z_INTERNAL PREFIX(dfltcc_can_deflate)(PREFIX3(streamp) strm) { static inline void dfltcc_gdht(PREFIX3(streamp) strm) { deflate_state *state = (deflate_state *)strm->state; - struct dfltcc_param_v0 *param = &GET_DFLTCC_STATE(state)->param; + struct dfltcc_param_v0 *param = &state->arch.common.param; size_t avail_in = strm->avail_in; dfltcc(DFLTCC_GDHT, param, NULL, NULL, &strm->next_in, &avail_in, NULL); @@ -90,7 +72,7 @@ static inline void dfltcc_gdht(PREFIX3(streamp) strm) { static inline dfltcc_cc dfltcc_cmpr(PREFIX3(streamp) strm) { deflate_state *state = (deflate_state *)strm->state; - struct dfltcc_param_v0 *param = &GET_DFLTCC_STATE(state)->param; + struct dfltcc_param_v0 *param = &state->arch.common.param; size_t avail_in = strm->avail_in; size_t avail_out = strm->avail_out; dfltcc_cc cc; @@ -127,7 +109,7 @@ static inline void send_eobs(PREFIX3(streamp) strm, const struct dfltcc_param_v0 int Z_INTERNAL PREFIX(dfltcc_deflate)(PREFIX3(streamp) strm, int flush, block_state *result) { deflate_state *state = (deflate_state *)strm->state; - struct dfltcc_deflate_state *dfltcc_state = GET_DFLTCC_DEFLATE_STATE(state); + arch_deflate_state *dfltcc_state = 
&state->arch; struct dfltcc_param_v0 *param = &dfltcc_state->common.param; uInt masked_avail_in; dfltcc_cc cc; @@ -328,7 +310,7 @@ int Z_INTERNAL PREFIX(dfltcc_deflate)(PREFIX3(streamp) strm, int flush, block_st */ static int dfltcc_was_deflate_used(PREFIX3(streamp) strm) { deflate_state *state = (deflate_state *)strm->state; - struct dfltcc_param_v0 *param = &GET_DFLTCC_STATE(state)->param; + struct dfltcc_param_v0 *param = &state->arch.common.param; return strm->total_in > 0 || param->nt == 0 || param->hl > 0; } @@ -353,8 +335,7 @@ int Z_INTERNAL PREFIX(dfltcc_deflate_params)(PREFIX3(streamp) strm, int level, i int Z_INTERNAL PREFIX(dfltcc_deflate_done)(PREFIX3(streamp) strm, int flush) { deflate_state *state = (deflate_state *)strm->state; - struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); - struct dfltcc_param_v0 *param = &dfltcc_state->param; + struct dfltcc_param_v0 *param = &state->arch.common.param; /* When deflate(Z_FULL_FLUSH) is called with small avail_out, it might * close the block without resetting the compression state. 
Detect this @@ -382,8 +363,7 @@ int Z_INTERNAL PREFIX(dfltcc_can_set_reproducible)(PREFIX3(streamp) strm, int re int Z_INTERNAL PREFIX(dfltcc_deflate_set_dictionary)(PREFIX3(streamp) strm, const unsigned char *dictionary, uInt dict_length) { deflate_state *state = (deflate_state *)strm->state; - struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); - struct dfltcc_param_v0 *param = &dfltcc_state->param; + struct dfltcc_param_v0 *param = &state->arch.common.param; append_history(param, state->window, dictionary, dict_length); state->strstart = 1; /* Add FDICT to zlib header */ @@ -393,8 +373,7 @@ int Z_INTERNAL PREFIX(dfltcc_deflate_set_dictionary)(PREFIX3(streamp) strm, int Z_INTERNAL PREFIX(dfltcc_deflate_get_dictionary)(PREFIX3(streamp) strm, unsigned char *dictionary, uInt *dict_length) { deflate_state *state = (deflate_state *)strm->state; - struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); - struct dfltcc_param_v0 *param = &dfltcc_state->param; + struct dfltcc_param_v0 *param = &state->arch.common.param; if (dictionary) get_history(param, state->window, dictionary); diff --git a/src/native/external/zlib-ng/arch/s390/dfltcc_deflate.h b/src/native/external/zlib-ng/arch/s390/dfltcc_deflate.h index cb261b156c78b..35e2fd3f626a3 100644 --- a/src/native/external/zlib-ng/arch/s390/dfltcc_deflate.h +++ b/src/native/external/zlib-ng/arch/s390/dfltcc_deflate.h @@ -1,11 +1,10 @@ #ifndef DFLTCC_DEFLATE_H #define DFLTCC_DEFLATE_H +#include "deflate.h" #include "dfltcc_common.h" -void Z_INTERNAL *PREFIX(dfltcc_alloc_deflate_state)(PREFIX3(streamp)); void Z_INTERNAL PREFIX(dfltcc_reset_deflate_state)(PREFIX3(streamp)); -void Z_INTERNAL PREFIX(dfltcc_copy_deflate_state)(void *dst, const void *src); int Z_INTERNAL PREFIX(dfltcc_can_deflate)(PREFIX3(streamp) strm); int Z_INTERNAL PREFIX(dfltcc_deflate)(PREFIX3(streamp) strm, int flush, block_state *result); int Z_INTERNAL PREFIX(dfltcc_deflate_params)(PREFIX3(streamp) strm, int level, int strategy, int *flush); 
@@ -15,9 +14,6 @@ int Z_INTERNAL PREFIX(dfltcc_deflate_set_dictionary)(PREFIX3(streamp) strm, const unsigned char *dictionary, uInt dict_length); int Z_INTERNAL PREFIX(dfltcc_deflate_get_dictionary)(PREFIX3(streamp) strm, unsigned char *dictionary, uInt* dict_length); -#define ZALLOC_DEFLATE_STATE PREFIX(dfltcc_alloc_deflate_state) -#define ZCOPY_DEFLATE_STATE PREFIX(dfltcc_copy_deflate_state) - #define DEFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) \ do { \ if (PREFIX(dfltcc_can_deflate)((strm))) \ @@ -57,4 +53,6 @@ int Z_INTERNAL PREFIX(dfltcc_deflate_get_dictionary)(PREFIX3(streamp) strm, unsi #define DEFLATE_CAN_SET_REPRODUCIBLE PREFIX(dfltcc_can_set_reproducible) +#define DEFLATE_ADJUST_WINDOW_SIZE(n) MAX(n, HB_SIZE) + #endif diff --git a/src/native/external/zlib-ng/arch/s390/dfltcc_detail.h b/src/native/external/zlib-ng/arch/s390/dfltcc_detail.h index 362d94c337fd8..ae6001ba38634 100644 --- a/src/native/external/zlib-ng/arch/s390/dfltcc_detail.h +++ b/src/native/external/zlib-ng/arch/s390/dfltcc_detail.h @@ -1,4 +1,4 @@ -#include "../../zbuild.h" +#include "zbuild.h" #include #ifdef HAVE_SYS_SDT_H @@ -24,18 +24,8 @@ #define DFLTCC_RIBM 0 #endif -/* - Parameter Block for Query Available Functions. - */ #define static_assert(c, msg) __attribute__((unused)) static char static_assert_failed_ ## msg[c ? 1 : -1] -struct dfltcc_qaf_param { - char fns[16]; - char reserved1[8]; - char fmts[2]; - char reserved2[6]; -}; - #define DFLTCC_SIZEOF_QAF 32 static_assert(sizeof(struct dfltcc_qaf_param) == DFLTCC_SIZEOF_QAF, qaf); @@ -74,60 +64,11 @@ static inline int is_dfltcc_enabled(void) { #define DFLTCC_FMT0 0 -/* - Parameter Block for Generate Dynamic-Huffman Table, Compress and Expand. 
- */ #define CVT_CRC32 0 #define CVT_ADLER32 1 #define HTT_FIXED 0 #define HTT_DYNAMIC 1 -struct dfltcc_param_v0 { - uint16_t pbvn; /* Parameter-Block-Version Number */ - uint8_t mvn; /* Model-Version Number */ - uint8_t ribm; /* Reserved for IBM use */ - uint32_t reserved32 : 31; - uint32_t cf : 1; /* Continuation Flag */ - uint8_t reserved64[8]; - uint32_t nt : 1; /* New Task */ - uint32_t reserved129 : 1; - uint32_t cvt : 1; /* Check Value Type */ - uint32_t reserved131 : 1; - uint32_t htt : 1; /* Huffman-Table Type */ - uint32_t bcf : 1; /* Block-Continuation Flag */ - uint32_t bcc : 1; /* Block Closing Control */ - uint32_t bhf : 1; /* Block Header Final */ - uint32_t reserved136 : 1; - uint32_t reserved137 : 1; - uint32_t dhtgc : 1; /* DHT Generation Control */ - uint32_t reserved139 : 5; - uint32_t reserved144 : 5; - uint32_t sbb : 3; /* Sub-Byte Boundary */ - uint8_t oesc; /* Operation-Ending-Supplemental Code */ - uint32_t reserved160 : 12; - uint32_t ifs : 4; /* Incomplete-Function Status */ - uint16_t ifl; /* Incomplete-Function Length */ - uint8_t reserved192[8]; - uint8_t reserved256[8]; - uint8_t reserved320[4]; - uint16_t hl; /* History Length */ - uint32_t reserved368 : 1; - uint16_t ho : 15; /* History Offset */ - uint32_t cv; /* Check Value */ - uint32_t eobs : 15; /* End-of-block Symbol */ - uint32_t reserved431: 1; - uint8_t eobl : 4; /* End-of-block Length */ - uint32_t reserved436 : 12; - uint32_t reserved448 : 4; - uint16_t cdhtl : 12; /* Compressed-Dynamic-Huffman Table - Length */ - uint8_t reserved464[6]; - uint8_t cdht[288]; /* Compressed-Dynamic-Huffman Table */ - uint8_t reserved[24]; - uint8_t ribm2[8]; /* Reserved for IBM use */ - uint8_t csb[1152]; /* Continuation-State Buffer */ -}; - #define DFLTCC_SIZEOF_GDHT_V0 384 #define DFLTCC_SIZEOF_CMPR_XPND_V0 1536 static_assert(offsetof(struct dfltcc_param_v0, csb) == DFLTCC_SIZEOF_GDHT_V0, gdht_v0); @@ -159,8 +100,30 @@ typedef enum { #define DFLTCC_XPND 4 #define HBT_CIRCULAR (1 << 7) 
#define DFLTCC_FN_MASK ((1 << 7) - 1) -#define HB_BITS 15 -#define HB_SIZE (1 << HB_BITS) + +/* Return lengths of high (starting at param->ho) and low (starting at 0) fragments of the circular history buffer. */ +static inline void get_history_lengths(struct dfltcc_param_v0 *param, size_t *hl_high, size_t *hl_low) { + *hl_high = MIN(param->hl, HB_SIZE - param->ho); + *hl_low = param->hl - *hl_high; +} + +/* Notify instrumentation about an upcoming read/write access to the circular history buffer. */ +static inline void instrument_read_write_hist(struct dfltcc_param_v0 *param, void *hist) { + size_t hl_high, hl_low; + + get_history_lengths(param, &hl_high, &hl_low); + instrument_read_write(hist + param->ho, hl_high); + instrument_read_write(hist, hl_low); +} + +/* Notify MSan about a completed write to the circular history buffer. */ +static inline void msan_unpoison_hist(struct dfltcc_param_v0 *param, void *hist) { + size_t hl_high, hl_low; + + get_history_lengths(param, &hl_high, &hl_low); + __msan_unpoison(hist + param->ho, hl_high); + __msan_unpoison(hist, hl_low); +} static inline dfltcc_cc dfltcc(int fn, void *param, unsigned char **op1, size_t *len1, @@ -170,14 +133,33 @@ static inline dfltcc_cc dfltcc(int fn, void *param, size_t t3 = len1 ? *len1 : 0; z_const unsigned char *t4 = op2 ? *op2 : NULL; size_t t5 = len2 ? *len2 : 0; - Z_REGISTER int r0 __asm__("r0") = fn; - Z_REGISTER void *r1 __asm__("r1") = param; - Z_REGISTER unsigned char *r2 __asm__("r2") = t2; - Z_REGISTER size_t r3 __asm__("r3") = t3; - Z_REGISTER z_const unsigned char *r4 __asm__("r4") = t4; - Z_REGISTER size_t r5 __asm__("r5") = t5; + Z_REGISTER int r0 __asm__("r0"); + Z_REGISTER void *r1 __asm__("r1"); + Z_REGISTER unsigned char *r2 __asm__("r2"); + Z_REGISTER size_t r3 __asm__("r3"); + Z_REGISTER z_const unsigned char *r4 __asm__("r4"); + Z_REGISTER size_t r5 __asm__("r5"); int cc; + /* Insert pre-instrumentation for DFLTCC. 
*/ + switch (fn & DFLTCC_FN_MASK) { + case DFLTCC_QAF: + instrument_write(param, DFLTCC_SIZEOF_QAF); + break; + case DFLTCC_GDHT: + instrument_read_write(param, DFLTCC_SIZEOF_GDHT_V0); + instrument_read(t4, t5); + break; + case DFLTCC_CMPR: + case DFLTCC_XPND: + instrument_read_write(param, DFLTCC_SIZEOF_CMPR_XPND_V0); + instrument_read(t4, t5); + instrument_write(t2, t3); + instrument_read_write_hist(param, hist); + break; + } + + r0 = fn; r1 = param; r2 = t2; r3 = t3; r4 = t4; r5 = t5; __asm__ volatile( #ifdef HAVE_SYS_SDT_H STAP_PROBE_ASM(zlib, dfltcc_entry, STAP_PROBE_ASM_TEMPLATE(5)) @@ -201,6 +183,7 @@ static inline dfltcc_cc dfltcc(int fn, void *param, : "cc", "memory"); t2 = r2; t3 = r3; t4 = r4; t5 = r5; + /* Insert post-instrumentation for DFLTCC. */ switch (fn & DFLTCC_FN_MASK) { case DFLTCC_QAF: __msan_unpoison(param, DFLTCC_SIZEOF_QAF); @@ -211,10 +194,12 @@ static inline dfltcc_cc dfltcc(int fn, void *param, case DFLTCC_CMPR: __msan_unpoison(param, DFLTCC_SIZEOF_CMPR_XPND_V0); __msan_unpoison(orig_t2, t2 - orig_t2 + (((struct dfltcc_param_v0 *)param)->sbb == 0 ? 0 : 1)); + msan_unpoison_hist(param, hist); break; case DFLTCC_XPND: __msan_unpoison(param, DFLTCC_SIZEOF_CMPR_XPND_V0); __msan_unpoison(orig_t2, t2 - orig_t2); + msan_unpoison_hist(param, hist); break; } @@ -229,23 +214,8 @@ static inline dfltcc_cc dfltcc(int fn, void *param, return (cc >> 28) & 3; } -/* - Extension of inflate_state and deflate_state. Must be doubleword-aligned. -*/ -struct dfltcc_state { - struct dfltcc_param_v0 param; /* Parameter block. */ - struct dfltcc_qaf_param af; /* Available functions. 
*/ - char msg[64]; /* Buffer for strm->msg */ -}; - #define ALIGN_UP(p, size) (__typeof__(p))(((uintptr_t)(p) + ((size) - 1)) & ~((size) - 1)) -#define GET_DFLTCC_STATE(state) ((struct dfltcc_state *)((char *)(state) + ALIGN_UP(sizeof(*state), 8))) - -static inline void *dfltcc_alloc_state(PREFIX3(streamp) strm, uInt size, uInt extension_size) { - return ZALLOC(strm, 1, ALIGN_UP(size, 8) + extension_size); -} - static inline void dfltcc_reset_state(struct dfltcc_state *dfltcc_state) { /* Initialize available functions */ if (is_dfltcc_enabled()) { @@ -297,12 +267,9 @@ static inline void append_history(struct dfltcc_param_v0 *param, unsigned char * static inline void get_history(struct dfltcc_param_v0 *param, const unsigned char *history, unsigned char *buf) { - if (param->ho + param->hl <= HB_SIZE) - /* Circular history buffer does not wrap - copy one chunk */ - memcpy(buf, history + param->ho, param->hl); - else { - /* Circular history buffer wraps - copy two chunks */ - memcpy(buf, history + param->ho, HB_SIZE - param->ho); - memcpy(buf + HB_SIZE - param->ho, history, param->ho + param->hl - HB_SIZE); - } + size_t hl_high, hl_low; + + get_history_lengths(param, &hl_high, &hl_low); + memcpy(buf, history + param->ho, hl_high); + memcpy(buf + hl_high, history, hl_low); } diff --git a/src/native/external/zlib-ng/arch/s390/dfltcc_inflate.c b/src/native/external/zlib-ng/arch/s390/dfltcc_inflate.c index f0d3951b592b8..cc3cb39781c8a 100644 --- a/src/native/external/zlib-ng/arch/s390/dfltcc_inflate.c +++ b/src/native/external/zlib-ng/arch/s390/dfltcc_inflate.c @@ -20,24 +20,15 @@ #include "dfltcc_inflate.h" #include "dfltcc_detail.h" -struct inflate_state Z_INTERNAL *PREFIX(dfltcc_alloc_inflate_state)(PREFIX3(streamp) strm) { - return (struct inflate_state *)dfltcc_alloc_state(strm, sizeof(struct inflate_state), sizeof(struct dfltcc_state)); -} - void Z_INTERNAL PREFIX(dfltcc_reset_inflate_state)(PREFIX3(streamp) strm) { struct inflate_state *state = (struct inflate_state 
*)strm->state; - struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); - dfltcc_reset_state(dfltcc_state); -} - -void Z_INTERNAL PREFIX(dfltcc_copy_inflate_state)(struct inflate_state *dst, const struct inflate_state *src) { - dfltcc_copy_state(dst, src, sizeof(struct inflate_state), sizeof(struct dfltcc_state)); + dfltcc_reset_state(&state->arch.common); } int Z_INTERNAL PREFIX(dfltcc_can_inflate)(PREFIX3(streamp) strm) { struct inflate_state *state = (struct inflate_state *)strm->state; - struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); + struct dfltcc_state *dfltcc_state = &state->arch.common; /* Unsupported hardware */ return is_bit_set(dfltcc_state->af.fns, DFLTCC_XPND) && is_bit_set(dfltcc_state->af.fmts, DFLTCC_FMT0); @@ -45,7 +36,7 @@ int Z_INTERNAL PREFIX(dfltcc_can_inflate)(PREFIX3(streamp) strm) { static inline dfltcc_cc dfltcc_xpnd(PREFIX3(streamp) strm) { struct inflate_state *state = (struct inflate_state *)strm->state; - struct dfltcc_param_v0 *param = &GET_DFLTCC_STATE(state)->param; + struct dfltcc_param_v0 *param = &state->arch.common.param; size_t avail_in = strm->avail_in; size_t avail_out = strm->avail_out; dfltcc_cc cc; @@ -60,7 +51,7 @@ static inline dfltcc_cc dfltcc_xpnd(PREFIX3(streamp) strm) { dfltcc_inflate_action Z_INTERNAL PREFIX(dfltcc_inflate)(PREFIX3(streamp) strm, int flush, int *ret) { struct inflate_state *state = (struct inflate_state *)strm->state; - struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); + struct dfltcc_state *dfltcc_state = &state->arch.common; struct dfltcc_param_v0 *param = &dfltcc_state->param; dfltcc_cc cc; @@ -86,10 +77,9 @@ dfltcc_inflate_action Z_INTERNAL PREFIX(dfltcc_inflate)(PREFIX3(streamp) strm, i if (strm->avail_in == 0 && !param->cf) return DFLTCC_INFLATE_BREAK; - if (PREFIX(inflate_ensure_window)(state)) { - state->mode = MEM; - return DFLTCC_INFLATE_CONTINUE; - } + /* if window not in use yet, initialize */ + if (state->wsize == 0) + state->wsize = 1U << state->wbits; /* 
Translate stream to parameter block */ param->cvt = ((state->wrap & 4) && state->flags) ? CVT_CRC32 : CVT_ADLER32; @@ -123,9 +113,8 @@ dfltcc_inflate_action Z_INTERNAL PREFIX(dfltcc_inflate)(PREFIX3(streamp) strm, i int Z_INTERNAL PREFIX(dfltcc_was_inflate_used)(PREFIX3(streamp) strm) { struct inflate_state *state = (struct inflate_state *)strm->state; - struct dfltcc_param_v0 *param = &GET_DFLTCC_STATE(state)->param; - return !param->nt; + return !state->arch.common.param.nt; } /* @@ -153,7 +142,7 @@ static void rotate(unsigned char *start, unsigned char *pivot, unsigned char *en int Z_INTERNAL PREFIX(dfltcc_inflate_disable)(PREFIX3(streamp) strm) { struct inflate_state *state = (struct inflate_state *)strm->state; - struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); + struct dfltcc_state *dfltcc_state = &state->arch.common; struct dfltcc_param_v0 *param = &dfltcc_state->param; if (!PREFIX(dfltcc_can_inflate)(strm)) @@ -178,13 +167,11 @@ int Z_INTERNAL PREFIX(dfltcc_inflate_disable)(PREFIX3(streamp) strm) { int Z_INTERNAL PREFIX(dfltcc_inflate_set_dictionary)(PREFIX3(streamp) strm, const unsigned char *dictionary, uInt dict_length) { struct inflate_state *state = (struct inflate_state *)strm->state; - struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); - struct dfltcc_param_v0 *param = &dfltcc_state->param; + struct dfltcc_param_v0 *param = &state->arch.common.param; - if (PREFIX(inflate_ensure_window)(state)) { - state->mode = MEM; - return Z_MEM_ERROR; - } + /* if window not in use yet, initialize */ + if (state->wsize == 0) + state->wsize = 1U << state->wbits; append_history(param, state->window, dictionary, dict_length); state->havedict = 1; @@ -194,8 +181,7 @@ int Z_INTERNAL PREFIX(dfltcc_inflate_set_dictionary)(PREFIX3(streamp) strm, int Z_INTERNAL PREFIX(dfltcc_inflate_get_dictionary)(PREFIX3(streamp) strm, unsigned char *dictionary, uInt *dict_length) { struct inflate_state *state = (struct inflate_state *)strm->state; - struct 
dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); - struct dfltcc_param_v0 *param = &dfltcc_state->param; + struct dfltcc_param_v0 *param = &state->arch.common.param; if (dictionary && state->window) get_history(param, state->window, dictionary); diff --git a/src/native/external/zlib-ng/arch/s390/dfltcc_inflate.h b/src/native/external/zlib-ng/arch/s390/dfltcc_inflate.h index 632fada621ace..3623f8ed7fee2 100644 --- a/src/native/external/zlib-ng/arch/s390/dfltcc_inflate.h +++ b/src/native/external/zlib-ng/arch/s390/dfltcc_inflate.h @@ -3,9 +3,7 @@ #include "dfltcc_common.h" -struct inflate_state Z_INTERNAL *PREFIX(dfltcc_alloc_inflate_state)(PREFIX3(streamp) strm); void Z_INTERNAL PREFIX(dfltcc_reset_inflate_state)(PREFIX3(streamp) strm); -void Z_INTERNAL PREFIX(dfltcc_copy_inflate_state)(struct inflate_state *dst, const struct inflate_state *src); int Z_INTERNAL PREFIX(dfltcc_can_inflate)(PREFIX3(streamp) strm); typedef enum { DFLTCC_INFLATE_CONTINUE, @@ -20,9 +18,6 @@ int Z_INTERNAL PREFIX(dfltcc_inflate_set_dictionary)(PREFIX3(streamp) strm, int Z_INTERNAL PREFIX(dfltcc_inflate_get_dictionary)(PREFIX3(streamp) strm, unsigned char *dictionary, uInt* dict_length); -#define ZALLOC_INFLATE_STATE PREFIX(dfltcc_alloc_inflate_state) -#define ZCOPY_INFLATE_STATE PREFIX(dfltcc_copy_inflate_state) - #define INFLATE_RESET_KEEP_HOOK PREFIX(dfltcc_reset_inflate_state) #define INFLATE_PRIME_HOOK(strm, bits, value) \ @@ -67,4 +62,6 @@ int Z_INTERNAL PREFIX(dfltcc_inflate_get_dictionary)(PREFIX3(streamp) strm, return PREFIX(dfltcc_inflate_get_dictionary)((strm), (dict), (dict_len)); \ } while (0) +#define INFLATE_ADJUST_WINDOW_SIZE(n) MAX(n, HB_SIZE) + #endif diff --git a/src/native/external/zlib-ng/arch/s390/s390_features.c b/src/native/external/zlib-ng/arch/s390/s390_features.c index 82901060ebbb2..629025d5bb147 100644 --- a/src/native/external/zlib-ng/arch/s390/s390_features.c +++ b/src/native/external/zlib-ng/arch/s390/s390_features.c @@ -1,4 +1,4 @@ -#include 
"../../zbuild.h" +#include "zbuild.h" #include "s390_features.h" #ifdef HAVE_SYS_AUXV_H diff --git a/src/native/external/zlib-ng/arch/s390/s390_features.h b/src/native/external/zlib-ng/arch/s390/s390_features.h index b8ffef74d8441..fb2ac14b26b5d 100644 --- a/src/native/external/zlib-ng/arch/s390/s390_features.h +++ b/src/native/external/zlib-ng/arch/s390/s390_features.h @@ -1,3 +1,7 @@ +/* s390_features.h -- check for s390 features. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + #ifndef S390_FEATURES_H_ #define S390_FEATURES_H_ diff --git a/src/native/external/zlib-ng/arch/s390/s390_functions.h b/src/native/external/zlib-ng/arch/s390/s390_functions.h new file mode 100644 index 0000000000000..e9c67978f0a03 --- /dev/null +++ b/src/native/external/zlib-ng/arch/s390/s390_functions.h @@ -0,0 +1,20 @@ +/* s390_functions.h -- s390 implementations for arch-specific functions. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifndef S390_FUNCTIONS_H_ +#define S390_FUNCTIONS_H_ + +#ifdef S390_CRC32_VX +uint32_t crc32_s390_vx(uint32_t crc, const uint8_t *buf, size_t len); +#endif + + +#ifdef DISABLE_RUNTIME_CPU_DETECTION +# if defined(S390_CRC32_VX) && defined(__zarch__) && __ARCH__ >= 11 && defined(__VX__) +# undef native_crc32 +# define native_crc32 = crc32_s390_vx +# endif +#endif + +#endif diff --git a/src/native/external/zlib-ng/arch/x86/Makefile.in b/src/native/external/zlib-ng/arch/x86/Makefile.in index 7c052469b2987..c13cd179c0cc7 100644 --- a/src/native/external/zlib-ng/arch/x86/Makefile.in +++ b/src/native/external/zlib-ng/arch/x86/Makefile.in @@ -35,7 +35,6 @@ all: \ chunkset_ssse3.o chunkset_ssse3.lo \ compare256_avx2.o compare256_avx2.lo \ compare256_sse2.o compare256_sse2.lo \ - insert_string_sse42.o insert_string_sse42.lo \ crc32_pclmulqdq.o crc32_pclmulqdq.lo \ crc32_vpclmulqdq.o crc32_vpclmulqdq.lo \ slide_hash_avx2.o slide_hash_avx2.lo \ @@ -77,12 +76,6 @@ compare256_sse2.o: 
compare256_sse2.lo: $(CC) $(SFLAGS) $(SSE2FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/compare256_sse2.c -insert_string_sse42.o: - $(CC) $(CFLAGS) $(SSE42FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_sse42.c - -insert_string_sse42.lo: - $(CC) $(SFLAGS) $(SSE42FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_sse42.c - crc32_pclmulqdq.o: $(CC) $(CFLAGS) $(PCLMULFLAG) $(SSE42FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_pclmulqdq.c @@ -90,10 +83,10 @@ crc32_pclmulqdq.lo: $(CC) $(SFLAGS) $(PCLMULFLAG) $(SSE42FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_pclmulqdq.c crc32_vpclmulqdq.o: - $(CC) $(CFLAGS) $(PCLMULFLAG) $(SSE42FLAG) $(VPCLMULFLAG) $(AVX512FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_vpclmulqdq.c + $(CC) $(CFLAGS) $(PCLMULFLAG) $(VPCLMULFLAG) $(AVX512FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_vpclmulqdq.c crc32_vpclmulqdq.lo: - $(CC) $(SFLAGS) $(PCLMULFLAG) $(SSE42FLAG) $(VPCLMULFLAG) $(AVX512FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_vpclmulqdq.c + $(CC) $(SFLAGS) $(PCLMULFLAG) $(VPCLMULFLAG) $(AVX512FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_vpclmulqdq.c slide_hash_avx2.o: $(CC) $(CFLAGS) $(AVX2FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_avx2.c diff --git a/src/native/external/zlib-ng/arch/x86/adler32_avx2.c b/src/native/external/zlib-ng/arch/x86/adler32_avx2.c index e3ac6705cef35..38e7f068e3981 100644 --- a/src/native/external/zlib-ng/arch/x86/adler32_avx2.c +++ b/src/native/external/zlib-ng/arch/x86/adler32_avx2.c @@ -9,24 +9,15 @@ #ifdef X86_AVX2 -#include "../../zbuild.h" +#include "zbuild.h" #include -#include "../../adler32_fold.h" -#include "../../adler32_p.h" +#include "adler32_p.h" #include "adler32_avx2_p.h" #include "x86_intrins.h" -#ifdef X86_SSE42 extern uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); extern uint32_t adler32_ssse3(uint32_t 
adler, const uint8_t *src, size_t len); -#define copy_sub32(a, b, c, d) adler32_fold_copy_sse42(a, b, c, d) -#define sub32(a, b, c) adler32_ssse3(a, b, c) -#else -#define copy_sub32(a, b, c, d) adler32_copy_len_16(adler0, c, b, d, adler1) -#define sub32(a, b, c) adler32_len_16(adler0, b, c, adler1) -#endif - static inline uint32_t adler32_fold_copy_impl(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len, const int COPY) { if (src == NULL) return 1L; if (len == 0) return adler; @@ -44,9 +35,9 @@ static inline uint32_t adler32_fold_copy_impl(uint32_t adler, uint8_t *dst, cons } } else if (len < 32) { if (COPY) { - return copy_sub32(adler, dst, src, len); + return adler32_fold_copy_sse42(adler, dst, src, len); } else { - return sub32(adler, src, len); + return adler32_ssse3(adler, src, len); } } diff --git a/src/native/external/zlib-ng/arch/x86/adler32_avx512.c b/src/native/external/zlib-ng/arch/x86/adler32_avx512.c index aa6cc170185b6..626c4807f821b 100644 --- a/src/native/external/zlib-ng/arch/x86/adler32_avx512.c +++ b/src/native/external/zlib-ng/arch/x86/adler32_avx512.c @@ -8,10 +8,9 @@ #ifdef X86_AVX512 -#include "../../zbuild.h" -#include "../../adler32_p.h" -#include "../../adler32_fold.h" -#include "../../cpu_features.h" +#include "zbuild.h" +#include "adler32_p.h" +#include "arch_functions.h" #include #include "x86_intrins.h" #include "adler32_avx512_p.h" @@ -33,13 +32,7 @@ static inline uint32_t adler32_fold_copy_impl(uint32_t adler, uint8_t *dst, cons _mm512_mask_storeu_epi8(dst, storemask, copy_vec); } -#ifdef X86_AVX2 return adler32_avx2(adler, src, len); -#elif defined(X86_SSSE3) - return adler32_ssse3(adler, src, len); -#else - return adler32_len_16(adler0, src, len, adler1); -#endif } __m512i vbuf, vs1_0, vs3; diff --git a/src/native/external/zlib-ng/arch/x86/adler32_avx512_vnni.c b/src/native/external/zlib-ng/arch/x86/adler32_avx512_vnni.c index 771f7ebe043f2..4c5cfc1cadb46 100644 --- 
a/src/native/external/zlib-ng/arch/x86/adler32_avx512_vnni.c +++ b/src/native/external/zlib-ng/arch/x86/adler32_avx512_vnni.c @@ -9,11 +9,10 @@ #ifdef X86_AVX512VNNI -#include "../../zbuild.h" -#include "../../adler32_p.h" -#include "../../cpu_features.h" +#include "zbuild.h" +#include "adler32_p.h" +#include "arch_functions.h" #include -#include "../../adler32_fold.h" #include "x86_intrins.h" #include "adler32_avx512_p.h" #include "adler32_avx2_p.h" @@ -28,20 +27,10 @@ Z_INTERNAL uint32_t adler32_avx512_vnni(uint32_t adler, const uint8_t *src, size rem_peel: if (len < 32) -#if defined(X86_SSSE3) return adler32_ssse3(adler, src, len); -#else - return adler32_len_16(adler0, src, len, adler1); -#endif if (len < 64) -#ifdef X86_AVX2 return adler32_avx2(adler, src, len); -#elif defined(X86_SSE3) - return adler32_ssse3(adler, src, len); -#else - return adler32_len_16(adler0, src, len, adler1); -#endif const __m512i dot2v = _mm512_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, @@ -135,11 +124,7 @@ Z_INTERNAL uint32_t adler32_fold_copy_avx512_vnni(uint32_t adler, uint8_t *dst, __m256i copy_vec = _mm256_maskz_loadu_epi8(storemask, src); _mm256_mask_storeu_epi8(dst, storemask, copy_vec); -#if defined(X86_SSSE3) return adler32_ssse3(adler, src, len); -#else - return adler32_len_16(adler0, src, len, adler1); -#endif } const __m256i dot2v = _mm256_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, diff --git a/src/native/external/zlib-ng/arch/x86/adler32_sse42.c b/src/native/external/zlib-ng/arch/x86/adler32_sse42.c index 257a360982ed8..df0739d1650ae 100644 --- a/src/native/external/zlib-ng/arch/x86/adler32_sse42.c +++ b/src/native/external/zlib-ng/arch/x86/adler32_sse42.c @@ -6,9 +6,8 @@ * For conditions of distribution and use, see copyright notice in zlib.h */ -#include "../../zbuild.h" -#include "../../adler32_p.h" -#include "../../adler32_fold.h" 
+#include "zbuild.h" +#include "adler32_p.h" #include "adler32_ssse3_p.h" #include diff --git a/src/native/external/zlib-ng/arch/x86/adler32_ssse3.c b/src/native/external/zlib-ng/arch/x86/adler32_ssse3.c index ae819d632e53a..15e2f78ba35ac 100644 --- a/src/native/external/zlib-ng/arch/x86/adler32_ssse3.c +++ b/src/native/external/zlib-ng/arch/x86/adler32_ssse3.c @@ -6,8 +6,8 @@ * For conditions of distribution and use, see copyright notice in zlib.h */ -#include "../../zbuild.h" -#include "../../adler32_p.h" +#include "zbuild.h" +#include "adler32_p.h" #include "adler32_ssse3_p.h" #ifdef X86_SSSE3 diff --git a/src/native/external/zlib-ng/arch/x86/chunkset_ssse3.c b/src/native/external/zlib-ng/arch/x86/chunkset_ssse3.c index c06d1b37bd7e9..722ecd3d51e30 100644 --- a/src/native/external/zlib-ng/arch/x86/chunkset_ssse3.c +++ b/src/native/external/zlib-ng/arch/x86/chunkset_ssse3.c @@ -4,10 +4,7 @@ #include "zbuild.h" -/* This requires SSE2 support. While it's implicit with SSSE3, we can minimize - * code size by sharing the chunkcopy functions, which will certainly compile - * to identical machine code */ -#if defined(X86_SSSE3) && defined(X86_SSE2) +#if defined(X86_SSSE3) #include #include "../generic/chunk_permute_table.h" @@ -19,8 +16,6 @@ typedef __m128i chunk_t; #define HAVE_CHUNKMEMSET_4 #define HAVE_CHUNKMEMSET_8 #define HAVE_CHUNK_MAG -#define HAVE_CHUNKCOPY -#define HAVE_CHUNKUNROLL static const lut_rem_pair perm_idx_lut[13] = { {0, 1}, /* 3 */ @@ -83,14 +78,11 @@ static inline chunk_t GET_CHUNK_MAG(uint8_t *buf, uint32_t *chunk_rem, uint32_t return ret_vec; } -extern uint8_t* chunkcopy_sse2(uint8_t *out, uint8_t const *from, unsigned len); -extern uint8_t* chunkunroll_sse2(uint8_t *out, unsigned *dist, unsigned *len); - #define CHUNKSIZE chunksize_ssse3 #define CHUNKMEMSET chunkmemset_ssse3 #define CHUNKMEMSET_SAFE chunkmemset_safe_ssse3 -#define CHUNKCOPY chunkcopy_sse2 -#define CHUNKUNROLL chunkunroll_sse2 +#define CHUNKCOPY chunkcopy_ssse3 +#define 
CHUNKUNROLL chunkunroll_ssse3 #include "chunkset_tpl.h" diff --git a/src/native/external/zlib-ng/arch/x86/compare256_avx2.c b/src/native/external/zlib-ng/arch/x86/compare256_avx2.c index 1318a0e333a49..d2c835e4ee8ec 100644 --- a/src/native/external/zlib-ng/arch/x86/compare256_avx2.c +++ b/src/native/external/zlib-ng/arch/x86/compare256_avx2.c @@ -3,8 +3,9 @@ * For conditions of distribution and use, see copyright notice in zlib.h */ -#include "../../zbuild.h" - +#include "zbuild.h" +#include "zutil_p.h" +#include "deflate.h" #include "fallback_builtins.h" #if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ) diff --git a/src/native/external/zlib-ng/arch/x86/compare256_sse2.c b/src/native/external/zlib-ng/arch/x86/compare256_sse2.c index aad4bd240d202..216bb3a705c4a 100644 --- a/src/native/external/zlib-ng/arch/x86/compare256_sse2.c +++ b/src/native/external/zlib-ng/arch/x86/compare256_sse2.c @@ -3,8 +3,9 @@ * For conditions of distribution and use, see copyright notice in zlib.h */ -#include "../../zbuild.h" - +#include "zbuild.h" +#include "zutil_p.h" +#include "deflate.h" #include "fallback_builtins.h" #if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ) diff --git a/src/native/external/zlib-ng/arch/x86/crc32_fold_pclmulqdq_tpl.h b/src/native/external/zlib-ng/arch/x86/crc32_fold_pclmulqdq_tpl.h index 3e799283173cd..1ffe201dda7ce 100644 --- a/src/native/external/zlib-ng/arch/x86/crc32_fold_pclmulqdq_tpl.h +++ b/src/native/external/zlib-ng/arch/x86/crc32_fold_pclmulqdq_tpl.h @@ -26,27 +26,26 @@ Z_INTERNAL void CRC32_FOLD(crc32_fold *crc, const uint8_t *src, size_t len, uint __m128i xmm_t0, xmm_t1, xmm_t2, xmm_t3; __m128i xmm_crc0, xmm_crc1, xmm_crc2, xmm_crc3; __m128i xmm_crc_part = _mm_setzero_si128(); -#ifdef COPY char ALIGNED_(16) partial_buf[16] = { 0 }; -#else +#ifndef COPY __m128i xmm_initial = _mm_cvtsi32_si128(init_crc); int32_t first = init_crc != 0; - /* Technically the CRC functions don't even call this for input < 64, but a bare minimum of 31 - * bytes of 
input is needed for the aligning load that occurs. If there's an initial CRC, to - * carry it forward through the folded CRC there must be 16 - src % 16 + 16 bytes available, which - * by definition can be up to 15 bytes + one full vector load. */ - assert(len >= 31 || first == 0); + /* The CRC functions don't call this for input < 16, as a minimum of 16 bytes of input is needed + * for the aligning load that occurs. If there's an initial CRC, to carry it forward through + * the folded CRC there must be 16 - src % 16 + 16 bytes available, which by definition can be + * up to 15 bytes + one full vector load. */ + assert(len >= 16 || first == 0); #endif crc32_fold_load((__m128i *)crc->fold, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3); if (len < 16) { -#ifdef COPY if (len == 0) return; memcpy(partial_buf, src, len); xmm_crc_part = _mm_load_si128((const __m128i *)partial_buf); +#ifdef COPY memcpy(dst, partial_buf, len); #endif goto partial; @@ -63,9 +62,23 @@ Z_INTERNAL void CRC32_FOLD(crc32_fold *crc, const uint8_t *src, size_t len, uint if (algn_diff < 4 && init_crc != 0) { xmm_t0 = xmm_crc_part; - xmm_crc_part = _mm_loadu_si128((__m128i*)src + 1); - fold_1(&xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3); - xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_t0); + if (len >= 32) { + xmm_crc_part = _mm_loadu_si128((__m128i*)src + 1); + fold_1(&xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3); + xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_t0); + } else { + memcpy(partial_buf, src + 16, len - 16); + xmm_crc_part = _mm_load_si128((__m128i*)partial_buf); + fold_1(&xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3); + xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_t0); + src += 16; + len -= 16; +#ifdef COPY + dst -= algn_diff; +#endif + goto partial; + } + src += 16; len -= 16; } diff --git a/src/native/external/zlib-ng/arch/x86/crc32_pclmulqdq_tpl.h b/src/native/external/zlib-ng/arch/x86/crc32_pclmulqdq_tpl.h index 05d3b15257f74..3a4f6af5af35c 100644 --- a/src/native/external/zlib-ng/arch/x86/crc32_pclmulqdq_tpl.h 
+++ b/src/native/external/zlib-ng/arch/x86/crc32_pclmulqdq_tpl.h @@ -17,7 +17,7 @@ * For conditions of distribution and use, see copyright notice in zlib.h */ -#include "../../zbuild.h" +#include "zbuild.h" #include #include @@ -26,8 +26,9 @@ # include #endif -#include "../../crc32_fold.h" -#include "../../crc32_braid_p.h" +#include "crc32.h" +#include "crc32_braid_p.h" +#include "crc32_braid_tbl.h" #include "x86_intrins.h" #include @@ -350,11 +351,22 @@ Z_INTERNAL uint32_t CRC32_FOLD_FINAL(crc32_fold *crc) { return crc->value; } +static inline uint32_t crc32_small(uint32_t crc, const uint8_t *buf, size_t len) { + uint32_t c = (~crc) & 0xffffffff; + + while (len) { + len--; + DO1; + } + + return c ^ 0xffffffff; +} + Z_INTERNAL uint32_t CRC32(uint32_t crc32, const uint8_t *buf, size_t len) { - /* For lens < 64, crc32_braid method is faster. The CRC32 instruction for - * these short lengths might also prove to be effective */ - if (len < 64) - return PREFIX(crc32_braid)(crc32, buf, len); + /* For lens smaller than ~12, crc32_small method is faster. 
+ * But there are also minimum requirements for the pclmul functions due to alignment */ + if (len < 16) + return crc32_small(crc32, buf, len); crc32_fold ALIGNED_(16) crc_state; CRC32_FOLD_RESET(&crc_state); diff --git a/src/native/external/zlib-ng/arch/x86/crc32_vpclmulqdq.c b/src/native/external/zlib-ng/arch/x86/crc32_vpclmulqdq.c index ec641b43263be..cad35b14eefc2 100644 --- a/src/native/external/zlib-ng/arch/x86/crc32_vpclmulqdq.c +++ b/src/native/external/zlib-ng/arch/x86/crc32_vpclmulqdq.c @@ -3,7 +3,7 @@ * For conditions of distribution and use, see copyright notice in zlib.h */ -#if defined(X86_PCLMULQDQ_CRC) && defined(X86_VPCLMULQDQ_CRC) +#ifdef X86_VPCLMULQDQ_CRC #define X86_VPCLMULQDQ #define CRC32_FOLD_COPY crc32_fold_vpclmulqdq_copy diff --git a/src/native/external/zlib-ng/arch/x86/slide_hash_avx2.c b/src/native/external/zlib-ng/arch/x86/slide_hash_avx2.c index 39254204ef462..853347323436e 100644 --- a/src/native/external/zlib-ng/arch/x86/slide_hash_avx2.c +++ b/src/native/external/zlib-ng/arch/x86/slide_hash_avx2.c @@ -9,8 +9,8 @@ * * For conditions of distribution and use, see copyright notice in zlib.h */ -#include "../../zbuild.h" -#include "../../deflate.h" +#include "zbuild.h" +#include "deflate.h" #include diff --git a/src/native/external/zlib-ng/arch/x86/slide_hash_sse2.c b/src/native/external/zlib-ng/arch/x86/slide_hash_sse2.c index 5e75aedba5efb..6900a59d15dfa 100644 --- a/src/native/external/zlib-ng/arch/x86/slide_hash_sse2.c +++ b/src/native/external/zlib-ng/arch/x86/slide_hash_sse2.c @@ -8,8 +8,8 @@ * * For conditions of distribution and use, see copyright notice in zlib.h */ -#include "../../zbuild.h" -#include "../../deflate.h" +#include "zbuild.h" +#include "deflate.h" #include #include diff --git a/src/native/external/zlib-ng/arch/x86/x86_features.c b/src/native/external/zlib-ng/arch/x86/x86_features.c index 8d11564c24f94..58cb4df341f2a 100644 --- a/src/native/external/zlib-ng/arch/x86/x86_features.c +++ 
b/src/native/external/zlib-ng/arch/x86/x86_features.c @@ -7,7 +7,7 @@ * For conditions of distribution and use, see copyright notice in zlib.h */ -#include "../../zbuild.h" +#include "zbuild.h" #include "x86_features.h" #ifdef _MSC_VER @@ -15,6 +15,13 @@ #else // Newer versions of GCC and clang come with cpuid.h # include +# ifdef X86_HAVE_XSAVE_INTRIN +# if __GNUC__ == 8 +# include +# else +# include +# endif +# endif #endif #include @@ -29,6 +36,7 @@ static inline void cpuid(int info, unsigned* eax, unsigned* ebx, unsigned* ecx, *ecx = registers[2]; *edx = registers[3]; #else + *eax = *ebx = *ecx = *edx = 0; __cpuid(info, *eax, *ebx, *ecx, *edx); #endif } @@ -43,12 +51,13 @@ static inline void cpuidex(int info, int subinfo, unsigned* eax, unsigned* ebx, *ecx = registers[2]; *edx = registers[3]; #else + *eax = *ebx = *ecx = *edx = 0; __cpuid_count(info, subinfo, *eax, *ebx, *ecx, *edx); #endif } static inline uint64_t xgetbv(unsigned int xcr) { -#ifdef _MSC_VER +#if defined(_MSC_VER) || defined(X86_HAVE_XSAVE_INTRIN) return _xgetbv(xcr); #else uint32_t eax, edx; @@ -90,7 +99,16 @@ void Z_INTERNAL x86_check_features(struct x86_cpu_features *features) { // check AVX512 bits if the OS supports saving ZMM registers if (features->has_os_save_zmm) { - features->has_avx512 = ebx & 0x00010000; + features->has_avx512f = ebx & 0x00010000; + if (features->has_avx512f) { + // According to the Intel Software Developer's Manual, AVX512F must be enabled too in order to enable + // AVX512(DQ,BW,VL). 
+ features->has_avx512dq = ebx & 0x00020000; + features->has_avx512bw = ebx & 0x40000000; + features->has_avx512vl = ebx & 0x80000000; + } + features->has_avx512_common = features->has_avx512f && features->has_avx512dq && features->has_avx512bw \ + && features->has_avx512vl; features->has_avx512vnni = ecx & 0x800; } } diff --git a/src/native/external/zlib-ng/arch/x86/x86_features.h b/src/native/external/zlib-ng/arch/x86/x86_features.h index 4a36bde835d32..6daa5e38282ff 100644 --- a/src/native/external/zlib-ng/arch/x86/x86_features.h +++ b/src/native/external/zlib-ng/arch/x86/x86_features.h @@ -1,14 +1,18 @@ /* x86_features.h -- check for CPU features -* Copyright (C) 2013 Intel Corporation Jim Kukunas -* For conditions of distribution and use, see copyright notice in zlib.h -*/ + * Copyright (C) 2013 Intel Corporation Jim Kukunas + * For conditions of distribution and use, see copyright notice in zlib.h + */ #ifndef X86_FEATURES_H_ #define X86_FEATURES_H_ struct x86_cpu_features { int has_avx2; - int has_avx512; + int has_avx512f; + int has_avx512dq; + int has_avx512bw; + int has_avx512vl; + int has_avx512_common; // Enabled when AVX512(F,DQ,BW,VL) are all enabled. int has_avx512vnni; int has_sse2; int has_ssse3; @@ -21,4 +25,4 @@ struct x86_cpu_features { void Z_INTERNAL x86_check_features(struct x86_cpu_features *features); -#endif /* CPU_H_ */ +#endif /* X86_FEATURES_H_ */ diff --git a/src/native/external/zlib-ng/arch/x86/x86_functions.h b/src/native/external/zlib-ng/arch/x86/x86_functions.h new file mode 100644 index 0000000000000..5aa9b31747452 --- /dev/null +++ b/src/native/external/zlib-ng/arch/x86/x86_functions.h @@ -0,0 +1,172 @@ +/* x86_functions.h -- x86 implementations for arch-specific functions. 
+ * Copyright (C) 2013 Intel Corporation Jim Kukunas + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifndef X86_FUNCTIONS_H_ +#define X86_FUNCTIONS_H_ + +#ifdef X86_SSE2 +uint32_t chunksize_sse2(void); +uint8_t* chunkmemset_safe_sse2(uint8_t *out, unsigned dist, unsigned len, unsigned left); + +# ifdef HAVE_BUILTIN_CTZ + uint32_t compare256_sse2(const uint8_t *src0, const uint8_t *src1); + uint32_t longest_match_sse2(deflate_state *const s, Pos cur_match); + uint32_t longest_match_slow_sse2(deflate_state *const s, Pos cur_match); + void slide_hash_sse2(deflate_state *s); +# endif + void inflate_fast_sse2(PREFIX3(stream)* strm, uint32_t start); +#endif + +#ifdef X86_SSSE3 +uint32_t adler32_ssse3(uint32_t adler, const uint8_t *buf, size_t len); +uint8_t* chunkmemset_safe_ssse3(uint8_t *out, unsigned dist, unsigned len, unsigned left); +void inflate_fast_ssse3(PREFIX3(stream) *strm, uint32_t start); +#endif + +#ifdef X86_SSE42 +uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); +#endif + +#ifdef X86_AVX2 +uint32_t adler32_avx2(uint32_t adler, const uint8_t *buf, size_t len); +uint32_t adler32_fold_copy_avx2(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); +uint32_t chunksize_avx2(void); +uint8_t* chunkmemset_safe_avx2(uint8_t *out, unsigned dist, unsigned len, unsigned left); + +# ifdef HAVE_BUILTIN_CTZ + uint32_t compare256_avx2(const uint8_t *src0, const uint8_t *src1); + uint32_t longest_match_avx2(deflate_state *const s, Pos cur_match); + uint32_t longest_match_slow_avx2(deflate_state *const s, Pos cur_match); + void slide_hash_avx2(deflate_state *s); +# endif + void inflate_fast_avx2(PREFIX3(stream)* strm, uint32_t start); +#endif +#ifdef X86_AVX512 +uint32_t adler32_avx512(uint32_t adler, const uint8_t *buf, size_t len); +uint32_t adler32_fold_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); +#endif +#ifdef X86_AVX512VNNI +uint32_t 
adler32_avx512_vnni(uint32_t adler, const uint8_t *buf, size_t len); +uint32_t adler32_fold_copy_avx512_vnni(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); +#endif + +#ifdef X86_PCLMULQDQ_CRC +uint32_t crc32_fold_pclmulqdq_reset(crc32_fold *crc); +void crc32_fold_pclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len); +void crc32_fold_pclmulqdq(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc); +uint32_t crc32_fold_pclmulqdq_final(crc32_fold *crc); +uint32_t crc32_pclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len); +#endif +#ifdef X86_VPCLMULQDQ_CRC +uint32_t crc32_fold_vpclmulqdq_reset(crc32_fold *crc); +void crc32_fold_vpclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len); +void crc32_fold_vpclmulqdq(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc); +uint32_t crc32_fold_vpclmulqdq_final(crc32_fold *crc); +uint32_t crc32_vpclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len); +#endif + + +#ifdef DISABLE_RUNTIME_CPU_DETECTION +// X86 - SSE2 +# if (defined(X86_SSE2) && defined(__SSE2__)) || defined(__x86_64__) || defined(_M_X64) || defined(X86_NOCHECK_SSE2) +# undef native_chunkmemset_safe +# define native_chunkmemset_safe chunkmemset_safe_sse2 +# undef native_chunksize +# define native_chunksize chunksize_sse2 +# undef native_inflate_fast +# define native_inflate_fast inflate_fast_sse2 +# undef native_slide_hash +# define native_slide_hash slide_hash_sse2 +# ifdef HAVE_BUILTIN_CTZ +# undef native_compare256 +# define native_compare256 compare256_sse2 +# undef native_longest_match +# define native_longest_match longest_match_sse2 +# undef native_longest_match_slow +# define native_longest_match_slow longest_match_slow_sse2 +# endif +#endif +// X86 - SSSE3 +# if defined(X86_SSSE3) && defined(__SSSE3__) +# undef native_adler32 +# define native_adler32 adler32_ssse3 +# undef native_chunkmemset_safe +# define native_chunkmemset_safe chunkmemset_safe_ssse3 +# 
undef native_inflate_fast +# define native_inflate_fast inflate_fast_ssse3 +# endif +// X86 - SSE4.2 +# if defined(X86_SSE42) && defined(__SSE4_2__) +# undef native_adler32_fold_copy +# define native_adler32_fold_copy adler32_fold_copy_sse42 +# endif + +// X86 - PCLMUL +#if defined(X86_PCLMULQDQ_CRC) && defined(__PCLMUL__) +# undef native_crc32 +# define native_crc32 crc32_pclmulqdq +# undef native_crc32_fold +# define native_crc32_fold crc32_fold_pclmulqdq +# undef native_crc32_fold_copy +# define native_crc32_fold_copy crc32_fold_pclmulqdq_copy +# undef native_crc32_fold_final +# define native_crc32_fold_final crc32_fold_pclmulqdq_final +# undef native_crc32_fold_reset +# define native_crc32_fold_reset crc32_fold_pclmulqdq_reset +#endif +// X86 - AVX +# if defined(X86_AVX2) && defined(__AVX2__) +# undef native_adler32 +# define native_adler32 adler32_avx2 +# undef native_adler32_fold_copy +# define native_adler32_fold_copy adler32_fold_copy_avx2 +# undef native_chunkmemset_safe +# define native_chunkmemset_safe chunkmemset_safe_avx2 +# undef native_chunksize +# define native_chunksize chunksize_avx2 +# undef native_inflate_fast +# define native_inflate_fast inflate_fast_avx2 +# undef native_slide_hash +# define native_slide_hash slide_hash_avx2 +# ifdef HAVE_BUILTIN_CTZ +# undef native_compare256 +# define native_compare256 compare256_avx2 +# undef native_longest_match +# define native_longest_match longest_match_avx2 +# undef native_longest_match_slow +# define native_longest_match_slow longest_match_slow_avx2 +# endif +# endif + +// X86 - AVX512 (F,DQ,BW,Vl) +# if defined(X86_AVX512) && defined(__AVX512F__) && defined(__AVX512DQ__) && defined(__AVX512BW__) && defined(__AVX512VL__) +# undef native_adler32 +# define native_adler32 adler32_avx512 +# undef native_adler32_fold_copy +# define native_adler32_fold_copy adler32_fold_copy_avx512 +// X86 - AVX512 (VNNI) +# if defined(X86_AVX512VNNI) && defined(__AVX512VNNI__) +# undef native_adler32 +# define 
native_adler32 adler32_avx512_vnni +# undef native_adler32_fold_copy +# define native_adler32_fold_copy adler32_fold_copy_avx512_vnni +# endif +// X86 - VPCLMULQDQ +# if defined(__PCLMUL__) && defined(__AVX512F__) && defined(__VPCLMULQDQ__) +# undef native_crc32 +# define native_crc32 crc32_vpclmulqdq +# undef native_crc32_fold +# define native_crc32_fold crc32_fold_vpclmulqdq +# undef native_crc32_fold_copy +# define native_crc32_fold_copy crc32_fold_vpclmulqdq_copy +# undef native_crc32_fold_final +# define native_crc32_fold_final crc32_fold_vpclmulqdq_final +# undef native_crc32_fold_reset +# define native_crc32_fold_reset crc32_fold_vpclmulqdq_reset +# endif +# endif +#endif + +#endif /* X86_FUNCTIONS_H_ */ diff --git a/src/native/external/zlib-ng/arch/x86/x86_intrins.h b/src/native/external/zlib-ng/arch/x86/x86_intrins.h index 52e1085d66f97..0e596d18a1439 100644 --- a/src/native/external/zlib-ng/arch/x86/x86_intrins.h +++ b/src/native/external/zlib-ng/arch/x86/x86_intrins.h @@ -7,7 +7,7 @@ #ifdef __AVX2__ #include -#if (!defined(__clang__) && defined(__GNUC__) && __GNUC__ < 10) \ +#if (!defined(__clang__) && !defined(__NVCOMPILER) && defined(__GNUC__) && __GNUC__ < 10) \ || (defined(__apple_build_version__) && __apple_build_version__ < 9020039) static inline __m256i _mm256_zextsi128_si256(__m128i a) { __m128i r; @@ -29,7 +29,7 @@ static inline __m512i _mm512_zextsi128_si512(__m128i a) { /* GCC <9 is missing some AVX512 intrinsics. 
*/ #ifdef __AVX512F__ -#if (!defined(__clang__) && defined(__GNUC__) && __GNUC__ < 9) +#if (!defined(__clang__) && !defined(__NVCOMPILER) && defined(__GNUC__) && __GNUC__ < 9) #include #define PACK(c0, c1, c2, c3) (((int)(unsigned char)(c0) << 24) | ((int)(unsigned char)(c1) << 16) | \ diff --git a/src/native/external/zlib-ng/arch_functions.h b/src/native/external/zlib-ng/arch_functions.h new file mode 100644 index 0000000000000..9a7f8d9379fb0 --- /dev/null +++ b/src/native/external/zlib-ng/arch_functions.h @@ -0,0 +1,29 @@ +/* arch_functions.h -- Arch-specific function prototypes. + * Copyright (C) 2017 Hans Kristian Rosbach + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifndef CPU_FUNCTIONS_H_ +#define CPU_FUNCTIONS_H_ + +#include "zbuild.h" +#include "zutil.h" +#include "crc32.h" +#include "deflate.h" +#include "fallback_builtins.h" + +#include "arch/generic/generic_functions.h" + +#if defined(X86_FEATURES) +# include "arch/x86/x86_functions.h" +#elif defined(ARM_FEATURES) +# include "arch/arm/arm_functions.h" +#elif defined(PPC_FEATURES) || defined(POWER_FEATURES) +# include "arch/power/power_functions.h" +#elif defined(S390_FEATURES) +# include "arch/s390/s390_functions.h" +#elif defined(RISCV_FEATURES) +# include "arch/riscv/riscv_functions.h" +#endif + +#endif diff --git a/src/native/external/zlib-ng/chunkset_tpl.h b/src/native/external/zlib-ng/chunkset_tpl.h index f909a12557f0c..f5cc5c04506df 100644 --- a/src/native/external/zlib-ng/chunkset_tpl.h +++ b/src/native/external/zlib-ng/chunkset_tpl.h @@ -5,7 +5,7 @@ #include "zbuild.h" #include -#if CHUNK_SIZE == 32 && defined(X86_SSSE3) && defined(X86_SSE2) +#if CHUNK_SIZE == 32 && defined(X86_SSSE3) extern uint8_t* chunkmemset_ssse3(uint8_t *out, unsigned dist, unsigned len); #endif @@ -25,7 +25,7 @@ Z_INTERNAL uint32_t CHUNKSIZE(void) { without iteration, which will hopefully make the branch prediction more reliable. 
*/ #ifndef HAVE_CHUNKCOPY -Z_INTERNAL uint8_t* CHUNKCOPY(uint8_t *out, uint8_t const *from, unsigned len) { +static inline uint8_t* CHUNKCOPY(uint8_t *out, uint8_t const *from, unsigned len) { Assert(len > 0, "chunkcopy should never have a length 0"); chunk_t chunk; int32_t align = ((len - 1) % sizeof(chunk_t)) + 1; @@ -54,7 +54,7 @@ Z_INTERNAL uint8_t* CHUNKCOPY(uint8_t *out, uint8_t const *from, unsigned len) { least 258 bytes of output space available (258 being the maximum length output from a single token; see inflate_fast()'s assumptions below). */ #ifndef HAVE_CHUNKUNROLL -Z_INTERNAL uint8_t* CHUNKUNROLL(uint8_t *out, unsigned *dist, unsigned *len) { +static inline uint8_t* CHUNKUNROLL(uint8_t *out, unsigned *dist, unsigned *len) { unsigned char const *from = out - *dist; chunk_t chunk; while (*dist < *len && *dist < sizeof(chunk_t)) { @@ -98,7 +98,7 @@ Z_INTERNAL uint8_t* CHUNKMEMSET(uint8_t *out, unsigned dist, unsigned len) { Assert(len >= sizeof(uint64_t), "chunkmemset should be called on larger chunks"); */ Assert(dist > 0, "chunkmemset cannot have a distance 0"); /* Only AVX2 */ -#if CHUNK_SIZE == 32 && defined(X86_SSSE3) && defined(X86_SSE2) +#if CHUNK_SIZE == 32 && defined(X86_SSSE3) if (len <= 16) { return chunkmemset_ssse3(out, dist, len); } diff --git a/src/native/external/zlib-ng/cmake/detect-intrinsics.cmake b/src/native/external/zlib-ng/cmake/detect-intrinsics.cmake index 74ac3910b8f44..14f82fcbf588f 100644 --- a/src/native/external/zlib-ng/cmake/detect-intrinsics.cmake +++ b/src/native/external/zlib-ng/cmake/detect-intrinsics.cmake @@ -2,40 +2,39 @@ # Licensed under the Zlib license, see LICENSE.md for details macro(check_acle_compiler_flag) - if(MSVC) - # Both ARM and ARM64-targeting msvc support intrinsics, but - # ARM msvc is missing some intrinsics introduced with ARMv8, e.g. 
crc32 - if(MSVC_C_ARCHITECTURE_ID STREQUAL "ARM64") - set(HAVE_ACLE_FLAG TRUE) - endif() - else() + if(NOT NATIVEFLAG) if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") - if(NOT NATIVEFLAG) + check_c_compiler_flag("-march=armv8-a+crc" HAVE_MARCH_ARMV8_CRC) + if(HAVE_MARCH_ARMV8_CRC) set(ACLEFLAG "-march=armv8-a+crc" CACHE INTERNAL "Compiler option to enable ACLE support") + else() + check_c_compiler_flag("-march=armv8-a+crc+simd" HAVE_MARCH_ARMV8_CRC_SIMD) + if(HAVE_MARCH_ARMV8_CRC_SIMD) + set(ACLEFLAG "-march=armv8-a+crc+simd" CACHE INTERNAL "Compiler option to enable ACLE support") + endif() endif() endif() - # Check whether compiler supports ACLE flag - set(CMAKE_REQUIRED_FLAGS "${ACLEFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}") - check_c_source_compiles( - "int main() { return 0; }" - HAVE_ACLE_FLAG FAIL_REGEX "not supported") - if(NOT NATIVEFLAG AND NOT HAVE_ACLE_FLAG) - set(ACLEFLAG "-march=armv8-a+crc+simd" CACHE INTERNAL "Compiler option to enable ACLE support" FORCE) - # Check whether compiler supports ACLE flag - set(CMAKE_REQUIRED_FLAGS "${ACLEFLAG}") - check_c_source_compiles( - "int main() { return 0; }" - HAVE_ACLE_FLAG2 FAIL_REGEX "not supported") - set(HAVE_ACLE_FLAG ${HAVE_ACLE_FLAG2} CACHE INTERNAL "Have compiler option to enable ACLE intrinsics" FORCE) - unset(HAVE_ACLE_FLAG2 CACHE) # Don't cache this internal variable - endif() - set(CMAKE_REQUIRED_FLAGS) endif() + # Check whether compiler supports ARMv8 CRC intrinsics + set(CMAKE_REQUIRED_FLAGS "${ACLEFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}") + check_c_source_compiles( + "#if defined(_MSC_VER) + #include + #else + #include + #endif + unsigned int f(unsigned int a, unsigned int b) { + return __crc32w(a, b); + } + int main(void) { return 0; }" + HAVE_ACLE_FLAG + ) + set(CMAKE_REQUIRED_FLAGS) endmacro() macro(check_armv6_compiler_flag) - if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") - if(NOT NATIVEFLAG) + if(NOT NATIVEFLAG) + if(CMAKE_C_COMPILER_ID 
MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") check_c_compiler_flag("-march=armv6" HAVE_MARCH_ARMV6) if(HAVE_MARCH_ARMV6) set(ARMV6FLAG "-march=armv6" CACHE INTERNAL "Compiler option to enable ARMv6 support") @@ -74,14 +73,14 @@ macro(check_armv6_compiler_flag) endmacro() macro(check_avx512_intrinsics) - if(CMAKE_C_COMPILER_ID MATCHES "Intel") - if(CMAKE_HOST_UNIX OR APPLE) - set(AVX512FLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl") - else() - set(AVX512FLAG "/arch:AVX512") - endif() - elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") - if(NOT NATIVEFLAG) + if(NOT NATIVEFLAG) + if(CMAKE_C_COMPILER_ID MATCHES "Intel") + if(CMAKE_HOST_UNIX OR APPLE) + set(AVX512FLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl") + else() + set(AVX512FLAG "/arch:AVX512") + endif() + elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") # For CPUs that can benefit from AVX512, it seems GCC generates suboptimal # instruction scheduling unless you specify a reasonable -mtune= target set(AVX512FLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl") @@ -94,9 +93,9 @@ macro(check_avx512_intrinsics) endif() unset(HAVE_CASCADE_LAKE) endif() + elseif(MSVC) + set(AVX512FLAG "/arch:AVX512") endif() - elseif(MSVC) - set(AVX512FLAG "/arch:AVX512") endif() # Check whether compiler supports AVX512 intrinsics set(CMAKE_REQUIRED_FLAGS "${AVX512FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}") @@ -109,26 +108,17 @@ macro(check_avx512_intrinsics) int main(void) { return 0; }" HAVE_AVX512_INTRIN ) - - # Evidently both GCC and clang were late to implementing these - check_c_source_compiles( - "#include - __mmask16 f(__mmask16 x) { return _knot_mask16(x); } - int main(void) { return 0; }" - HAVE_MASK_INTRIN - ) - set(CMAKE_REQUIRED_FLAGS) endmacro() macro(check_avx512vnni_intrinsics) - if(CMAKE_C_COMPILER_ID MATCHES "Intel") - if(CMAKE_HOST_UNIX OR APPLE) - set(AVX512VNNIFLAG "-mavx512f -mavx512bw -mavx512dq -mavx512vl -mavx512vnni") - else() - 
set(AVX512VNNIFLAG "/arch:AVX512") - endif() - elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") - if(NOT NATIVEFLAG) + if(NOT NATIVEFLAG) + if(CMAKE_C_COMPILER_ID MATCHES "Intel") + if(CMAKE_HOST_UNIX OR APPLE OR CMAKE_C_COMPILER_ID MATCHES "IntelLLVM") + set(AVX512VNNIFLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl -mavx512vnni") + else() + set(AVX512VNNIFLAG "/arch:AVX512") + endif() + elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") set(AVX512VNNIFLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl -mavx512vnni") if(NOT MSVC) check_c_compiler_flag("-mtune=cascadelake" HAVE_CASCADE_LAKE) @@ -139,11 +129,10 @@ macro(check_avx512vnni_intrinsics) endif() unset(HAVE_CASCADE_LAKE) endif() + elseif(MSVC) + set(AVX512VNNIFLAG "/arch:AVX512") endif() - elseif(MSVC) - set(AVX512VNNIFLAG "/arch:AVX512") endif() - # Check whether compiler supports AVX512vnni intrinsics set(CMAKE_REQUIRED_FLAGS "${AVX512VNNIFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}") check_c_source_compiles( @@ -159,18 +148,18 @@ macro(check_avx512vnni_intrinsics) endmacro() macro(check_avx2_intrinsics) - if(CMAKE_C_COMPILER_ID MATCHES "Intel") - if(CMAKE_HOST_UNIX OR APPLE) + if(NOT NATIVEFLAG) + if(CMAKE_C_COMPILER_ID MATCHES "Intel") + if(CMAKE_HOST_UNIX OR APPLE) + set(AVX2FLAG "-mavx2") + else() + set(AVX2FLAG "/arch:AVX2") + endif() + elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") set(AVX2FLAG "-mavx2") - else() + elseif(MSVC) set(AVX2FLAG "/arch:AVX2") endif() - elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") - if(NOT NATIVEFLAG) - set(AVX2FLAG "-mavx2") - endif() - elseif(MSVC) - set(AVX2FLAG "/arch:AVX2") endif() # Check whether compiler supports AVX2 intrinics set(CMAKE_REQUIRED_FLAGS "${AVX2FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}") @@ -187,8 +176,8 @@ macro(check_avx2_intrinsics) endmacro() macro(check_neon_compiler_flag) - if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID 
MATCHES "Clang") - if(NOT NATIVEFLAG) + if(NOT NATIVEFLAG) + if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") if("${ARCH}" MATCHES "aarch64") set(NEONFLAG "-march=armv8-a+simd") else() @@ -206,12 +195,52 @@ macro(check_neon_compiler_flag) #endif int main() { return 0; }" NEON_AVAILABLE FAIL_REGEX "not supported") + # Check whether compiler native flag is enough for NEON support + # Some GCC versions don't enable FPU (vector unit) when using -march=native + if(NEON_AVAILABLE AND NATIVEFLAG AND (NOT "${ARCH}" MATCHES "aarch64")) + check_c_source_compiles( + "#include + uint8x16_t f(uint8x16_t x, uint8x16_t y) { + return vaddq_u8(x, y); + } + int main(int argc, char* argv[]) { + uint8x16_t a = vdupq_n_u8(argc); + uint8x16_t b = vdupq_n_u8(argc); + uint8x16_t result = f(a, b); + return result[0]; + }" + ARM_NEON_SUPPORT_NATIVE + ) + if(NOT ARM_NEON_SUPPORT_NATIVE) + set(CMAKE_REQUIRED_FLAGS "${NATIVEFLAG} -mfpu=neon ${ZNOLTOFLAG}") + check_c_source_compiles( + "#include + uint8x16_t f(uint8x16_t x, uint8x16_t y) { + return vaddq_u8(x, y); + } + int main(int argc, char* argv[]) { + uint8x16_t a = vdupq_n_u8(argc); + uint8x16_t b = vdupq_n_u8(argc); + uint8x16_t result = f(a, b); + return result[0]; + }" + ARM_NEON_SUPPORT_NATIVE_MFPU + ) + if(ARM_NEON_SUPPORT_NATIVE_MFPU) + set(NEONFLAG "-mfpu=neon") + else() + # Remove local NEON_AVAILABLE variable and overwrite the cache + unset(NEON_AVAILABLE) + set(NEON_AVAILABLE "" CACHE INTERNAL "NEON support available" FORCE) + endif() + endif() + endif() set(CMAKE_REQUIRED_FLAGS) endmacro() macro(check_neon_ld4_intrinsics) - if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") - if(NOT NATIVEFLAG) + if(NOT NATIVEFLAG) + if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") if("${ARCH}" MATCHES "aarch64") set(NEONFLAG "-march=armv8-a+simd") else() @@ -234,8 +263,8 @@ macro(check_neon_ld4_intrinsics) endmacro() macro(check_pclmulqdq_intrinsics) - 
if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") - if(NOT NATIVEFLAG) + if(NOT NATIVEFLAG) + if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang" OR CMAKE_C_COMPILER_ID MATCHES "IntelLLVM") set(PCLMULFLAG "-mpclmul") endif() endif() @@ -257,8 +286,8 @@ macro(check_pclmulqdq_intrinsics) endmacro() macro(check_vpclmulqdq_intrinsics) - if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") - if(NOT NATIVEFLAG) + if(NOT NATIVEFLAG) + if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang" OR CMAKE_C_COMPILER_ID MATCHES "IntelLLVM") set(VPCLMULFLAG "-mvpclmulqdq -mavx512f") endif() endif() @@ -341,8 +370,8 @@ macro(check_ppc_intrinsics) endmacro() macro(check_power8_intrinsics) - if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") - if(NOT NATIVEFLAG) + if(NOT NATIVEFLAG) + if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") set(POWER8FLAG "-mcpu=power8") endif() endif() @@ -364,12 +393,27 @@ macro(check_power8_intrinsics) }" HAVE_POWER8_INTRIN ) + if(NOT HAVE_POWER8_INTRIN AND HAVE_LINUX_AUXVEC_H) + check_c_source_compiles( + "#include + #include + int main() { + return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07); + }" + HAVE_POWER8_INTRIN2 + ) + if(HAVE_POWER8_INTRIN2) + set(POWER8_NEED_AUXVEC_H 1) + set(HAVE_POWER8_INTRIN ${HAVE_POWER8_INTRIN2} CACHE INTERNAL "Have POWER8 intrinsics" FORCE) + unset(HAVE_POWER8_INTRIN2 CACHE) + endif() + endif() set(CMAKE_REQUIRED_FLAGS) endmacro() macro(check_rvv_intrinsics) - if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") - if(NOT NATIVEFLAG) + if(NOT NATIVEFLAG) + if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") set(RISCVFLAG "-march=rv64gcv") endif() endif() @@ -399,8 +443,8 @@ macro(check_s390_intrinsics) endmacro() macro(check_power9_intrinsics) - if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") - 
if(NOT NATIVEFLAG) + if(NOT NATIVEFLAG) + if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") set(POWER9FLAG "-mcpu=power9") endif() endif() @@ -422,22 +466,37 @@ macro(check_power9_intrinsics) }" HAVE_POWER9_INTRIN ) + if(NOT HAVE_POWER9_INTRIN AND HAVE_LINUX_AUXVEC_H) + check_c_source_compiles( + "#include + #include + int main() { + return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_3_00); + }" + HAVE_POWER9_INTRIN2 + ) + if(HAVE_POWER9_INTRIN2) + set(POWER9_NEED_AUXVEC_H 1) + set(HAVE_POWER9_INTRIN ${HAVE_POWER9_INTRIN2} CACHE INTERNAL "Have POWER9 intrinsics" FORCE) + unset(HAVE_POWER9_INTRIN2 CACHE) + endif() + endif() set(CMAKE_REQUIRED_FLAGS) endmacro() macro(check_sse2_intrinsics) - if(CMAKE_C_COMPILER_ID MATCHES "Intel") - if(CMAKE_HOST_UNIX OR APPLE) - set(SSE2FLAG "-msse2") - else() - set(SSE2FLAG "/arch:SSE2") - endif() - elseif(MSVC) - if(NOT "${ARCH}" MATCHES "x86_64") - set(SSE2FLAG "/arch:SSE2") - endif() - elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") - if(NOT NATIVEFLAG) + if(NOT NATIVEFLAG) + if(CMAKE_C_COMPILER_ID MATCHES "Intel") + if(CMAKE_HOST_UNIX OR APPLE) + set(SSE2FLAG "-msse2") + else() + set(SSE2FLAG "/arch:SSE2") + endif() + elseif(MSVC) + if(NOT "${ARCH}" MATCHES "x86_64") + set(SSE2FLAG "/arch:SSE2") + endif() + elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") set(SSE2FLAG "-msse2") endif() endif() @@ -453,14 +512,14 @@ macro(check_sse2_intrinsics) endmacro() macro(check_ssse3_intrinsics) - if(CMAKE_C_COMPILER_ID MATCHES "Intel") - if(CMAKE_HOST_UNIX OR APPLE) - set(SSSE3FLAG "-mssse3") - else() - set(SSSE3FLAG "/arch:SSSE3") - endif() - elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") - if(NOT NATIVEFLAG) + if(NOT NATIVEFLAG) + if(CMAKE_C_COMPILER_ID MATCHES "Intel") + if(CMAKE_HOST_UNIX OR APPLE) + set(SSSE3FLAG "-mssse3") + else() + set(SSSE3FLAG "/arch:SSSE3") + endif() + elseif(CMAKE_C_COMPILER_ID MATCHES 
"GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") set(SSSE3FLAG "-mssse3") endif() endif() @@ -478,14 +537,14 @@ macro(check_ssse3_intrinsics) endmacro() macro(check_sse42_intrinsics) - if(CMAKE_C_COMPILER_ID MATCHES "Intel") - if(CMAKE_HOST_UNIX OR APPLE) - set(SSE42FLAG "-msse4.2") - else() - set(SSE42FLAG "/arch:SSE4.2") - endif() - elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") - if(NOT NATIVEFLAG) + if(NOT NATIVEFLAG) + if(CMAKE_C_COMPILER_ID MATCHES "Intel") + if(CMAKE_HOST_UNIX OR APPLE) + set(SSE42FLAG "-msse4.2") + else() + set(SSE42FLAG "/arch:SSE4.2") + endif() + elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") set(SSE42FLAG "-msse4.2") endif() endif() @@ -526,15 +585,17 @@ macro(check_vgfma_intrinsics) endmacro() macro(check_xsave_intrinsics) - if(NOT NATIVEFLAG AND NOT MSVC) + if(NOT NATIVEFLAG AND NOT MSVC AND NOT CMAKE_C_COMPILER_ID MATCHES "Intel") set(XSAVEFLAG "-mxsave") endif() set(CMAKE_REQUIRED_FLAGS "${XSAVEFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}") check_c_source_compiles( "#ifdef _MSC_VER # include + #elif __GNUC__ == 8 && __GNUC_MINOR__ > 1 + # include #else - # include + # include #endif unsigned int f(unsigned int a) { return (int) _xgetbv(a); } int main(void) { return 0; }" diff --git a/src/native/external/zlib-ng/configure b/src/native/external/zlib-ng/configure index 20fb91aeb0799..8e693fe7b8c3e 100644 --- a/src/native/external/zlib-ng/configure +++ b/src/native/external/zlib-ng/configure @@ -300,7 +300,9 @@ show $cc -c $test.c if test "$gcc" -eq 1 && ($cc $CFLAGS -c $test.c) >> configure.log 2>&1; then echo "$cc" | tee -a configure.log CC="$cc" - CFLAGS="${CFLAGS} -std=c11" + if test "${CFLAGS#*"-std="}" = "$CFLAGS" ; then + CFLAGS="${CFLAGS} -std=c11" + fi # Re-check ARCH if the compiler is a cross-compiler. 
if $CC -print-multiarch 1> /dev/null 2>&1 && test -n "$($CC -print-multiarch)" 1> /dev/null 2>&1; then @@ -363,10 +365,10 @@ if test "$gcc" -eq 1 && ($cc $CFLAGS -c $test.c) >> configure.log 2>&1; then case "$uname" in Linux* | linux* | GNU | GNU/* | solaris*) LDSHARED=${LDSHARED-"$cc"} - LDSHAREDFLAGS="-shared -Wl,-soname,${LIBNAME}.so.${VER1},--version-script,${SRCDIR}/${MAPNAME}" ;; + LDSHAREDFLAGS="-shared -Wl,-soname,${LIBNAME}.so.${VER1}" ;; *BSD | *bsd* | DragonFly) LDSHARED=${LDSHARED-"$cc"} - LDSHAREDFLAGS="-shared -Wl,-soname,${LIBNAME}.so.${VER1},--version-script,${SRCDIR}/${MAPNAME}" + LDSHAREDFLAGS="-shared -Wl,-soname,${LIBNAME}.so.${VER1}" LDCONFIG="ldconfig -m" ;; CYGWIN* | Cygwin* | cygwin*) visibility=0 @@ -384,7 +386,7 @@ if test "$gcc" -eq 1 && ($cc $CFLAGS -c $test.c) >> configure.log 2>&1; then SHAREDTARGET=$SHAREDLIB IMPORTLIB="${LIBNAME}.dll.a" LDSHARED=${LDSHARED-"$cc"} - LDSHAREDFLAGS="-shared -Wl,--out-implib,${IMPORTLIB},--version-script,${SRCDIR}/${MAPNAME}" + LDSHAREDFLAGS="-shared -Wl,--out-implib,${IMPORTLIB}" LDSHAREDLIBC="" if test $gzfileops -eq 0; then DEFFILE='win32/${LIBNAME2}.def' @@ -433,7 +435,7 @@ if test "$gcc" -eq 1 && ($cc $CFLAGS -c $test.c) >> configure.log 2>&1; then SHAREDTARGET=$SHAREDLIB IMPORTLIB="${LIBNAME}.dll.a" LDSHARED=${LDSHARED-"$cc"} - LDSHAREDFLAGS="-shared -Wl,--out-implib=${IMPORTLIB} -Wl,--version-script=${SRCDIR}/${MAPNAME}" + LDSHAREDFLAGS="-shared -Wl,--out-implib=${IMPORTLIB}" LDSHAREDLIBC="" if test $gzfileops -eq 0; then DEFFILE='win32/${LIBNAME2}.def' @@ -471,7 +473,9 @@ if test "$gcc" -eq 1 && ($cc $CFLAGS -c $test.c) >> configure.log 2>&1; then SHAREDTARGET=$SHAREDLIBV LDSHARED=${LDSHARED-"$cc"} LDSHAREDFLAGS="-dynamiclib -install_name @rpath/${SHAREDLIBM} -compatibility_version ${VER1} -current_version ${VER3}" - if libtool -V 2>&1 | grep Apple > /dev/null; then + if "${CROSS_PREFIX}libtool" -V 2>&1 | grep Apple > /dev/null; then + AR="${CROSS_PREFIX}libtool" + elif libtool -V 2>&1 | grep 
Apple > /dev/null; then AR="libtool" else AR="/usr/bin/libtool" @@ -479,7 +483,7 @@ if test "$gcc" -eq 1 && ($cc $CFLAGS -c $test.c) >> configure.log 2>&1; then ARFLAGS="-o" ;; aarch64) LDSHARED=${LDSHARED-"$cc"} - LDSHAREDFLAGS="-shared -Wl,-soname,${LIBNAME}.so.${VER1} -Wl,--version-script,${SRCDIR}/${MAPNAME}" + LDSHAREDFLAGS="-shared -Wl,-soname,${LIBNAME}.so.${VER1}" LDSHAREDLIBC="-Wl,--start-group -lc -lrdimon -Wl,--end-group" ;; *) LDSHARED=${LDSHARED-"$cc"} @@ -597,7 +601,7 @@ EOF if test $shared -eq 1; then printf "Checking for shared library support... " | tee -a configure.log # we must test in two steps (cc then ld), required at least on SunOS 4.x - if try $CC -w -c $SFLAGS $test.c && + if try $CC -c $SFLAGS $test.c && try $LDSHARED $LDSHAREDFLAGS $LDFLAGS -o $test$shared_ext $test.o $LDSHAREDLIBC; then echo "Building shared library $SHAREDTARGET with $CC." | tee -a configure.log elif test -z "$old_cc" -a -z "$old_cflags"; then @@ -626,6 +630,29 @@ fi echo >> configure.log +# check for version script support +cat > $test.c < $test.map <> configure.log + # check for large file support, and if none, check for fseeko() cat > $test.c < @@ -858,7 +885,7 @@ fi # enable reduced memory configuration if test $reducedmem -eq 1; then echo "Configuring for reduced memory environment." | tee -a configure.log - CFLAGS="${CFLAGS} -DHASH_SIZE=32768u -DGZBUFSIZE=8192" + CFLAGS="${CFLAGS} -DHASH_SIZE=32768u -DGZBUFSIZE=8192 -DNO_LIT_MEM" fi # if code coverage testing was requested, use older gcc if defined, e.g. "gcc-4.2" on Mac OS X @@ -973,6 +1000,24 @@ else echo "Checking for attribute(aligned) ... No." | tee -a configure.log fi +# Check for __builtin_assume_aligned(x,n) support in compiler +cat > $test.c << EOF +char *test(char *buffer) { + char *abuffer = __builtin_assume_aligned(buffer,64); + return abuffer; +} +int main() { + return 0; +} +EOF +if try ${CC} ${CFLAGS} $test.c $LDSHAREDLIBC; then + echo "Checking for __builtin_assume_aligned() ... Yes." 
| tee -a configure.log + CFLAGS="$CFLAGS -DHAVE_BUILTIN_ASSUME_ALIGNED" + SFLAGS="$SFLAGS -DHAVE_BUILTIN_ASSUME_ALIGNED" +else + echo "Checking for __builtin_assume_aligned() ... No." | tee -a configure.log +fi + # Check for __builtin_ctz() support in compiler cat > $test.c << EOF long f(unsigned int x) { return __builtin_ctz(x); } @@ -1085,42 +1130,39 @@ EOF fi } -check_mask_intrinsics() { - # Check whether compiler supports AVX512 k-mask intrinsics - cat > $test.c << EOF -#include -__mmask16 f(__mmask16 x) { return _knot_mask16(x); } -int main(void) { return 0; } -EOF - if try ${CC} ${CFLAGS} ${avx512flag} $test.c; then - echo "Checking for AVX512 k-mask intrinsics ... Yes." | tee -a configure.log - HAVE_MASK_INTRIN=1 - else - echo "Checking for AVX512 k-mask intrinsics ... No." | tee -a configure.log - HAVE_MASK_INTRIN=0 - fi -} - check_acle_compiler_flag() { # Check whether -march=armv8-a+crc works correctly cat > $test.c << EOF int main() { return 0; } EOF if try $CC -c $CFLAGS -march=armv8-a+crc $test.c; then - ACLE_AVAILABLE=1 echo "Check whether -march=armv8-a+crc works ... Yes." | tee -a configure.log acleflag="-march=armv8-a+crc" else echo "Check whether -march=armv8-a+crc works ... No." | tee -a configure.log if try $CC -c $CFLAGS -march=armv8-a+crc+simd $test.c; then - ACLE_AVAILABLE=1 echo "Check whether -march=armv8-a+crc+simd works ... Yes." | tee -a configure.log acleflag="-march=armv8-a+crc+simd" else - ACLE_AVAILABLE=0 echo "Check whether -march=armv8-a+crc+simd works ... No." | tee -a configure.log fi fi + + # Check whether compiler supports ARMv8 CRC intrinsics + cat > $test.c << EOF +#include +unsigned int f(unsigned int a, unsigned int b) { + return __crc32w(a, b); +} +int main(void) { return 0; } +EOF + if try ${CC} ${CFLAGS} ${acleflag} $test.c; then + echo "Checking for ARMv8 CRC intrinsics ... Yes." | tee -a configure.log + ACLE_AVAILABLE=1 + else + echo "Checking for ARMv8 CRC intrinsics ... No." 
| tee -a configure.log + ACLE_AVAILABLE=0 + fi } check_neon_compiler_flag() { @@ -1254,8 +1296,10 @@ check_xsave_intrinsics() { cat > $test.c << EOF #ifdef _MSC_VER # include +#elif __GNUC__ == 8 && __GNUC_MINOR__ > 1 +# include #else -# include +# include #endif unsigned int f(unsigned int a) { return (int) _xgetbv(a); } int main(void) { return 0; } @@ -1480,6 +1524,56 @@ case "${ARCH}" in ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} x86_features.o" ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} x86_features.lo" + check_xsave_intrinsics + + if test ${HAVE_XSAVE_INTRIN} -eq 1; then + CFLAGS="${CFLAGS} -DX86_HAVE_XSAVE_INTRIN" + SFLAGS="${SFLAGS} -DX86_HAVE_XSAVE_INTRIN" + else + xsaveflag="" + fi + + check_sse2_intrinsics + + if test ${HAVE_SSE2_INTRIN} -eq 1; then + CFLAGS="${CFLAGS} -DX86_SSE2" + SFLAGS="${SFLAGS} -DX86_SSE2" + ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} chunkset_sse2.o compare256_sse2.o slide_hash_sse2.o" + ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} chunkset_sse2.lo compare256_sse2.lo slide_hash_sse2.lo" + + if test $forcesse2 -eq 1; then + CFLAGS="${CFLAGS} -DX86_NOCHECK_SSE2" + SFLAGS="${SFLAGS} -DX86_NOCHECK_SSE2" + fi + fi + + check_ssse3_intrinsics + + if test ${HAVE_SSSE3_INTRIN} -eq 1; then + CFLAGS="${CFLAGS} -DX86_SSSE3" + SFLAGS="${SFLAGS} -DX86_SSSE3" + ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_ssse3.o chunkset_ssse3.o" + ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_ssse3.lo chunkset_ssse3.lo" + fi + + check_sse42_intrinsics + + if test ${HAVE_SSE42_INTRIN} -eq 1; then + CFLAGS="${CFLAGS} -DX86_SSE42" + SFLAGS="${SFLAGS} -DX86_SSE42" + ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_sse42.o" + ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_sse42.lo" + fi + + check_pclmulqdq_intrinsics + + if test ${HAVE_PCLMULQDQ_INTRIN} -eq 1; then + CFLAGS="${CFLAGS} -DX86_PCLMULQDQ_CRC" + SFLAGS="${SFLAGS} -DX86_PCLMULQDQ_CRC" + ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} crc32_pclmulqdq.o" + ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} crc32_pclmulqdq.lo" + fi + check_avx2_intrinsics 
if test ${HAVE_AVX2_INTRIN} -eq 1; then @@ -1496,13 +1590,6 @@ case "${ARCH}" in SFLAGS="${SFLAGS} -DX86_AVX512" ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_avx512.o" ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_avx512.lo" - - check_mask_intrinsics - - if test ${HAVE_MASK_INTRIN} -eq 1; then - CFLAGS="${CFLAGS} -DX86_MASK_INTRIN" - SFLAGS="${SFLAGS} -DX86_MASK_INTRIN" - fi fi check_mtune_cascadelake_compiler_flag @@ -1526,63 +1613,16 @@ case "${ARCH}" in ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_avx512_vnni.lo" fi - check_sse42_intrinsics + if test $buildvpclmulqdq -eq 1 && test ${HAVE_PCLMULQDQ_INTRIN} -eq 1 && test ${HAVE_AVX512_INTRIN} -eq 1; then + check_vpclmulqdq_intrinsics - if test ${HAVE_SSE42_INTRIN} -eq 1; then - CFLAGS="${CFLAGS} -DX86_SSE42" - SFLAGS="${SFLAGS} -DX86_SSE42" - ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_sse42.o insert_string_sse42.o" - ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_sse42.lo insert_string_sse42.lo" - fi - - check_sse2_intrinsics - - if test ${HAVE_SSE2_INTRIN} -eq 1; then - CFLAGS="${CFLAGS} -DX86_SSE2" - SFLAGS="${SFLAGS} -DX86_SSE2" - ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} chunkset_sse2.o compare256_sse2.o slide_hash_sse2.o" - ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} chunkset_sse2.lo compare256_sse2.lo slide_hash_sse2.lo" - - if test $forcesse2 -eq 1; then - CFLAGS="${CFLAGS} -DX86_NOCHECK_SSE2" - SFLAGS="${SFLAGS} -DX86_NOCHECK_SSE2" + if test ${HAVE_VPCLMULQDQ_INTRIN} -eq 1; then + CFLAGS="${CFLAGS} -DX86_VPCLMULQDQ_CRC" + SFLAGS="${SFLAGS} -DX86_VPCLMULQDQ_CRC" + ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} crc32_vpclmulqdq.o" + ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} crc32_vpclmulqdq.lo" fi fi - - check_ssse3_intrinsics - - if test ${HAVE_SSSE3_INTRIN} -eq 1; then - CFLAGS="${CFLAGS} -DX86_SSSE3" - SFLAGS="${SFLAGS} -DX86_SSSE3" - ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_ssse3.o chunkset_ssse3.o" - ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_ssse3.lo chunkset_ssse3.lo" - fi - - check_pclmulqdq_intrinsics - - if 
test ${HAVE_PCLMULQDQ_INTRIN} -eq 1; then - CFLAGS="${CFLAGS} -DX86_PCLMULQDQ_CRC" - SFLAGS="${SFLAGS} -DX86_PCLMULQDQ_CRC" - ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} crc32_pclmulqdq.o" - ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} crc32_pclmulqdq.lo" - - if test $buildvpclmulqdq -eq 1; then - check_vpclmulqdq_intrinsics - - if test ${HAVE_VPCLMULQDQ_INTRIN} -eq 1 && test ${HAVE_AVX512_INTRIN} -eq 1; then - CFLAGS="${CFLAGS} -DX86_VPCLMULQDQ_CRC" - SFLAGS="${SFLAGS} -DX86_VPCLMULQDQ_CRC" - ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} crc32_vpclmulqdq.o" - ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} crc32_vpclmulqdq.lo" - fi - fi - fi - - check_xsave_intrinsics - - if test ${HAVE_XSAVE_INTRIN} -eq 0; then - xsaveflag="" - fi fi ;; @@ -1603,11 +1643,11 @@ case "${ARCH}" in cat > $test.c << EOF int main() { return 0; } EOF - if try $CC -w -c $SFLAGS $test.c -mfloat-abi=softfp && + if try $CC -c $SFLAGS $test.c -mfloat-abi=softfp && try $LDSHARED $LDSHAREDFLAGS $LDFLAGS -o $test$shared_ext $test.o $LDSHAREDLIBC; then floatabi="-mfloat-abi=softfp" else - if try $CC -w -c $SFLAGS $test.c -mfloat-abi=hard && + if try $CC -c $SFLAGS $test.c -mfloat-abi=hard && try $LDSHARED $LDSHAREDFLAGS $LDFLAGS -o $test$shared_ext $test.o $LDSHAREDLIBC; then floatabi="-mfloat-abi=hard" fi @@ -1713,8 +1753,8 @@ EOF CFLAGS="${CFLAGS} -DARM_ACLE" SFLAGS="${SFLAGS} -DARM_ACLE" - ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} crc32_acle.o insert_string_acle.o" - ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} crc32_acle.lo insert_string_acle.lo" + ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} crc32_acle.o" + ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} crc32_acle.lo" fi fi @@ -1828,11 +1868,6 @@ EOF ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} s390_features.lo" fi - if test $builddfltccdeflate -eq 1 -o $builddfltccinflate -eq 1; then - ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} dfltcc_common.o" - ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} dfltcc_common.lo" - fi - if test $builddfltccdeflate -eq 1; then CFLAGS="${CFLAGS} -DS390_DFLTCC_DEFLATE" SFLAGS="${SFLAGS} 
-DS390_DFLTCC_DEFLATE" @@ -1941,7 +1976,7 @@ replace_in_file() { # update Makefile with the configure results -INCLUDES="-I$SRCDIR" +INCLUDES="-I$SRCDIR -I$SRCDIR/$ARCHDIR -I$SRCDIR/arch/generic" if [ "$SRCDIR" != "$BUILDDIR" ]; then INCLUDES="-I$BUILDDIR ${INCLUDES}"; fi sed < $SRCDIR/Makefile.in " @@ -1951,6 +1986,7 @@ sed < $SRCDIR/Makefile.in " /^LDFLAGS *=/s#=.*#=$LDFLAGS# /^LDSHARED *=/s#=.*#=$LDSHARED# /^LDSHAREDFLAGS *=/s#=.*#=$LDSHAREDFLAGS# +/^LDVERSIONSCRIPT *=/s#=.*#=$LDVERSIONSCRIPT# /^LIBNAME1 *=/s#=.*#=$LIBNAME# /^LIBNAME2 *=/s#=.*#=$LIBNAME2# /^SUFFIX *=/s#=.*#=$SUFFIX# @@ -2044,15 +2080,12 @@ done # Generate Makefile in arch dir mkdir -p $ARCHDIR -ARCHINCLUDES="-I$SRCDIR/$ARCHDIR -I$SRCDIR" -if [ "$SRCDIR" != "$BUILDDIR" ]; then ARCHINCLUDES="-I$BUILDDIR ${ARCHINCLUDES}"; fi - sed < $SRCDIR/$ARCHDIR/Makefile.in " /^CC *=/s#=.*#=$CC# /^CFLAGS *=/s#=.*#=$CFLAGS# /^SFLAGS *=/s#=.*#=$SFLAGS# /^LDFLAGS *=/s#=.*#=$LDFLAGS# -/^INCLUDES *=/s#=.*#=$ARCHINCLUDES# +/^INCLUDES *=/s#=.*#=$INCLUDES# /^SUFFIX *=/s#=.*#=$SUFFIX# /^SRCDIR *=/s#=.*#=$SRCDIR/$ARCHDIR# /^SRCTOP *=/s#=.*#=$SRCDIR# @@ -2112,6 +2145,21 @@ for file in $SRCDIR/$ARCHDIR/*.c; do fi done +# Generate Makefile in generic arch dir +mkdir -p arch/generic + +sed < $SRCDIR/arch/generic/Makefile.in " +/^CC *=/s#=.*#=$CC# +/^CFLAGS *=/s#=.*#=$CFLAGS# +/^SFLAGS *=/s#=.*#=$SFLAGS# +/^INCLUDES *=/s#=.*#=$INCLUDES# +/^SRCDIR *=/s#=.*#=$SRCDIR/arch/generic# +/^SRCTOP *=/s#=.*#=$SRCDIR# +/^BUILDDIR *=/s#=.*#=$BUILDDIR# +" > arch/generic/Makefile + +## TODO: Process header dependencies + # Emscripten does not support large amounts of data via stdin/out # https://github.com/emscripten-core/emscripten/issues/16755#issuecomment-1102732849 if test "$CHOST" != "wasm32"; then diff --git a/src/native/external/zlib-ng/cpu_features.h b/src/native/external/zlib-ng/cpu_features.h index 00fa6c747c5ff..8708724bc0d53 100644 --- a/src/native/external/zlib-ng/cpu_features.h +++ b/src/native/external/zlib-ng/cpu_features.h 
@@ -6,12 +6,10 @@ #ifndef CPU_FEATURES_H_ #define CPU_FEATURES_H_ -#include "adler32_fold.h" -#include "crc32_fold.h" +#ifndef DISABLE_RUNTIME_CPU_DETECTION #if defined(X86_FEATURES) # include "arch/x86/x86_features.h" -# include "fallback_builtins.h" #elif defined(ARM_FEATURES) # include "arch/arm/arm_features.h" #elif defined(PPC_FEATURES) || defined(POWER_FEATURES) @@ -38,266 +36,8 @@ struct cpu_features { #endif }; -extern void cpu_check_features(struct cpu_features *features); +void cpu_check_features(struct cpu_features *features); -/* adler32 */ -typedef uint32_t (*adler32_func)(uint32_t adler, const uint8_t *buf, size_t len); - -extern uint32_t adler32_c(uint32_t adler, const uint8_t *buf, size_t len); -#ifdef ARM_NEON -extern uint32_t adler32_neon(uint32_t adler, const uint8_t *buf, size_t len); -#endif -#ifdef PPC_VMX -extern uint32_t adler32_vmx(uint32_t adler, const uint8_t *buf, size_t len); -#endif -#ifdef RISCV_RVV -extern uint32_t adler32_rvv(uint32_t adler, const uint8_t *buf, size_t len); -#endif -#ifdef X86_SSSE3 -extern uint32_t adler32_ssse3(uint32_t adler, const uint8_t *buf, size_t len); -#endif -#ifdef X86_AVX2 -extern uint32_t adler32_avx2(uint32_t adler, const uint8_t *buf, size_t len); -#endif -#ifdef X86_AVX512 -extern uint32_t adler32_avx512(uint32_t adler, const uint8_t *buf, size_t len); -#endif -#ifdef X86_AVX512VNNI -extern uint32_t adler32_avx512_vnni(uint32_t adler, const uint8_t *buf, size_t len); -#endif -#ifdef POWER8_VSX -extern uint32_t adler32_power8(uint32_t adler, const uint8_t *buf, size_t len); -#endif - -/* adler32 folding */ -#ifdef RISCV_RVV -extern uint32_t adler32_fold_copy_rvv(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); -#endif -#ifdef X86_SSE42 -extern uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); -#endif -#ifdef X86_AVX2 -extern uint32_t adler32_fold_copy_avx2(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); -#endif -#ifdef 
X86_AVX512 -extern uint32_t adler32_fold_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); -#endif -#ifdef X86_AVX512VNNI -extern uint32_t adler32_fold_copy_avx512_vnni(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); -#endif - -/* CRC32 folding */ -#ifdef X86_PCLMULQDQ_CRC -extern uint32_t crc32_fold_pclmulqdq_reset(crc32_fold *crc); -extern void crc32_fold_pclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len); -extern void crc32_fold_pclmulqdq(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc); -extern uint32_t crc32_fold_pclmulqdq_final(crc32_fold *crc); -extern uint32_t crc32_pclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len); -#endif -#if defined(X86_PCLMULQDQ_CRC) && defined(X86_VPCLMULQDQ_CRC) -extern uint32_t crc32_fold_vpclmulqdq_reset(crc32_fold *crc); -extern void crc32_fold_vpclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len); -extern void crc32_fold_vpclmulqdq(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc); -extern uint32_t crc32_fold_vpclmulqdq_final(crc32_fold *crc); -extern uint32_t crc32_vpclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len); -#endif - -/* memory chunking */ -extern uint32_t chunksize_c(void); -extern uint8_t* chunkmemset_safe_c(uint8_t *out, unsigned dist, unsigned len, unsigned left); -#ifdef X86_SSE2 -extern uint32_t chunksize_sse2(void); -extern uint8_t* chunkmemset_safe_sse2(uint8_t *out, unsigned dist, unsigned len, unsigned left); -#endif -#ifdef X86_SSSE3 -extern uint8_t* chunkmemset_safe_ssse3(uint8_t *out, unsigned dist, unsigned len, unsigned left); -#endif -#ifdef X86_AVX2 -extern uint32_t chunksize_avx2(void); -extern uint8_t* chunkmemset_safe_avx2(uint8_t *out, unsigned dist, unsigned len, unsigned left); -#endif -#ifdef ARM_NEON -extern uint32_t chunksize_neon(void); -extern uint8_t* chunkmemset_safe_neon(uint8_t *out, unsigned dist, unsigned len, unsigned left); -#endif -#ifdef 
POWER8_VSX -extern uint32_t chunksize_power8(void); -extern uint8_t* chunkmemset_safe_power8(uint8_t *out, unsigned dist, unsigned len, unsigned left); -#endif -#ifdef RISCV_RVV -extern uint32_t chunksize_rvv(void); -extern uint8_t* chunkmemset_safe_rvv(uint8_t *out, unsigned dist, unsigned len, unsigned left); -#endif - -#ifdef ZLIB_COMPAT -typedef struct z_stream_s z_stream; -#else -typedef struct zng_stream_s zng_stream; -#endif - -/* inflate fast loop */ -extern void inflate_fast_c(PREFIX3(stream) *strm, uint32_t start); -#ifdef X86_SSE2 -extern void inflate_fast_sse2(PREFIX3(stream) *strm, uint32_t start); -#endif -#ifdef X86_SSSE3 -extern void inflate_fast_ssse3(PREFIX3(stream) *strm, uint32_t start); -#endif -#ifdef X86_AVX2 -extern void inflate_fast_avx2(PREFIX3(stream) *strm, uint32_t start); -#endif -#ifdef ARM_NEON -extern void inflate_fast_neon(PREFIX3(stream) *strm, uint32_t start); -#endif -#ifdef POWER8_VSX -extern void inflate_fast_power8(PREFIX3(stream) *strm, uint32_t start); -#endif -#ifdef RISCV_RVV -extern void inflate_fast_rvv(PREFIX3(stream) *strm, uint32_t start); -#endif - -/* CRC32 */ -typedef uint32_t (*crc32_func)(uint32_t crc32, const uint8_t *buf, size_t len); - -extern uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len); -#ifdef ARM_ACLE -extern uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, size_t len); -#elif defined(POWER8_VSX) -extern uint32_t crc32_power8(uint32_t crc, const uint8_t *buf, size_t len); -#elif defined(S390_CRC32_VX) -extern uint32_t crc32_s390_vx(uint32_t crc, const uint8_t *buf, size_t len); -#endif - -/* compare256 */ -typedef uint32_t (*compare256_func)(const uint8_t *src0, const uint8_t *src1); - -extern uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1); -#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN -extern uint32_t compare256_unaligned_16(const uint8_t *src0, const uint8_t *src1); -#ifdef HAVE_BUILTIN_CTZ -extern uint32_t compare256_unaligned_32(const 
uint8_t *src0, const uint8_t *src1); -#endif -#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL) -extern uint32_t compare256_unaligned_64(const uint8_t *src0, const uint8_t *src1); -#endif -#endif -#if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ) -extern uint32_t compare256_sse2(const uint8_t *src0, const uint8_t *src1); -#endif -#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ) -extern uint32_t compare256_avx2(const uint8_t *src0, const uint8_t *src1); -#endif -#if defined(ARM_NEON) && defined(HAVE_BUILTIN_CTZLL) -extern uint32_t compare256_neon(const uint8_t *src0, const uint8_t *src1); -#endif -#ifdef POWER9 -extern uint32_t compare256_power9(const uint8_t *src0, const uint8_t *src1); -#endif -#ifdef RISCV_RVV -extern uint32_t compare256_rvv(const uint8_t *src0, const uint8_t *src1); -#endif - -#ifdef DEFLATE_H_ -/* insert_string */ -extern void insert_string_c(deflate_state *const s, const uint32_t str, uint32_t count); -#ifdef X86_SSE42 -extern void insert_string_sse42(deflate_state *const s, const uint32_t str, uint32_t count); -#elif defined(ARM_ACLE) -extern void insert_string_acle(deflate_state *const s, const uint32_t str, uint32_t count); -#endif - -/* longest_match */ -extern uint32_t longest_match_c(deflate_state *const s, Pos cur_match); -#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN -extern uint32_t longest_match_unaligned_16(deflate_state *const s, Pos cur_match); -#ifdef HAVE_BUILTIN_CTZ -extern uint32_t longest_match_unaligned_32(deflate_state *const s, Pos cur_match); -#endif -#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL) -extern uint32_t longest_match_unaligned_64(deflate_state *const s, Pos cur_match); -#endif -#endif -#if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ) -extern uint32_t longest_match_sse2(deflate_state *const s, Pos cur_match); -#endif -#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ) -extern uint32_t longest_match_avx2(deflate_state *const s, Pos cur_match); -#endif -#if defined(ARM_NEON) && 
defined(HAVE_BUILTIN_CTZLL) -extern uint32_t longest_match_neon(deflate_state *const s, Pos cur_match); -#endif -#ifdef POWER9 -extern uint32_t longest_match_power9(deflate_state *const s, Pos cur_match); -#endif -#ifdef RISCV_RVV -extern uint32_t longest_match_rvv(deflate_state *const s, Pos cur_match); -#endif - -/* longest_match_slow */ -extern uint32_t longest_match_slow_c(deflate_state *const s, Pos cur_match); -#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN -extern uint32_t longest_match_slow_unaligned_16(deflate_state *const s, Pos cur_match); -extern uint32_t longest_match_slow_unaligned_32(deflate_state *const s, Pos cur_match); -#ifdef UNALIGNED64_OK -extern uint32_t longest_match_slow_unaligned_64(deflate_state *const s, Pos cur_match); -#endif -#endif -#if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ) -extern uint32_t longest_match_slow_sse2(deflate_state *const s, Pos cur_match); -#endif -#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ) -extern uint32_t longest_match_slow_avx2(deflate_state *const s, Pos cur_match); -#endif -#if defined(ARM_NEON) && defined(HAVE_BUILTIN_CTZLL) -extern uint32_t longest_match_slow_neon(deflate_state *const s, Pos cur_match); -#endif -#ifdef POWER9 -extern uint32_t longest_match_slow_power9(deflate_state *const s, Pos cur_match); -#endif -#ifdef RISCV_RVV -extern uint32_t longest_match_slow_rvv(deflate_state *const s, Pos cur_match); -#endif - -/* quick_insert_string */ -extern Pos quick_insert_string_c(deflate_state *const s, const uint32_t str); -#ifdef X86_SSE42 -extern Pos quick_insert_string_sse42(deflate_state *const s, const uint32_t str); -#elif defined(ARM_ACLE) -extern Pos quick_insert_string_acle(deflate_state *const s, const uint32_t str); -#endif - -/* slide_hash */ -typedef void (*slide_hash_func)(deflate_state *s); - -#ifdef X86_SSE2 -extern void slide_hash_sse2(deflate_state *s); -#endif -#if defined(ARM_SIMD) -extern void slide_hash_armv6(deflate_state *s); -#endif -#if defined(ARM_NEON) 
-extern void slide_hash_neon(deflate_state *s); -#endif -#if defined(PPC_VMX) -extern void slide_hash_vmx(deflate_state *s); -#endif -#if defined(POWER8_VSX) -extern void slide_hash_power8(deflate_state *s); -#endif -#if defined(RISCV_RVV) -extern void slide_hash_rvv(deflate_state *s); -#endif -#ifdef X86_AVX2 -extern void slide_hash_avx2(deflate_state *s); -#endif - -/* update_hash */ -extern uint32_t update_hash_c(deflate_state *const s, uint32_t h, uint32_t val); -#ifdef X86_SSE42 -extern uint32_t update_hash_sse42(deflate_state *const s, uint32_t h, uint32_t val); -#elif defined(ARM_ACLE) -extern uint32_t update_hash_acle(deflate_state *const s, uint32_t h, uint32_t val); -#endif #endif #endif diff --git a/src/native/external/zlib-ng/crc32.c b/src/native/external/zlib-ng/crc32.c new file mode 100644 index 0000000000000..54f6ecd4208ca --- /dev/null +++ b/src/native/external/zlib-ng/crc32.c @@ -0,0 +1,42 @@ +/* crc32.c -- compute the CRC-32 of a data stream + * Copyright (C) 1995-2022 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + * + * This interleaved implementation of a CRC makes use of pipelined multiple + * arithmetic-logic units, commonly found in modern CPU cores. It is due to + * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution. 
+ */ + +#include "zbuild.h" +#include "functable.h" +#include "crc32_braid_tbl.h" + +/* ========================================================================= */ + +const uint32_t * Z_EXPORT PREFIX(get_crc_table)(void) { + return (const uint32_t *)crc_table; +} + +#ifdef ZLIB_COMPAT +unsigned long Z_EXPORT PREFIX(crc32_z)(unsigned long crc, const unsigned char *buf, size_t len) { + if (buf == NULL) return 0; + + return (unsigned long)FUNCTABLE_CALL(crc32)((uint32_t)crc, buf, len); +} +#else +uint32_t Z_EXPORT PREFIX(crc32_z)(uint32_t crc, const unsigned char *buf, size_t len) { + if (buf == NULL) return 0; + + return FUNCTABLE_CALL(crc32)(crc, buf, len); +} +#endif + +#ifdef ZLIB_COMPAT +unsigned long Z_EXPORT PREFIX(crc32)(unsigned long crc, const unsigned char *buf, unsigned int len) { + return (unsigned long)PREFIX(crc32_z)((uint32_t)crc, buf, len); +} +#else +uint32_t Z_EXPORT PREFIX(crc32)(uint32_t crc, const unsigned char *buf, uint32_t len) { + return PREFIX(crc32_z)(crc, buf, len); +} +#endif diff --git a/src/native/external/zlib-ng/crc32.h b/src/native/external/zlib-ng/crc32.h new file mode 100644 index 0000000000000..8c3d7a8a3ef2f --- /dev/null +++ b/src/native/external/zlib-ng/crc32.h @@ -0,0 +1,16 @@ +/* crc32.h -- crc32 folding interface + * Copyright (C) 2021 Nathan Moinvaziri + * For conditions of distribution and use, see copyright notice in zlib.h + */ +#ifndef CRC32_H_ +#define CRC32_H_ + +#define CRC32_FOLD_BUFFER_SIZE (16 * 4) +/* sizeof(__m128i) * (4 folds) */ + +typedef struct crc32_fold_s { + uint8_t fold[CRC32_FOLD_BUFFER_SIZE]; + uint32_t value; +} crc32_fold; + +#endif diff --git a/src/native/external/zlib-ng/crc32_braid_comb.c b/src/native/external/zlib-ng/crc32_braid_comb.c index 75fb474258739..f253ae10a2440 100644 --- a/src/native/external/zlib-ng/crc32_braid_comb.c +++ b/src/native/external/zlib-ng/crc32_braid_comb.c @@ -7,7 +7,6 @@ * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution. 
*/ -#include "zbuild.h" #include "zutil.h" #include "crc32_braid_p.h" #include "crc32_braid_tbl.h" diff --git a/src/native/external/zlib-ng/crc32_braid_p.h b/src/native/external/zlib-ng/crc32_braid_p.h index 1d8a07068a4c5..003bf91920fa9 100644 --- a/src/native/external/zlib-ng/crc32_braid_p.h +++ b/src/native/external/zlib-ng/crc32_braid_p.h @@ -1,7 +1,6 @@ #ifndef CRC32_BRAID_P_H_ #define CRC32_BRAID_P_H_ -#include "zbuild.h" #include "zendian.h" /* Define N */ @@ -25,7 +24,7 @@ # endif #else # ifndef W -# if defined(__x86_64__) || defined(__aarch64__) || defined(__powerpc64__) +# if defined(__x86_64__) || defined(_M_AMD64) || defined(__aarch64__) || defined(_M_ARM64) || defined(__powerpc64__) # define W 8 # else # define W 4 @@ -42,9 +41,24 @@ # endif #endif +#if BYTE_ORDER == LITTLE_ENDIAN +# define ZSWAPWORD(word) (word) +# define BRAID_TABLE crc_braid_table +#elif BYTE_ORDER == BIG_ENDIAN +# if W == 8 +# define ZSWAPWORD(word) ZSWAP64(word) +# elif W == 4 +# define ZSWAPWORD(word) ZSWAP32(word) +# endif +# define BRAID_TABLE crc_braid_big_table +#else +# error "No endian defined" +#endif + +#define DO1 c = crc_table[(c ^ *buf++) & 0xff] ^ (c >> 8) +#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1 + /* CRC polynomial. 
*/ #define POLY 0xedb88320 /* p(x) reflected, with x^32 implied */ -extern uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len); - #endif /* CRC32_BRAID_P_H_ */ diff --git a/src/native/external/zlib-ng/deflate.c b/src/native/external/zlib-ng/deflate.c index 2a0a20e5d29a9..eb5ae0aabd597 100644 --- a/src/native/external/zlib-ng/deflate.c +++ b/src/native/external/zlib-ng/deflate.c @@ -1,5 +1,5 @@ /* deflate.c -- compress data using the deflation algorithm - * Copyright (C) 1995-2023 Jean-loup Gailly and Mark Adler + * Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -58,7 +58,7 @@ # undef deflateInit2 #endif -const char PREFIX(deflate_copyright)[] = " deflate 1.3.0 Copyright 1995-2023 Jean-loup Gailly and Mark Adler "; +const char PREFIX(deflate_copyright)[] = " deflate 1.3.1 Copyright 1995-2024 Jean-loup Gailly and Mark Adler "; /* If you use the zlib library in a product, an acknowledgment is welcome in the documentation of your product. If for some reason you cannot @@ -71,14 +71,16 @@ const char PREFIX(deflate_copyright)[] = " deflate 1.3.0 Copyright 1995-2023 Jea */ #ifdef S390_DFLTCC_DEFLATE # include "arch/s390/dfltcc_deflate.h" +/* DFLTCC instructions require window to be page-aligned */ +# define PAD_WINDOW PAD_4096 +# define WINDOW_PAD_SIZE 4096 +# define HINT_ALIGNED_WINDOW HINT_ALIGNED_4096 #else -/* Memory management for the deflate state. Useful for allocating arch-specific extension blocks. */ -# define ZALLOC_DEFLATE_STATE(strm) ((deflate_state *)ZALLOC(strm, 1, sizeof(deflate_state))) -# define ZFREE_STATE(strm, addr) ZFREE(strm, addr) -# define ZCOPY_DEFLATE_STATE(dst, src) memcpy(dst, src, sizeof(deflate_state)) -/* Memory management for the window. Useful for allocation the aligned window. 
*/ -# define ZALLOC_WINDOW(strm, items, size) ZALLOC(strm, items, size) -# define TRY_FREE_WINDOW(strm, addr) TRY_FREE(strm, addr) +# define PAD_WINDOW PAD_64 +# define WINDOW_PAD_SIZE 64 +# define HINT_ALIGNED_WINDOW HINT_ALIGNED_64 +/* Adjust the window size for the arch-specific deflate code. */ +# define DEFLATE_ADJUST_WINDOW_SIZE(n) (n) /* Invoked at the beginning of deflateSetDictionary(). Useful for checking arch-specific window data. */ # define DEFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) do {} while (0) /* Invoked at the beginning of deflateGetDictionary(). Useful for adjusting arch-specific window data. */ @@ -120,10 +122,6 @@ static void lm_set_level (deflate_state *s, int level); static void lm_init (deflate_state *s); Z_INTERNAL unsigned read_buf (PREFIX3(stream) *strm, unsigned char *buf, unsigned size); -extern uint32_t update_hash_roll (deflate_state *const s, uint32_t h, uint32_t val); -extern void insert_string_roll (deflate_state *const s, uint32_t str, uint32_t count); -extern Pos quick_insert_string_roll(deflate_state *const s, uint32_t str); - /* =========================================================================== * Local data */ @@ -185,17 +183,111 @@ static const config configuration_table[10] = { memset((unsigned char *)s->head, 0, HASH_SIZE * sizeof(*s->head)); \ } while (0) -/* ========================================================================= */ -/* This function is hidden in ZLIB_COMPAT builds. 
*/ + +#ifdef DEF_ALLOC_DEBUG +# include +# define LOGSZ(name,size) fprintf(stderr, "%s is %d bytes\n", name, size) +# define LOGSZP(name,size,loc,pad) fprintf(stderr, "%s is %d bytes, offset %d, padded %d\n", name, size, loc, pad) +# define LOGSZPL(name,size,loc,pad) fprintf(stderr, "%s is %d bytes, offset %ld, padded %d\n", name, size, loc, pad) +#else +# define LOGSZ(name,size) +# define LOGSZP(name,size,loc,pad) +# define LOGSZPL(name,size,loc,pad) +#endif + +/* =========================================================================== + * Allocate a big buffer and divide it up into the various buffers deflate needs. + * Handles alignment of allocated buffer and alignment of individual buffers. + */ +Z_INTERNAL deflate_allocs* alloc_deflate(PREFIX3(stream) *strm, int windowBits, int lit_bufsize) { + int curr_size = 0; + + /* Define sizes */ + int window_size = DEFLATE_ADJUST_WINDOW_SIZE((1 << windowBits) * 2); + int prev_size = (1 << windowBits) * (int)sizeof(Pos); + int head_size = HASH_SIZE * sizeof(Pos); + int pending_size = lit_bufsize * LIT_BUFS; + int state_size = sizeof(deflate_state); + int alloc_size = sizeof(deflate_allocs); + + /* Calculate relative buffer positions and paddings */ + LOGSZP("window", window_size, PAD_WINDOW(curr_size), PADSZ(curr_size,WINDOW_PAD_SIZE)); + int window_pos = PAD_WINDOW(curr_size); + curr_size = window_pos + window_size; + + LOGSZP("prev", prev_size, PAD_64(curr_size), PADSZ(curr_size,64)); + int prev_pos = PAD_64(curr_size); + curr_size = prev_pos + prev_size; + + LOGSZP("head", head_size, PAD_64(curr_size), PADSZ(curr_size,64)); + int head_pos = PAD_64(curr_size); + curr_size = head_pos + head_size; + + LOGSZP("pending", pending_size, PAD_64(curr_size), PADSZ(curr_size,64)); + int pending_pos = PAD_64(curr_size); + curr_size = pending_pos + pending_size; + + LOGSZP("state", state_size, PAD_64(curr_size), PADSZ(curr_size,64)); + int state_pos = PAD_64(curr_size); + curr_size = state_pos + state_size; + + LOGSZP("alloc", 
alloc_size, PAD_16(curr_size), PADSZ(curr_size,16)); + int alloc_pos = PAD_16(curr_size); + curr_size = alloc_pos + alloc_size; + + /* Add 64-1 or 4096-1 to allow window alignment, and round size of buffer up to multiple of 64 */ + int total_size = PAD_64(curr_size + (WINDOW_PAD_SIZE - 1)); + + /* Allocate buffer, align to 64-byte cacheline, and zerofill the resulting buffer */ + char *original_buf = strm->zalloc(strm->opaque, 1, total_size); + if (original_buf == NULL) + return NULL; + + char *buff = (char *)HINT_ALIGNED_WINDOW((char *)PAD_WINDOW(original_buf)); + LOGSZPL("Buffer alloc", total_size, PADSZ((uintptr_t)original_buf,WINDOW_PAD_SIZE), PADSZ(curr_size,WINDOW_PAD_SIZE)); + + /* Initialize alloc_bufs */ + deflate_allocs *alloc_bufs = (struct deflate_allocs_s *)(buff + alloc_pos); + alloc_bufs->buf_start = (char *)original_buf; + alloc_bufs->zfree = strm->zfree; + + /* Assign buffers */ + alloc_bufs->window = (unsigned char *)HINT_ALIGNED_WINDOW(buff + window_pos); + alloc_bufs->prev = (Pos *)HINT_ALIGNED_64(buff + prev_pos); + alloc_bufs->head = (Pos *)HINT_ALIGNED_64(buff + head_pos); + alloc_bufs->pending_buf = (unsigned char *)HINT_ALIGNED_64(buff + pending_pos); + alloc_bufs->state = (deflate_state *)HINT_ALIGNED_16(buff + state_pos); + + memset((char *)alloc_bufs->prev, 0, prev_size); + + return alloc_bufs; +} + +/* =========================================================================== + * Free all allocated deflate buffers + */ +static inline void free_deflate(PREFIX3(stream) *strm) { + deflate_state *state = (deflate_state *)strm->state; + + if (state->alloc_bufs != NULL) { + deflate_allocs *alloc_bufs = state->alloc_bufs; + alloc_bufs->zfree(strm->opaque, alloc_bufs->buf_start); + strm->state = NULL; + } +} + +/* =========================================================================== + * Initialize deflate state and buffers. + * This function is hidden in ZLIB_COMPAT builds. 
+ */ int32_t ZNG_CONDEXPORT PREFIX(deflateInit2)(PREFIX3(stream) *strm, int32_t level, int32_t method, int32_t windowBits, int32_t memLevel, int32_t strategy) { /* Todo: ignore strm->next_in if we use it as window */ - uint32_t window_padding = 0; deflate_state *s; int wrap = 1; - /* Force initialization functable, because deflate captures function pointers from functable. */ - functable.force_init(); + /* Initialize functable */ + FUNCTABLE_INIT; if (strm == NULL) return Z_STREAM_ERROR; @@ -230,9 +322,19 @@ int32_t ZNG_CONDEXPORT PREFIX(deflateInit2)(PREFIX3(stream) *strm, int32_t level if (windowBits == 8) windowBits = 9; /* until 256-byte window bug fixed */ - s = ZALLOC_DEFLATE_STATE(strm); - if (s == NULL) + /* Allocate buffers */ + int lit_bufsize = 1 << (memLevel + 6); + deflate_allocs *alloc_bufs = alloc_deflate(strm, windowBits, lit_bufsize); + if (alloc_bufs == NULL) return Z_MEM_ERROR; + + s = alloc_bufs->state; + s->alloc_bufs = alloc_bufs; + s->window = alloc_bufs->window; + s->prev = alloc_bufs->prev; + s->head = alloc_bufs->head; + s->pending_buf = alloc_bufs->pending_buf; + strm->state = (struct internal_state *)s; s->strm = strm; s->status = INIT_STATE; /* to pass state test in deflateReset() */ @@ -243,18 +345,9 @@ int32_t ZNG_CONDEXPORT PREFIX(deflateInit2)(PREFIX3(stream) *strm, int32_t level s->w_size = 1 << s->w_bits; s->w_mask = s->w_size - 1; -#ifdef X86_PCLMULQDQ_CRC - window_padding = 8; -#endif - - s->window = (unsigned char *) ZALLOC_WINDOW(strm, s->w_size + window_padding, 2*sizeof(unsigned char)); - s->prev = (Pos *) ZALLOC(strm, s->w_size, sizeof(Pos)); - memset(s->prev, 0, s->w_size * sizeof(Pos)); - s->head = (Pos *) ZALLOC(strm, HASH_SIZE, sizeof(Pos)); - s->high_water = 0; /* nothing written to s->window yet */ - s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */ + s->lit_bufsize = lit_bufsize; /* 16K elements by default */ /* We overlay pending_buf and sym_buf. 
This works since the average size * for length/distance pairs over any compressed block is assured to be 31 @@ -295,7 +388,6 @@ int32_t ZNG_CONDEXPORT PREFIX(deflateInit2)(PREFIX3(stream) *strm, int32_t level * symbols from which it is being constructed. */ - s->pending_buf = (unsigned char *) ZALLOC(strm, s->lit_bufsize, 4); s->pending_buf_size = s->lit_bufsize * 4; if (s->window == NULL || s->prev == NULL || s->head == NULL || s->pending_buf == NULL) { @@ -304,8 +396,15 @@ int32_t ZNG_CONDEXPORT PREFIX(deflateInit2)(PREFIX3(stream) *strm, int32_t level PREFIX(deflateEnd)(strm); return Z_MEM_ERROR; } + +#ifdef LIT_MEM + s->d_buf = (uint16_t *)(s->pending_buf + (s->lit_bufsize << 1)); + s->l_buf = s->pending_buf + (s->lit_bufsize << 2); + s->sym_end = s->lit_bufsize - 1; +#else s->sym_buf = s->pending_buf + s->lit_bufsize; s->sym_end = (s->lit_bufsize - 1) * 3; +#endif /* We avoid equality with lit_bufsize*3 because of wraparound at 64K * on 16 bit machines and because stored blocks are restricted to * 64K-1 bytes. 
@@ -348,7 +447,7 @@ static int deflateStateCheck(PREFIX3(stream) *strm) { if (strm == NULL || strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0) return 1; s = strm->state; - if (s == NULL || s->strm != strm || (s->status < INIT_STATE || s->status > MAX_STATE)) + if (s == NULL || s->alloc_bufs == NULL || s->strm != strm || (s->status < INIT_STATE || s->status > MAX_STATE)) return 1; return 0; } @@ -370,7 +469,7 @@ int32_t Z_EXPORT PREFIX(deflateSetDictionary)(PREFIX3(stream) *strm, const uint8 /* when using zlib wrappers, compute Adler-32 for provided dictionary */ if (wrap == 1) - strm->adler = functable.adler32(strm->adler, dictionary, dictLength); + strm->adler = FUNCTABLE_CALL(adler32)(strm->adler, dictionary, dictLength); DEFLATE_SET_DICTIONARY_HOOK(strm, dictionary, dictLength); /* hook for IBM Z DFLTCC */ s->wrap = 0; /* avoid computing Adler-32 in read_buf */ @@ -457,7 +556,7 @@ int32_t Z_EXPORT PREFIX(deflateResetKeep)(PREFIX3(stream) *strm) { #ifdef GZIP if (s->wrap == 2) { - strm->adler = functable.crc32_fold_reset(&s->crc_fold); + strm->adler = FUNCTABLE_CALL(crc32_fold_reset)(&s->crc_fold); } else #endif strm->adler = ADLER32_INITIAL_VALUE; @@ -506,9 +605,17 @@ int32_t Z_EXPORT PREFIX(deflatePrime)(PREFIX3(stream) *strm, int32_t bits, int32 if (deflateStateCheck(strm)) return Z_STREAM_ERROR; s = strm->state; + +#ifdef LIT_MEM + if (bits < 0 || bits > BIT_BUF_SIZE || + (unsigned char *)s->d_buf < s->pending_out + ((BIT_BUF_SIZE + 7) >> 3)) + return Z_BUF_ERROR; +#else if (bits < 0 || bits > BIT_BUF_SIZE || bits > (int32_t)(sizeof(value) << 3) || s->sym_buf < s->pending_out + ((BIT_BUF_SIZE + 7) >> 3)) return Z_BUF_ERROR; +#endif + do { put = BIT_BUF_SIZE - s->bi_valid; put = MIN(put, bits); @@ -555,7 +662,7 @@ int32_t Z_EXPORT PREFIX(deflateParams)(PREFIX3(stream) *strm, int32_t level, int if (s->level != level) { if (s->level == 0 && s->matches != 0) { if (s->matches == 1) { - functable.slide_hash(s); + FUNCTABLE_CALL(slide_hash)(s); } else { 
CLEAR_HASH(s); } @@ -794,7 +901,7 @@ int32_t Z_EXPORT PREFIX(deflate)(PREFIX3(stream) *strm, int32_t flush) { #ifdef GZIP if (s->status == GZIP_STATE) { /* gzip header */ - functable.crc32_fold_reset(&s->crc_fold); + FUNCTABLE_CALL(crc32_fold_reset)(&s->crc_fold); put_byte(s, 31); put_byte(s, 139); put_byte(s, 8); @@ -911,7 +1018,7 @@ int32_t Z_EXPORT PREFIX(deflate)(PREFIX3(stream) *strm, int32_t flush) { } } put_short(s, (uint16_t)strm->adler); - functable.crc32_fold_reset(&s->crc_fold); + FUNCTABLE_CALL(crc32_fold_reset)(&s->crc_fold); } s->status = BUSY_STATE; @@ -982,7 +1089,7 @@ int32_t Z_EXPORT PREFIX(deflate)(PREFIX3(stream) *strm, int32_t flush) { /* Write the trailer */ #ifdef GZIP if (s->wrap == 2) { - strm->adler = functable.crc32_fold_final(&s->crc_fold); + strm->adler = FUNCTABLE_CALL(crc32_fold_final)(&s->crc_fold); put_uint32(s, strm->adler); put_uint32(s, (uint32_t)strm->total_in); @@ -1007,21 +1114,13 @@ int32_t Z_EXPORT PREFIX(deflate)(PREFIX3(stream) *strm, int32_t flush) { /* ========================================================================= */ int32_t Z_EXPORT PREFIX(deflateEnd)(PREFIX3(stream) *strm) { - int32_t status; - if (deflateStateCheck(strm)) return Z_STREAM_ERROR; - status = strm->state->status; - - /* Deallocate in reverse order of allocations: */ - TRY_FREE(strm, strm->state->pending_buf); - TRY_FREE(strm, strm->state->head); - TRY_FREE(strm, strm->state->prev); - TRY_FREE_WINDOW(strm, strm->state->window); + int32_t status = strm->state->status; - ZFREE_STATE(strm, strm->state); - strm->state = NULL; + /* Free allocated buffers */ + free_deflate(strm); return status == BUSY_STATE ? 
Z_DATA_ERROR : Z_OK; } @@ -1032,7 +1131,6 @@ int32_t Z_EXPORT PREFIX(deflateEnd)(PREFIX3(stream) *strm) { int32_t Z_EXPORT PREFIX(deflateCopy)(PREFIX3(stream) *dest, PREFIX3(stream) *source) { deflate_state *ds; deflate_state *ss; - uint32_t window_padding = 0; if (deflateStateCheck(source) || dest == NULL) return Z_STREAM_ERROR; @@ -1041,34 +1139,39 @@ int32_t Z_EXPORT PREFIX(deflateCopy)(PREFIX3(stream) *dest, PREFIX3(stream) *sou memcpy((void *)dest, (void *)source, sizeof(PREFIX3(stream))); - ds = ZALLOC_DEFLATE_STATE(dest); - if (ds == NULL) + deflate_allocs *alloc_bufs = alloc_deflate(dest, ss->w_bits, ss->lit_bufsize); + if (alloc_bufs == NULL) return Z_MEM_ERROR; + + ds = alloc_bufs->state; + dest->state = (struct internal_state *) ds; - ZCOPY_DEFLATE_STATE(ds, ss); + memcpy(ds, ss, sizeof(deflate_state)); ds->strm = dest; -#ifdef X86_PCLMULQDQ_CRC - window_padding = 8; -#endif - - ds->window = (unsigned char *) ZALLOC_WINDOW(dest, ds->w_size + window_padding, 2*sizeof(unsigned char)); - ds->prev = (Pos *) ZALLOC(dest, ds->w_size, sizeof(Pos)); - ds->head = (Pos *) ZALLOC(dest, HASH_SIZE, sizeof(Pos)); - ds->pending_buf = (unsigned char *) ZALLOC(dest, ds->lit_bufsize, 4); + ds->alloc_bufs = alloc_bufs; + ds->window = alloc_bufs->window; + ds->prev = alloc_bufs->prev; + ds->head = alloc_bufs->head; + ds->pending_buf = alloc_bufs->pending_buf; if (ds->window == NULL || ds->prev == NULL || ds->head == NULL || ds->pending_buf == NULL) { PREFIX(deflateEnd)(dest); return Z_MEM_ERROR; } - memcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(unsigned char)); + memcpy(ds->window, ss->window, DEFLATE_ADJUST_WINDOW_SIZE(ds->w_size * 2 * sizeof(unsigned char))); memcpy((void *)ds->prev, (void *)ss->prev, ds->w_size * sizeof(Pos)); memcpy((void *)ds->head, (void *)ss->head, HASH_SIZE * sizeof(Pos)); - memcpy(ds->pending_buf, ss->pending_buf, ds->pending_buf_size); + memcpy(ds->pending_buf, ss->pending_buf, ds->lit_bufsize * LIT_BUFS); ds->pending_out = 
ds->pending_buf + (ss->pending_out - ss->pending_buf); +#ifdef LIT_MEM + ds->d_buf = (uint16_t *)(ds->pending_buf + (ds->lit_bufsize << 1)); + ds->l_buf = ds->pending_buf + (ds->lit_bufsize << 2); +#else ds->sym_buf = ds->pending_buf + ds->lit_bufsize; +#endif ds->l_desc.dyn_tree = ds->dyn_ltree; ds->d_desc.dyn_tree = ds->dyn_dtree; @@ -1095,10 +1198,10 @@ Z_INTERNAL unsigned PREFIX(read_buf)(PREFIX3(stream) *strm, unsigned char *buf, memcpy(buf, strm->next_in, len); #ifdef GZIP } else if (strm->state->wrap == 2) { - functable.crc32_fold_copy(&strm->state->crc_fold, buf, strm->next_in, len); + FUNCTABLE_CALL(crc32_fold_copy)(&strm->state->crc_fold, buf, strm->next_in, len); #endif } else if (strm->state->wrap == 1) { - strm->adler = functable.adler32_fold_copy(strm->adler, buf, strm->next_in, len); + strm->adler = FUNCTABLE_CALL(adler32_fold_copy)(strm->adler, buf, strm->next_in, len); } else { memcpy(buf, strm->next_in, len); } @@ -1125,9 +1228,9 @@ static void lm_set_level(deflate_state *s, int level) { s->insert_string = &insert_string_roll; s->quick_insert_string = &quick_insert_string_roll; } else { - s->update_hash = functable.update_hash; - s->insert_string = functable.insert_string; - s->quick_insert_string = functable.quick_insert_string; + s->update_hash = update_hash; + s->insert_string = insert_string; + s->quick_insert_string = quick_insert_string; } s->level = level; @@ -1191,7 +1294,7 @@ void Z_INTERNAL PREFIX(fill_window)(deflate_state *s) { s->block_start -= (int)wsize; if (s->insert > s->strstart) s->insert = s->strstart; - functable.slide_hash(s); + FUNCTABLE_CALL(slide_hash)(s); more += wsize; } if (s->strm->avail_in == 0) @@ -1217,7 +1320,7 @@ void Z_INTERNAL PREFIX(fill_window)(deflate_state *s) { if (s->lookahead + s->insert >= STD_MIN_MATCH) { unsigned int str = s->strstart - s->insert; if (UNLIKELY(s->max_chain_length > 1024)) { - s->ins_h = s->update_hash(s, s->window[str], s->window[str+1]); + s->ins_h = s->update_hash(s->window[str], 
s->window[str+1]); } else if (str >= 1) { s->quick_insert_string(s, str + 2 - STD_MIN_MATCH); } diff --git a/src/native/external/zlib-ng/deflate.h b/src/native/external/zlib-ng/deflate.h index 8001b47c999d8..e122ae1aad657 100644 --- a/src/native/external/zlib-ng/deflate.h +++ b/src/native/external/zlib-ng/deflate.h @@ -12,8 +12,12 @@ #include "zutil.h" #include "zendian.h" -#include "adler32_fold.h" -#include "crc32_fold.h" +#include "crc32.h" + +#ifdef S390_DFLTCC_DEFLATE +# include "arch/s390/dfltcc_common.h" +# define HAVE_ARCH_DEFLATE_STATE +#endif /* define NO_GZIP when compiling if you want to disable gzip header and trailer creation by deflate(). NO_GZIP would be used to avoid linking in @@ -23,6 +27,12 @@ # define GZIP #endif +/* define LIT_MEM to slightly increase the speed of deflate (order 1% to 2%) at + the cost of a larger memory footprint */ +#ifndef NO_LIT_MEM +# define LIT_MEM +#endif + /* =========================================================================== * Internal compression state. 
*/ @@ -108,11 +118,30 @@ typedef uint16_t Pos; /* Type definitions for hash callbacks */ typedef struct internal_state deflate_state; -typedef uint32_t (* update_hash_cb) (deflate_state *const s, uint32_t h, uint32_t val); +typedef uint32_t (* update_hash_cb) (uint32_t h, uint32_t val); typedef void (* insert_string_cb) (deflate_state *const s, uint32_t str, uint32_t count); typedef Pos (* quick_insert_string_cb)(deflate_state *const s, uint32_t str); -struct internal_state { +uint32_t update_hash (uint32_t h, uint32_t val); +void insert_string (deflate_state *const s, uint32_t str, uint32_t count); +Pos quick_insert_string (deflate_state *const s, uint32_t str); + +uint32_t update_hash_roll (uint32_t h, uint32_t val); +void insert_string_roll (deflate_state *const s, uint32_t str, uint32_t count); +Pos quick_insert_string_roll(deflate_state *const s, uint32_t str); + +/* Struct for memory allocation handling */ +typedef struct deflate_allocs_s { + char *buf_start; + free_func zfree; + deflate_state *state; + unsigned char *window; + unsigned char *pending_buf; + Pos *prev; + Pos *head; +} deflate_allocs; + +struct ALIGNED_(64) internal_state { PREFIX3(stream) *strm; /* pointer back to this zlib stream */ unsigned char *pending_buf; /* output still pending */ unsigned char *pending_out; /* next pending byte to output to the stream */ @@ -260,8 +289,16 @@ struct internal_state { * - I can't count above 4 */ +#ifdef LIT_MEM +# define LIT_BUFS 5 + uint16_t *d_buf; /* buffer for distances */ + unsigned char *l_buf; /* buffer for literals/lengths */ +#else +# define LIT_BUFS 4 unsigned char *sym_buf; /* buffer for distances and literals/lengths */ - unsigned int sym_next; /* running index in sym_buf */ +#endif + + unsigned int sym_next; /* running index in symbol buffer */ unsigned int sym_end; /* symbol table full when sym_next reaches this */ unsigned long opt_len; /* bit length of current block with optimal trees */ @@ -273,8 +310,11 @@ struct internal_state { 
unsigned long compressed_len; /* total bit length of compressed file mod 2^32 */ unsigned long bits_sent; /* bit length of compressed data sent mod 2^32 */ - /* Reserved for future use and alignment purposes */ - char *reserved_p; + deflate_allocs *alloc_bufs; + +#ifdef HAVE_ARCH_DEFLATE_STATE + arch_deflate_state arch; /* architecture-specific extensions */ +#endif uint64_t bi_buf; /* Output buffer. bits are inserted starting at the bottom (least significant bits). */ @@ -284,7 +324,7 @@ struct internal_state { /* Reserved for future use and alignment purposes */ int32_t reserved[11]; -} ALIGNED_(8); +}; typedef enum { need_more, /* block not completed, need more input or more output */ diff --git a/src/native/external/zlib-ng/deflate_fast.c b/src/native/external/zlib-ng/deflate_fast.c index eada7b46e329a..e682697d5cc51 100644 --- a/src/native/external/zlib-ng/deflate_fast.c +++ b/src/native/external/zlib-ng/deflate_fast.c @@ -1,6 +1,6 @@ /* deflate_fast.c -- compress data using the fast strategy of deflation algorithm * - * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler + * Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -41,7 +41,7 @@ Z_INTERNAL block_state deflate_fast(deflate_state *s, int flush) { * dictionary, and set hash_head to the head of the hash chain: */ if (s->lookahead >= WANT_MIN_MATCH) { - hash_head = functable.quick_insert_string(s, s->strstart); + hash_head = quick_insert_string(s, s->strstart); dist = (int64_t)s->strstart - hash_head; /* Find the longest match, discarding those <= prev_length. @@ -52,7 +52,7 @@ Z_INTERNAL block_state deflate_fast(deflate_state *s, int flush) { * of window index 0 (in particular we have to avoid a match * of the string with itself at the start of the input file). 
*/ - match_len = functable.longest_match(s, hash_head); + match_len = FUNCTABLE_CALL(longest_match)(s, hash_head); /* longest_match() sets match_start */ } } @@ -73,11 +73,11 @@ Z_INTERNAL block_state deflate_fast(deflate_state *s, int flush) { match_len--; /* string at strstart already in table */ s->strstart++; - functable.insert_string(s, s->strstart, match_len); + insert_string(s, s->strstart, match_len); s->strstart += match_len; } else { s->strstart += match_len; - functable.quick_insert_string(s, s->strstart + 2 - STD_MIN_MATCH); + quick_insert_string(s, s->strstart + 2 - STD_MIN_MATCH); /* If lookahead < STD_MIN_MATCH, ins_h is garbage, but it does not * matter since it will be recomputed at next deflate call. diff --git a/src/native/external/zlib-ng/deflate_huff.c b/src/native/external/zlib-ng/deflate_huff.c index b197e24d7c38f..d5a234b114a42 100644 --- a/src/native/external/zlib-ng/deflate_huff.c +++ b/src/native/external/zlib-ng/deflate_huff.c @@ -1,6 +1,6 @@ /* deflate_huff.c -- compress data using huffman encoding only strategy * - * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler + * Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ diff --git a/src/native/external/zlib-ng/deflate_medium.c b/src/native/external/zlib-ng/deflate_medium.c index 47796e32217a1..2aeebe2026bda 100644 --- a/src/native/external/zlib-ng/deflate_medium.c +++ b/src/native/external/zlib-ng/deflate_medium.c @@ -45,16 +45,18 @@ static void insert_match(deflate_state *s, struct match match) { if (UNLIKELY(s->lookahead <= (unsigned int)(match.match_length + WANT_MIN_MATCH))) return; + /* string at strstart already in table */ + match.strstart++; + match.match_length--; + /* matches that are not long enough we need to emit as literals */ - if (LIKELY(match.match_length < WANT_MIN_MATCH)) { - match.strstart++; - match.match_length--; + if (LIKELY(match.match_length < WANT_MIN_MATCH - 1)) { if 
(UNLIKELY(match.match_length > 0)) { if (match.strstart >= match.orgstart) { if (match.strstart + match.match_length - 1 >= match.orgstart) { - functable.insert_string(s, match.strstart, match.match_length); + insert_string(s, match.strstart, match.match_length); } else { - functable.insert_string(s, match.strstart, match.orgstart - match.strstart + 1); + insert_string(s, match.strstart, match.orgstart - match.strstart + 1); } match.strstart += match.match_length; match.match_length = 0; @@ -63,35 +65,18 @@ static void insert_match(deflate_state *s, struct match match) { return; } - /* Insert new strings in the hash table only if the match length - * is not too large. This saves time but degrades compression. - */ - if (match.match_length <= 16 * s->max_insert_length && s->lookahead >= WANT_MIN_MATCH) { - match.match_length--; /* string at strstart already in table */ - match.strstart++; - - if (LIKELY(match.strstart >= match.orgstart)) { - if (LIKELY(match.strstart + match.match_length - 1 >= match.orgstart)) { - functable.insert_string(s, match.strstart, match.match_length); - } else { - functable.insert_string(s, match.strstart, match.orgstart - match.strstart + 1); - } - } else if (match.orgstart < match.strstart + match.match_length) { - functable.insert_string(s, match.orgstart, match.strstart + match.match_length - match.orgstart); + /* Insert into hash table. 
*/ + if (LIKELY(match.strstart >= match.orgstart)) { + if (LIKELY(match.strstart + match.match_length - 1 >= match.orgstart)) { + insert_string(s, match.strstart, match.match_length); + } else { + insert_string(s, match.strstart, match.orgstart - match.strstart + 1); } - match.strstart += match.match_length; - match.match_length = 0; - } else { - match.strstart += match.match_length; - match.match_length = 0; - - if (match.strstart >= (STD_MIN_MATCH - 2)) - functable.quick_insert_string(s, match.strstart + 2 - STD_MIN_MATCH); - - /* If lookahead < WANT_MIN_MATCH, ins_h is garbage, but it does not - * matter since it will be recomputed at next deflate call. - */ + } else if (match.orgstart < match.strstart + match.match_length) { + insert_string(s, match.orgstart, match.strstart + match.match_length - match.orgstart); } + match.strstart += match.match_length; + match.match_length = 0; } static void fizzle_matches(deflate_state *s, struct match *current, struct match *next) { @@ -199,7 +184,7 @@ Z_INTERNAL block_state deflate_medium(deflate_state *s, int flush) { } else { hash_head = 0; if (s->lookahead >= WANT_MIN_MATCH) { - hash_head = functable.quick_insert_string(s, s->strstart); + hash_head = quick_insert_string(s, s->strstart); } current_match.strstart = (uint16_t)s->strstart; @@ -215,7 +200,7 @@ Z_INTERNAL block_state deflate_medium(deflate_state *s, int flush) { * of window index 0 (in particular we have to avoid a match * of the string with itself at the start of the input file). 
*/ - current_match.match_length = (uint16_t)functable.longest_match(s, hash_head); + current_match.match_length = (uint16_t)FUNCTABLE_CALL(longest_match)(s, hash_head); current_match.match_start = (uint16_t)s->match_start; if (UNLIKELY(current_match.match_length < WANT_MIN_MATCH)) current_match.match_length = 1; @@ -235,7 +220,7 @@ Z_INTERNAL block_state deflate_medium(deflate_state *s, int flush) { /* now, look ahead one */ if (LIKELY(!early_exit && s->lookahead > MIN_LOOKAHEAD && (uint32_t)(current_match.strstart + current_match.match_length) < (s->window_size - MIN_LOOKAHEAD))) { s->strstart = current_match.strstart + current_match.match_length; - hash_head = functable.quick_insert_string(s, s->strstart); + hash_head = quick_insert_string(s, s->strstart); next_match.strstart = (uint16_t)s->strstart; next_match.orgstart = next_match.strstart; @@ -250,7 +235,7 @@ Z_INTERNAL block_state deflate_medium(deflate_state *s, int flush) { * of window index 0 (in particular we have to avoid a match * of the string with itself at the start of the input file). 
*/ - next_match.match_length = (uint16_t)functable.longest_match(s, hash_head); + next_match.match_length = (uint16_t)FUNCTABLE_CALL(longest_match)(s, hash_head); next_match.match_start = (uint16_t)s->match_start; if (UNLIKELY(next_match.match_start >= next_match.strstart)) { /* this can happen due to some restarts */ diff --git a/src/native/external/zlib-ng/deflate_p.h b/src/native/external/zlib-ng/deflate_p.h index dd2021a0f59a1..abcc8b1c7c125 100644 --- a/src/native/external/zlib-ng/deflate_p.h +++ b/src/native/external/zlib-ng/deflate_p.h @@ -1,7 +1,7 @@ /* deflate_p.h -- Private inline functions and macros shared with more than * one deflate method * - * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler + * Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h * */ @@ -60,27 +60,39 @@ extern const unsigned char Z_INTERNAL zng_dist_code[]; static inline int zng_tr_tally_lit(deflate_state *s, unsigned char c) { /* c is the unmatched char */ +#ifdef LIT_MEM + s->d_buf[s->sym_next] = 0; + s->l_buf[s->sym_next++] = c; +#else s->sym_buf[s->sym_next++] = 0; s->sym_buf[s->sym_next++] = 0; s->sym_buf[s->sym_next++] = c; +#endif s->dyn_ltree[c].Freq++; Tracevv((stderr, "%c", c)); Assert(c <= (STD_MAX_MATCH-STD_MIN_MATCH), "zng_tr_tally: bad literal"); return (s->sym_next == s->sym_end); } -static inline int zng_tr_tally_dist(deflate_state *s, uint32_t dist, uint32_t len) { +static inline int zng_tr_tally_dist(deflate_state* s, uint32_t dist, uint32_t len) { /* dist: distance of matched string */ /* len: match length-STD_MIN_MATCH */ +#ifdef LIT_MEM + Assert(dist <= UINT16_MAX, "dist should fit in uint16_t"); + Assert(len <= UINT8_MAX, "len should fit in uint8_t"); + s->d_buf[s->sym_next] = (uint16_t)dist; + s->l_buf[s->sym_next++] = (uint8_t)len; +#else s->sym_buf[s->sym_next++] = (uint8_t)(dist); s->sym_buf[s->sym_next++] = (uint8_t)(dist >> 8); s->sym_buf[s->sym_next++] = (uint8_t)len; 
+#endif s->matches++; dist--; Assert(dist < MAX_DIST(s) && (uint16_t)d_code(dist) < (uint16_t)D_CODES, "zng_tr_tally: bad match"); - s->dyn_ltree[zng_length_code[len]+LITERALS+1].Freq++; + s->dyn_ltree[zng_length_code[len] + LITERALS + 1].Freq++; s->dyn_dtree[d_code(dist)].Freq++; return (s->sym_next == s->sym_end); } diff --git a/src/native/external/zlib-ng/deflate_quick.c b/src/native/external/zlib-ng/deflate_quick.c index 6dfd35df510f8..91c96ac52ed32 100644 --- a/src/native/external/zlib-ng/deflate_quick.c +++ b/src/native/external/zlib-ng/deflate_quick.c @@ -86,7 +86,7 @@ Z_INTERNAL block_state deflate_quick(deflate_state *s, int flush) { } if (LIKELY(s->lookahead >= WANT_MIN_MATCH)) { - hash_head = functable.quick_insert_string(s, s->strstart); + hash_head = quick_insert_string(s, s->strstart); dist = (int64_t)s->strstart - hash_head; if (dist <= MAX_DIST(s) && dist > 0) { @@ -94,7 +94,7 @@ Z_INTERNAL block_state deflate_quick(deflate_state *s, int flush) { const uint8_t *match_start = s->window + hash_head; if (zng_memcmp_2(str_start, match_start) == 0) { - match_len = functable.compare256(str_start+2, match_start+2) + 2; + match_len = FUNCTABLE_CALL(compare256)(str_start+2, match_start+2) + 2; if (match_len >= WANT_MIN_MATCH) { if (UNLIKELY(match_len > s->lookahead)) diff --git a/src/native/external/zlib-ng/deflate_rle.c b/src/native/external/zlib-ng/deflate_rle.c index 9691b30a85c85..fb17a63c5eb26 100644 --- a/src/native/external/zlib-ng/deflate_rle.c +++ b/src/native/external/zlib-ng/deflate_rle.c @@ -1,6 +1,6 @@ /* deflate_rle.c -- compress data using RLE strategy of deflation algorithm * - * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler + * Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -60,7 +60,7 @@ Z_INTERNAL block_state deflate_rle(deflate_state *s, int flush) { if (match_len >= STD_MIN_MATCH) { Assert(s->strstart <= UINT16_MAX, "strstart should fit in 
uint16_t"); Assert(s->match_start <= UINT16_MAX, "match_start should fit in uint16_t"); - check_match(s, (Pos)s->strstart, (Pos)s->strstart - 1, match_len); + check_match(s, (Pos)s->strstart, (Pos)(s->strstart - 1), match_len); bflush = zng_tr_tally_dist(s, 1, match_len - STD_MIN_MATCH); diff --git a/src/native/external/zlib-ng/deflate_slow.c b/src/native/external/zlib-ng/deflate_slow.c index 913d828928bf8..b5bea7ab903c9 100644 --- a/src/native/external/zlib-ng/deflate_slow.c +++ b/src/native/external/zlib-ng/deflate_slow.c @@ -1,6 +1,6 @@ /* deflate_slow.c -- compress data using the slow strategy of deflation algorithm * - * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler + * Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -19,12 +19,12 @@ Z_INTERNAL block_state deflate_slow(deflate_state *s, int flush) { int bflush; /* set if current block must be flushed */ int64_t dist; uint32_t match_len; - match_func *longest_match; + match_func longest_match; if (s->max_chain_length <= 1024) - longest_match = &functable.longest_match; + longest_match = FUNCTABLE_FPTR(longest_match); else - longest_match = &functable.longest_match_slow; + longest_match = FUNCTABLE_FPTR(longest_match_slow); /* Process the input block. */ for (;;) { @@ -61,7 +61,7 @@ Z_INTERNAL block_state deflate_slow(deflate_state *s, int flush) { * of window index 0 (in particular we have to avoid a match * of the string with itself at the start of the input file). 
*/ - match_len = (*longest_match)(s, hash_head); + match_len = longest_match(s, hash_head); /* longest_match() sets match_start */ if (match_len <= 5 && (s->strategy == Z_FILTERED)) { @@ -130,7 +130,7 @@ Z_INTERNAL block_state deflate_slow(deflate_state *s, int flush) { } Assert(flush != Z_NO_FLUSH, "no flush?"); if (UNLIKELY(s->match_available)) { - (void) zng_tr_tally_lit(s, s->window[s->strstart-1]); + Z_UNUSED(zng_tr_tally_lit(s, s->window[s->strstart-1])); s->match_available = 0; } s->insert = s->strstart < (STD_MIN_MATCH - 1) ? s->strstart : (STD_MIN_MATCH - 1); diff --git a/src/native/external/zlib-ng/deflate_stored.c b/src/native/external/zlib-ng/deflate_stored.c index 6160896b3fed4..9e5acfbf96610 100644 --- a/src/native/external/zlib-ng/deflate_stored.c +++ b/src/native/external/zlib-ng/deflate_stored.c @@ -1,6 +1,6 @@ /* deflate_stored.c -- store data without compression using deflation algorithm * - * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler + * Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -22,7 +22,7 @@ * * deflate_stored() is written to minimize the number of times an input byte is * copied. It is most efficient with large input and output buffers, which - * maximizes the opportunites to have a single copy from next_in to next_out. + * maximizes the opportunities to have a single copy from next_in to next_out. */ Z_INTERNAL block_state deflate_stored(deflate_state *s, int flush) { /* Smallest worthy block size when not flushing or finishing. 
By default diff --git a/src/native/external/zlib-ng/fallback_builtins.h b/src/native/external/zlib-ng/fallback_builtins.h index 79072a1028ecd..8303508fa1201 100644 --- a/src/native/external/zlib-ng/fallback_builtins.h +++ b/src/native/external/zlib-ng/fallback_builtins.h @@ -5,9 +5,6 @@ #if defined(_M_IX86) || defined(_M_AMD64) || defined(_M_IA64) || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) #include -#ifdef X86_FEATURES -# include "arch/x86/x86_features.h" -#endif /* This is not a general purpose replacement for __builtin_ctz. The function expects that value is != 0. * Because of that assumption trailing_zero is not initialized and the return value is not checked. diff --git a/src/native/external/zlib-ng/functable.c b/src/native/external/zlib-ng/functable.c index 7ca05c9801673..495d11edd2008 100644 --- a/src/native/external/zlib-ng/functable.c +++ b/src/native/external/zlib-ng/functable.c @@ -2,18 +2,16 @@ * Copyright (C) 2017 Hans Kristian Rosbach * For conditions of distribution and use, see copyright notice in zlib.h */ - -#if defined(_MSC_VER) -# include -#endif +#ifndef DISABLE_RUNTIME_CPU_DETECTION #include "zbuild.h" -#include "zendian.h" -#include "crc32_braid_p.h" -#include "deflate.h" -#include "deflate_p.h" #include "functable.h" #include "cpu_features.h" +#include "arch_functions.h" + +#if defined(_MSC_VER) +# include +#endif /* Platform has pointer size atomic store */ #if defined(__GNUC__) || defined(__clang__) @@ -61,31 +59,10 @@ static void init_functable(void) { ft.crc32_fold_final = &crc32_fold_final_c; ft.crc32_fold_reset = &crc32_fold_reset_c; ft.inflate_fast = &inflate_fast_c; - ft.insert_string = &insert_string_c; - ft.quick_insert_string = &quick_insert_string_c; ft.slide_hash = &slide_hash_c; - ft.update_hash = &update_hash_c; - -#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN -# if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL) - ft.longest_match = &longest_match_unaligned_64; - ft.longest_match_slow = 
&longest_match_slow_unaligned_64; - ft.compare256 = &compare256_unaligned_64; -# elif defined(HAVE_BUILTIN_CTZ) - ft.longest_match = &longest_match_unaligned_32; - ft.longest_match_slow = &longest_match_slow_unaligned_32; - ft.compare256 = &compare256_unaligned_32; -# else - ft.longest_match = &longest_match_unaligned_16; - ft.longest_match_slow = &longest_match_slow_unaligned_16; - ft.compare256 = &compare256_unaligned_16; -# endif -#else - ft.longest_match = &longest_match_c; - ft.longest_match_slow = &longest_match_slow_c; - ft.compare256 = &compare256_c; -#endif - + ft.longest_match = &longest_match_generic; + ft.longest_match_slow = &longest_match_slow_generic; + ft.compare256 = &compare256_generic; // Select arch-optimized functions @@ -110,19 +87,14 @@ static void init_functable(void) { #ifdef X86_SSSE3 if (cf.x86.has_ssse3) { ft.adler32 = &adler32_ssse3; -# ifdef X86_SSE2 ft.chunkmemset_safe = &chunkmemset_safe_ssse3; ft.inflate_fast = &inflate_fast_ssse3; -# endif } #endif // X86 - SSE4.2 #ifdef X86_SSE42 if (cf.x86.has_sse42) { ft.adler32_fold_copy = &adler32_fold_copy_sse42; - ft.insert_string = &insert_string_sse42; - ft.quick_insert_string = &quick_insert_string_sse42; - ft.update_hash = &update_hash_sse42; } #endif // X86 - PCLMUL @@ -151,8 +123,9 @@ static void init_functable(void) { # endif } #endif + // X86 - AVX512 (F,DQ,BW,Vl) #ifdef X86_AVX512 - if (cf.x86.has_avx512) { + if (cf.x86.has_avx512_common) { ft.adler32 = &adler32_avx512; ft.adler32_fold_copy = &adler32_fold_copy_avx512; } @@ -164,8 +137,8 @@ static void init_functable(void) { } #endif // X86 - VPCLMULQDQ -#if defined(X86_PCLMULQDQ_CRC) && defined(X86_VPCLMULQDQ_CRC) - if (cf.x86.has_pclmulqdq && cf.x86.has_avx512 && cf.x86.has_vpclmulqdq) { +#ifdef X86_VPCLMULQDQ_CRC + if (cf.x86.has_pclmulqdq && cf.x86.has_avx512_common && cf.x86.has_vpclmulqdq) { ft.crc32 = &crc32_vpclmulqdq; ft.crc32_fold = &crc32_fold_vpclmulqdq; ft.crc32_fold_copy = &crc32_fold_vpclmulqdq_copy; @@ -206,9 +179,6 
@@ static void init_functable(void) { #ifdef ARM_ACLE if (cf.arm.has_crc32) { ft.crc32 = &crc32_acle; - ft.insert_string = &insert_string_acle; - ft.quick_insert_string = &quick_insert_string_acle; - ft.update_hash = &update_hash_acle; } #endif @@ -279,12 +249,9 @@ static void init_functable(void) { FUNCTABLE_ASSIGN(ft, crc32_fold_final); FUNCTABLE_ASSIGN(ft, crc32_fold_reset); FUNCTABLE_ASSIGN(ft, inflate_fast); - FUNCTABLE_ASSIGN(ft, insert_string); FUNCTABLE_ASSIGN(ft, longest_match); FUNCTABLE_ASSIGN(ft, longest_match_slow); - FUNCTABLE_ASSIGN(ft, quick_insert_string); FUNCTABLE_ASSIGN(ft, slide_hash); - FUNCTABLE_ASSIGN(ft, update_hash); // Memory barrier for weak memory order CPUs FUNCTABLE_BARRIER(); @@ -350,11 +317,6 @@ static void inflate_fast_stub(PREFIX3(stream) *strm, uint32_t start) { functable.inflate_fast(strm, start); } -static void insert_string_stub(deflate_state* const s, uint32_t str, uint32_t count) { - init_functable(); - functable.insert_string(s, str, count); -} - static uint32_t longest_match_stub(deflate_state* const s, Pos cur_match) { init_functable(); return functable.longest_match(s, cur_match); @@ -365,21 +327,11 @@ static uint32_t longest_match_slow_stub(deflate_state* const s, Pos cur_match) { return functable.longest_match_slow(s, cur_match); } -static Pos quick_insert_string_stub(deflate_state* const s, const uint32_t str) { - init_functable(); - return functable.quick_insert_string(s, str); -} - static void slide_hash_stub(deflate_state* s) { init_functable(); functable.slide_hash(s); } -static uint32_t update_hash_stub(deflate_state* const s, uint32_t h, uint32_t val) { - init_functable(); - return functable.update_hash(s, h, val); -} - /* functable init */ Z_INTERNAL struct functable_s functable = { force_init_stub, @@ -394,10 +346,9 @@ Z_INTERNAL struct functable_s functable = { crc32_fold_final_stub, crc32_fold_reset_stub, inflate_fast_stub, - insert_string_stub, longest_match_stub, longest_match_slow_stub, - 
quick_insert_string_stub, slide_hash_stub, - update_hash_stub }; + +#endif diff --git a/src/native/external/zlib-ng/functable.h b/src/native/external/zlib-ng/functable.h index 9f78188e10549..173a030c660d5 100644 --- a/src/native/external/zlib-ng/functable.h +++ b/src/native/external/zlib-ng/functable.h @@ -7,14 +7,21 @@ #define FUNCTABLE_H_ #include "deflate.h" -#include "crc32_fold.h" -#include "adler32_fold.h" +#include "crc32.h" + +#ifdef DISABLE_RUNTIME_CPU_DETECTION + +# include "arch_functions.h" + +/* When compiling with native instructions it is not necessary to use functable. + * Instead we use native_ macro indicating the best available variant of arch-specific + * functions for the current platform. + */ +# define FUNCTABLE_INIT ((void)0) +# define FUNCTABLE_CALL(name) native_ ## name +# define FUNCTABLE_FPTR(name) &native_ ## name -#ifdef ZLIB_COMPAT -typedef struct z_stream_s z_stream; #else -typedef struct zng_stream_s zng_stream; -#endif struct functable_s { void (* force_init) (void); @@ -29,14 +36,20 @@ struct functable_s { uint32_t (* crc32_fold_final) (struct crc32_fold_s *crc); uint32_t (* crc32_fold_reset) (struct crc32_fold_s *crc); void (* inflate_fast) (PREFIX3(stream) *strm, uint32_t start); - void (* insert_string) (deflate_state *const s, uint32_t str, uint32_t count); uint32_t (* longest_match) (deflate_state *const s, Pos cur_match); uint32_t (* longest_match_slow) (deflate_state *const s, Pos cur_match); - Pos (* quick_insert_string)(deflate_state *const s, uint32_t str); void (* slide_hash) (deflate_state *s); - uint32_t (* update_hash) (deflate_state *const s, uint32_t h, uint32_t val); }; Z_INTERNAL extern struct functable_s functable; + +/* Explicitly indicate functions are conditionally dispatched. 
+ */ +# define FUNCTABLE_INIT functable.force_init() +# define FUNCTABLE_CALL(name) functable.name +# define FUNCTABLE_FPTR(name) functable.name + +#endif + #endif diff --git a/src/native/external/zlib-ng/gzguts.h b/src/native/external/zlib-ng/gzguts.h index a663844b693ef..14f2391152a44 100644 --- a/src/native/external/zlib-ng/gzguts.h +++ b/src/native/external/zlib-ng/gzguts.h @@ -1,7 +1,7 @@ #ifndef GZGUTS_H_ #define GZGUTS_H_ /* gzguts.h -- zlib internal header definitions for gz* operations - * Copyright (C) 2004-2019 Mark Adler + * Copyright (C) 2004-2024 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -135,7 +135,9 @@ typedef gz_state *gz_statep; /* shared functions */ void Z_INTERNAL gz_error(gz_state *, int, const char *); - +#ifdef ZLIB_COMPAT +unsigned Z_INTERNAL gz_intmax(void); +#endif /* GT_OFF(x), where x is an unsigned value, is true if x > maximum z_off64_t value -- needed when comparing unsigned to z_off64_t, which is signed (possible z_off64_t types off_t, off64_t, and long are all signed) */ diff --git a/src/native/external/zlib-ng/gzlib.c b/src/native/external/zlib-ng/gzlib.c index e613837efb52e..b8a506b6a51f3 100644 --- a/src/native/external/zlib-ng/gzlib.c +++ b/src/native/external/zlib-ng/gzlib.c @@ -1,5 +1,5 @@ /* gzlib.c -- zlib functions common to reading and writing gzip files - * Copyright (C) 2004-2019 Mark Adler + * Copyright (C) 2004-2024 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -523,3 +523,9 @@ void Z_INTERNAL gz_error(gz_state *state, int err, const char *msg) { } (void)snprintf(state->msg, strlen(state->path) + strlen(msg) + 3, "%s%s%s", state->path, ": ", msg); } + +#ifdef ZLIB_COMPAT +unsigned Z_INTERNAL gz_intmax(void) { + return INT_MAX; +} +#endif diff --git a/src/native/external/zlib-ng/infback.c b/src/native/external/zlib-ng/infback.c index 9f5042b4d3dcb..307d05ca3ce03 100644 --- a/src/native/external/zlib-ng/infback.c +++ 
b/src/native/external/zlib-ng/infback.c @@ -43,10 +43,15 @@ int32_t ZNG_CONDEXPORT PREFIX(inflateBackInit)(PREFIX3(stream) *strm, int32_t wi } if (strm->zfree == NULL) strm->zfree = PREFIX(zcfree); - state = ZALLOC_INFLATE_STATE(strm); - if (state == NULL) + + inflate_allocs *alloc_bufs = alloc_inflate(strm); + if (alloc_bufs == NULL) return Z_MEM_ERROR; + + state = alloc_bufs->state; + state->alloc_bufs = alloc_bufs; Tracev((stderr, "inflate: allocated\n")); + strm->state = (struct internal_state *)state; state->dmax = 32768U; state->wbits = (unsigned int)windowBits; @@ -55,7 +60,7 @@ int32_t ZNG_CONDEXPORT PREFIX(inflateBackInit)(PREFIX3(stream) *strm, int32_t wi state->wnext = 0; state->whave = 0; state->sane = 1; - state->chunksize = functable.chunksize(); + state->chunksize = FUNCTABLE_CALL(chunksize)(); return Z_OK; } @@ -357,7 +362,7 @@ int32_t Z_EXPORT PREFIX(inflateBack)(PREFIX3(stream) *strm, in_func in, void *in RESTORE(); if (state->whave < state->wsize) state->whave = state->wsize - left; - functable.inflate_fast(strm, state->wsize); + FUNCTABLE_CALL(inflate_fast)(strm, state->wsize); LOAD(); break; } @@ -504,8 +509,10 @@ int32_t Z_EXPORT PREFIX(inflateBack)(PREFIX3(stream) *strm, in_func in, void *in int32_t Z_EXPORT PREFIX(inflateBackEnd)(PREFIX3(stream) *strm) { if (strm == NULL || strm->state == NULL || strm->zfree == NULL) return Z_STREAM_ERROR; - ZFREE_STATE(strm, strm->state); - strm->state = NULL; + + /* Free allocated buffers */ + free_inflate(strm); + Tracev((stderr, "inflate: end\n")); return Z_OK; } diff --git a/src/native/external/zlib-ng/inflate.c b/src/native/external/zlib-ng/inflate.c index fe55c498e3122..956f37db7dfb4 100644 --- a/src/native/external/zlib-ng/inflate.c +++ b/src/native/external/zlib-ng/inflate.c @@ -19,7 +19,7 @@ /* function prototypes */ static int inflateStateCheck(PREFIX3(stream) *strm); -static int updatewindow(PREFIX3(stream) *strm, const uint8_t *end, uint32_t len, int32_t cksum); +static void 
updatewindow(PREFIX3(stream) *strm, const uint8_t *end, uint32_t len, int32_t cksum); static uint32_t syncsearch(uint32_t *have, const unsigned char *buf, uint32_t len); static inline void inf_chksum_cpy(PREFIX3(stream) *strm, uint8_t *dst, @@ -28,11 +28,11 @@ static inline void inf_chksum_cpy(PREFIX3(stream) *strm, uint8_t *dst, struct inflate_state *state = (struct inflate_state*)strm->state; #ifdef GUNZIP if (state->flags) { - functable.crc32_fold_copy(&state->crc_fold, dst, src, copy); + FUNCTABLE_CALL(crc32_fold_copy)(&state->crc_fold, dst, src, copy); } else #endif { - strm->adler = state->check = functable.adler32_fold_copy(state->check, dst, src, copy); + strm->adler = state->check = FUNCTABLE_CALL(adler32_fold_copy)(state->check, dst, src, copy); } } @@ -40,11 +40,11 @@ static inline void inf_chksum(PREFIX3(stream) *strm, const uint8_t *src, uint32_ struct inflate_state *state = (struct inflate_state*)strm->state; #ifdef GUNZIP if (state->flags) { - functable.crc32_fold(&state->crc_fold, src, len, 0); + FUNCTABLE_CALL(crc32_fold)(&state->crc_fold, src, len, 0); } else #endif { - strm->adler = state->check = functable.adler32(state->check, src, len); + strm->adler = state->check = FUNCTABLE_CALL(adler32)(state->check, src, len); } } @@ -53,7 +53,7 @@ static int inflateStateCheck(PREFIX3(stream) *strm) { if (strm == NULL || strm->zalloc == NULL || strm->zfree == NULL) return 1; state = (struct inflate_state *)strm->state; - if (state == NULL || state->strm != strm || state->mode < HEAD || state->mode > SYNC) + if (state == NULL || state->alloc_bufs == NULL || state->strm != strm || state->mode < HEAD || state->mode > SYNC) return 1; return 0; } @@ -120,13 +120,9 @@ int32_t Z_EXPORT PREFIX(inflateReset2)(PREFIX3(stream) *strm, int32_t windowBits #endif } - /* set number of window bits, free window if different */ + /* set number of window bits */ if (windowBits && (windowBits < MIN_WBITS || windowBits > MAX_WBITS)) return Z_STREAM_ERROR; - if (state->window 
!= NULL && state->wbits != (unsigned)windowBits) { - ZFREE_WINDOW(strm, state->window); - state->window = NULL; - } /* update state and reset the rest of it */ state->wrap = wrap; @@ -134,13 +130,94 @@ int32_t Z_EXPORT PREFIX(inflateReset2)(PREFIX3(stream) *strm, int32_t windowBits return PREFIX(inflateReset)(strm); } -/* This function is hidden in ZLIB_COMPAT builds. */ +#ifdef INF_ALLOC_DEBUG +# include +# define LOGSZ(name,size) fprintf(stderr, "%s is %d bytes\n", name, size) +# define LOGSZP(name,size,loc,pad) fprintf(stderr, "%s is %d bytes, offset %d, padded %d\n", name, size, loc, pad) +# define LOGSZPL(name,size,loc,pad) fprintf(stderr, "%s is %d bytes, offset %ld, padded %d\n", name, size, loc, pad) +#else +# define LOGSZ(name,size) +# define LOGSZP(name,size,loc,pad) +# define LOGSZPL(name,size,loc,pad) +#endif + +/* =========================================================================== + * Allocate a big buffer and divide it up into the various buffers inflate needs. + * Handles alignment of allocated buffer and alignment of individual buffers. 
+ */ +Z_INTERNAL inflate_allocs* alloc_inflate(PREFIX3(stream) *strm) { + int curr_size = 0; + + /* Define sizes */ + int window_size = INFLATE_ADJUST_WINDOW_SIZE((1 << MAX_WBITS) + 64); /* 64B padding for chunksize */ + int state_size = sizeof(inflate_state); + int alloc_size = sizeof(inflate_allocs); + + /* Calculate relative buffer positions and paddings */ + LOGSZP("window", window_size, PAD_WINDOW(curr_size), PADSZ(curr_size,WINDOW_PAD_SIZE)); + int window_pos = PAD_WINDOW(curr_size); + curr_size = window_pos + window_size; + + LOGSZP("state", state_size, PAD_64(curr_size), PADSZ(curr_size,64)); + int state_pos = PAD_64(curr_size); + curr_size = state_pos + state_size; + + LOGSZP("alloc", alloc_size, PAD_16(curr_size), PADSZ(curr_size,16)); + int alloc_pos = PAD_16(curr_size); + curr_size = alloc_pos + alloc_size; + + /* Add 64-1 or 4096-1 to allow window alignment, and round size of buffer up to multiple of 64 */ + int total_size = PAD_64(curr_size + (WINDOW_PAD_SIZE - 1)); + + /* Allocate buffer, align to 64-byte cacheline, and zerofill the resulting buffer */ + char *original_buf = strm->zalloc(strm->opaque, 1, total_size); + if (original_buf == NULL) + return NULL; + + char *buff = (char *)HINT_ALIGNED_WINDOW((char *)PAD_WINDOW(original_buf)); + LOGSZPL("Buffer alloc", total_size, PADSZ((uintptr_t)original_buf,WINDOW_PAD_SIZE), PADSZ(curr_size,WINDOW_PAD_SIZE)); + + /* Initialize alloc_bufs */ + inflate_allocs *alloc_bufs = (struct inflate_allocs_s *)(buff + alloc_pos); + alloc_bufs->buf_start = (char *)original_buf; + alloc_bufs->zfree = strm->zfree; + + alloc_bufs->window = (unsigned char *)HINT_ALIGNED_WINDOW((buff + window_pos)); + alloc_bufs->state = (inflate_state *)HINT_ALIGNED_64((buff + state_pos)); + +#ifdef Z_MEMORY_SANITIZER + /* This is _not_ to subvert the memory sanitizer but to instead unposion some + data we willingly and purposefully load uninitialized into vector registers + in order to safely read the last < chunksize bytes of the 
window. */ + __msan_unpoison(alloc_bufs->window + window_size, 64); +#endif + + return alloc_bufs; +} + +/* =========================================================================== + * Free all allocated inflate buffers + */ +Z_INTERNAL void free_inflate(PREFIX3(stream) *strm) { + struct inflate_state *state = (struct inflate_state *)strm->state; + + if (state->alloc_bufs != NULL) { + inflate_allocs *alloc_bufs = state->alloc_bufs; + alloc_bufs->zfree(strm->opaque, alloc_bufs->buf_start); + strm->state = NULL; + } +} + +/* =========================================================================== + * Initialize inflate state and buffers. + * This function is hidden in ZLIB_COMPAT builds. + */ int32_t ZNG_CONDEXPORT PREFIX(inflateInit2)(PREFIX3(stream) *strm, int32_t windowBits) { int32_t ret; struct inflate_state *state; - /* Initialize functable earlier. */ - functable.force_init(); + /* Initialize functable */ + FUNCTABLE_INIT; if (strm == NULL) return Z_STREAM_ERROR; @@ -151,19 +228,23 @@ int32_t ZNG_CONDEXPORT PREFIX(inflateInit2)(PREFIX3(stream) *strm, int32_t windo } if (strm->zfree == NULL) strm->zfree = PREFIX(zcfree); - state = ZALLOC_INFLATE_STATE(strm); - if (state == NULL) + + inflate_allocs *alloc_bufs = alloc_inflate(strm); + if (alloc_bufs == NULL) return Z_MEM_ERROR; + + state = alloc_bufs->state; + state->window = alloc_bufs->window; + state->alloc_bufs = alloc_bufs; Tracev((stderr, "inflate: allocated\n")); + strm->state = (struct internal_state *)state; state->strm = strm; - state->window = NULL; state->mode = HEAD; /* to pass state test in inflateReset2() */ - state->chunksize = functable.chunksize(); + state->chunksize = FUNCTABLE_CALL(chunksize)(); ret = PREFIX(inflateReset2)(strm, windowBits); if (ret != Z_OK) { - ZFREE_STATE(strm, state); - strm->state = NULL; + free_inflate(strm); } return ret; } @@ -222,31 +303,6 @@ void Z_INTERNAL PREFIX(fixedtables)(struct inflate_state *state) { state->distbits = 5; } -int Z_INTERNAL 
PREFIX(inflate_ensure_window)(struct inflate_state *state) { - /* if it hasn't been done already, allocate space for the window */ - if (state->window == NULL) { - unsigned wsize = 1U << state->wbits; - state->window = (unsigned char *)ZALLOC_WINDOW(state->strm, wsize + state->chunksize, sizeof(unsigned char)); - if (state->window == NULL) - return Z_MEM_ERROR; -#ifdef Z_MEMORY_SANITIZER - /* This is _not_ to subvert the memory sanitizer but to instead unposion some - data we willingly and purposefully load uninitialized into vector registers - in order to safely read the last < chunksize bytes of the window. */ - __msan_unpoison(state->window + wsize, state->chunksize); -#endif - } - - /* if window not in use yet, initialize */ - if (state->wsize == 0) { - state->wsize = 1U << state->wbits; - state->wnext = 0; - state->whave = 0; - } - - return Z_OK; -} - /* Update the window with the last wsize (normally 32K) bytes written before returning. If window does not exist yet, create it. This is only called @@ -261,20 +317,20 @@ int Z_INTERNAL PREFIX(inflate_ensure_window)(struct inflate_state *state) { output will fall in the output data, making match copies simpler and faster. The advantage may be dependent on the size of the processor's data caches. */ -static int32_t updatewindow(PREFIX3(stream) *strm, const uint8_t *end, uint32_t len, int32_t cksum) { +static void updatewindow(PREFIX3(stream) *strm, const uint8_t *end, uint32_t len, int32_t cksum) { struct inflate_state *state; uint32_t dist; state = (struct inflate_state *)strm->state; - if (PREFIX(inflate_ensure_window)(state)) return 1; + /* if window not in use yet, initialize */ + if (state->wsize == 0) + state->wsize = 1U << state->wbits; /* len state->wsize or less output bytes into the circular window */ if (len >= state->wsize) { /* Only do this if the caller specifies to checksum bytes AND the platform requires - * it (s/390 being the primary exception to this. 
Also, for now, do the adler checksums - * if not a gzip based header. The inline adler checksums will come in the near future, - * possibly the next commit */ + * it (s/390 being the primary exception to this) */ if (INFLATE_NEED_CHECKSUM(strm) && cksum) { /* We have to split the checksum over non-copied and copied bytes */ if (len > state->wsize) @@ -314,7 +370,6 @@ static int32_t updatewindow(PREFIX3(stream) *strm, const uint8_t *end, uint32_t state->whave += dist; } } - return 0; } /* @@ -636,7 +691,7 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { } /* compute crc32 checksum if not in raw mode */ if ((state->wrap & 4) && state->flags) - strm->adler = state->check = functable.crc32_fold_reset(&state->crc_fold); + strm->adler = state->check = FUNCTABLE_CALL(crc32_fold_reset)(&state->crc_fold); state->mode = TYPE; break; #endif @@ -867,7 +922,7 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { /* use inflate_fast() if we have enough input and output */ if (have >= INFLATE_FAST_MIN_HAVE && left >= INFLATE_FAST_MIN_LEFT) { RESTORE(); - functable.inflate_fast(strm, out); + FUNCTABLE_CALL(inflate_fast)(strm, out); LOAD(); if (state->mode == TYPE) state->back = -1; @@ -1026,7 +1081,7 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { } else { copy = MIN(state->length, left); - put = functable.chunkmemset_safe(put, state->offset, copy, left); + put = FUNCTABLE_CALL(chunkmemset_safe)(put, state->offset, copy, left); } left -= copy; state->length -= copy; @@ -1056,7 +1111,7 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { } #ifdef GUNZIP if (state->flags) - strm->adler = state->check = functable.crc32_fold_final(&state->crc_fold); + strm->adler = state->check = FUNCTABLE_CALL(crc32_fold_final)(&state->crc_fold); #endif } out = left; @@ -1098,9 +1153,6 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { ret = Z_DATA_ERROR; goto inf_leave; - case 
MEM: - return Z_MEM_ERROR; - case SYNC: default: /* can't happen, but makes compilers happy */ @@ -1111,7 +1163,6 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { Return from inflate(), updating the total counts and the check value. If there was no progress during the inflate() call, return a buffer error. Call updatewindow() to create and/or update the window state. - Note: a memory error from inflate() is non-recoverable. */ inf_leave: RESTORE(); @@ -1120,10 +1171,7 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { (state->wsize || (out != strm->avail_out && state->mode < BAD && (state->mode < CHECK || flush != Z_FINISH)))) { /* update sliding window with respective checksum if not in "raw" mode */ - if (updatewindow(strm, strm->next_out, check_bytes, state->wrap & 4)) { - state->mode = MEM; - return Z_MEM_ERROR; - } + updatewindow(strm, strm->next_out, check_bytes, state->wrap & 4); } in -= strm->avail_in; out -= strm->avail_out; @@ -1144,14 +1192,12 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { } int32_t Z_EXPORT PREFIX(inflateEnd)(PREFIX3(stream) *strm) { - struct inflate_state *state; if (inflateStateCheck(strm)) return Z_STREAM_ERROR; - state = (struct inflate_state *)strm->state; - if (state->window != NULL) - ZFREE_WINDOW(strm, state->window); - ZFREE_STATE(strm, strm->state); - strm->state = NULL; + + /* Free allocated buffers */ + free_inflate(strm); + Tracev((stderr, "inflate: end\n")); return Z_OK; } @@ -1179,7 +1225,6 @@ int32_t Z_EXPORT PREFIX(inflateGetDictionary)(PREFIX3(stream) *strm, uint8_t *di int32_t Z_EXPORT PREFIX(inflateSetDictionary)(PREFIX3(stream) *strm, const uint8_t *dictionary, uint32_t dictLength) { struct inflate_state *state; unsigned long dictid; - int32_t ret; /* check state */ if (inflateStateCheck(strm)) @@ -1190,7 +1235,7 @@ int32_t Z_EXPORT PREFIX(inflateSetDictionary)(PREFIX3(stream) *strm, const uint8 /* check for correct dictionary 
identifier */ if (state->mode == DICT) { - dictid = functable.adler32(ADLER32_INITIAL_VALUE, dictionary, dictLength); + dictid = FUNCTABLE_CALL(adler32)(ADLER32_INITIAL_VALUE, dictionary, dictLength); if (dictid != state->check) return Z_DATA_ERROR; } @@ -1199,11 +1244,8 @@ int32_t Z_EXPORT PREFIX(inflateSetDictionary)(PREFIX3(stream) *strm, const uint8 /* copy dictionary to window using updatewindow(), which will amend the existing dictionary if appropriate */ - ret = updatewindow(strm, dictionary + dictLength, dictLength, 0); - if (ret) { - state->mode = MEM; - return Z_MEM_ERROR; - } + updatewindow(strm, dictionary + dictLength, dictLength, 0); + state->havedict = 1; Tracev((stderr, "inflate: dictionary set\n")); return Z_OK; @@ -1271,7 +1313,7 @@ int32_t Z_EXPORT PREFIX(inflateSync)(PREFIX3(stream) *strm) { /* if first time, start search in bit buffer */ if (state->mode != SYNC) { state->mode = SYNC; - state->hold <<= state->bits & 7; + state->hold >>= state->bits & 7; state->bits -= state->bits & 7; len = 0; while (state->bits >= 8) { @@ -1334,30 +1376,28 @@ int32_t Z_EXPORT PREFIX(inflateCopy)(PREFIX3(stream) *dest, PREFIX3(stream) *sou return Z_STREAM_ERROR; state = (struct inflate_state *)source->state; + /* copy stream */ + memcpy((void *)dest, (void *)source, sizeof(PREFIX3(stream))); + /* allocate space */ - copy = ZALLOC_INFLATE_STATE(source); - if (copy == NULL) + inflate_allocs *alloc_bufs = alloc_inflate(dest); + if (alloc_bufs == NULL) return Z_MEM_ERROR; + copy = alloc_bufs->state; /* copy state */ - memcpy((void *)dest, (void *)source, sizeof(PREFIX3(stream))); - ZCOPY_INFLATE_STATE(copy, state); + memcpy(copy, state, sizeof(struct inflate_state)); copy->strm = dest; if (state->lencode >= state->codes && state->lencode <= state->codes + ENOUGH - 1) { copy->lencode = copy->codes + (state->lencode - state->codes); copy->distcode = copy->codes + (state->distcode - state->codes); } copy->next = copy->codes + (state->next - state->codes); + 
copy->window = alloc_bufs->window; + copy->alloc_bufs = alloc_bufs; /* window */ - copy->window = NULL; - if (state->window != NULL) { - if (PREFIX(inflate_ensure_window)(copy)) { - ZFREE_STATE(source, copy); - return Z_MEM_ERROR; - } - ZCOPY_WINDOW(copy->window, state->window, (size_t)state->wsize); - } + memcpy(copy->window, state->window, INFLATE_ADJUST_WINDOW_SIZE((size_t)state->wsize)); dest->state = (struct internal_state *)copy; return Z_OK; diff --git a/src/native/external/zlib-ng/inflate.h b/src/native/external/zlib-ng/inflate.h index 39cdf5d683c39..536da7d1f8fce 100644 --- a/src/native/external/zlib-ng/inflate.h +++ b/src/native/external/zlib-ng/inflate.h @@ -11,8 +11,12 @@ #ifndef INFLATE_H_ #define INFLATE_H_ -#include "adler32_fold.h" -#include "crc32_fold.h" +#include "crc32.h" + +#ifdef S390_DFLTCC_INFLATE +# include "arch/s390/dfltcc_common.h" +# define HAVE_ARCH_INFLATE_STATE +#endif /* define NO_GZIP when compiling if you want to disable gzip header and trailer decoding by inflate(). NO_GZIP would be used to avoid linking in the crc code when it is not needed. 
@@ -53,14 +57,13 @@ typedef enum { LENGTH, /* i: waiting for 32-bit length (gzip) */ DONE, /* finished check, done -- remain here until reset */ BAD, /* got a data error -- remain here until reset */ - MEM, /* got an inflate() memory error -- remain here until reset */ SYNC /* looking for synchronization bytes to restart inflate() */ } inflate_mode; /* State transitions between above modes - - (most modes can go to BAD or MEM on error -- not shown for clarity) + (most modes can go to BAD on error -- not shown for clarity) Process header: HEAD -> (gzip) or (zlib) or (raw) @@ -81,10 +84,19 @@ typedef enum { Process trailer: CHECK -> LENGTH -> DONE */ +typedef struct inflate_state inflate_state; + +/* Struct for memory allocation handling */ +typedef struct inflate_allocs_s { + char *buf_start; + free_func zfree; + inflate_state *state; + unsigned char *window; +} inflate_allocs; /* State maintained between inflate() calls -- approximately 7K bytes, not including the allocated sliding window, which is up to 32K bytes. 
*/ -struct inflate_state { +struct ALIGNED_(64) inflate_state { PREFIX3(stream) *strm; /* pointer back to this zlib stream */ inflate_mode mode; /* current inflate mode */ int last; /* true if processing last block */ @@ -132,9 +144,14 @@ struct inflate_state { int back; /* bits back of last unprocessed length/lit */ unsigned was; /* initial length of match */ uint32_t chunksize; /* size of memory copying chunk */ + inflate_allocs *alloc_bufs; /* struct for handling memory allocations */ +#ifdef HAVE_ARCH_INFLATE_STATE + arch_inflate_state arch; /* architecture-specific extensions */ +#endif }; -int Z_INTERNAL PREFIX(inflate_ensure_window)(struct inflate_state *state); void Z_INTERNAL PREFIX(fixedtables)(struct inflate_state *state); +Z_INTERNAL inflate_allocs* alloc_inflate(PREFIX3(stream) *strm); +Z_INTERNAL void free_inflate(PREFIX3(stream) *strm); #endif /* INFLATE_H_ */ diff --git a/src/native/external/zlib-ng/inflate_p.h b/src/native/external/zlib-ng/inflate_p.h index eff73876daf25..c324b0486a140 100644 --- a/src/native/external/zlib-ng/inflate_p.h +++ b/src/native/external/zlib-ng/inflate_p.h @@ -10,15 +10,16 @@ /* Architecture-specific hooks. */ #ifdef S390_DFLTCC_INFLATE # include "arch/s390/dfltcc_inflate.h" +/* DFLTCC instructions require window to be page-aligned */ +# define PAD_WINDOW PAD_4096 +# define WINDOW_PAD_SIZE 4096 +# define HINT_ALIGNED_WINDOW HINT_ALIGNED_4096 #else -/* Memory management for the inflate state. Useful for allocating arch-specific extension blocks. */ -# define ZALLOC_INFLATE_STATE(strm) ((struct inflate_state *)ZALLOC(strm, 1, sizeof(struct inflate_state))) -# define ZFREE_STATE(strm, addr) ZFREE(strm, addr) -# define ZCOPY_INFLATE_STATE(dst, src) memcpy(dst, src, sizeof(struct inflate_state)) -/* Memory management for the window. Useful for allocation the aligned window. 
*/ -# define ZALLOC_WINDOW(strm, items, size) ZALLOC(strm, items, size) -# define ZCOPY_WINDOW(dest, src, n) memcpy(dest, src, n) -# define ZFREE_WINDOW(strm, addr) ZFREE(strm, addr) +# define PAD_WINDOW PAD_64 +# define WINDOW_PAD_SIZE 64 +# define HINT_ALIGNED_WINDOW HINT_ALIGNED_64 +/* Adjust the window size for the arch-specific inflate code. */ +# define INFLATE_ADJUST_WINDOW_SIZE(n) (n) /* Invoked at the end of inflateResetKeep(). Useful for initializing arch-specific extension blocks. */ # define INFLATE_RESET_KEEP_HOOK(strm) do {} while (0) /* Invoked at the beginning of inflatePrime(). Useful for updating arch-specific buffers. */ @@ -46,9 +47,9 @@ /* check function to use adler32() for zlib or crc32() for gzip */ #ifdef GUNZIP # define UPDATE(check, buf, len) \ - (state->flags ? PREFIX(crc32)(check, buf, len) : functable.adler32(check, buf, len)) + (state->flags ? PREFIX(crc32)(check, buf, len) : FUNCTABLE_CALL(adler32)(check, buf, len)) #else -# define UPDATE(check, buf, len) functable.adler32(check, buf, len) +# define UPDATE(check, buf, len) FUNCTABLE_CALL(adler32)(check, buf, len) #endif /* check macros for header crc */ diff --git a/src/native/external/zlib-ng/inftrees.c b/src/native/external/zlib-ng/inftrees.c index 423f7b461d7c6..5234fe7ae0cdd 100644 --- a/src/native/external/zlib-ng/inftrees.c +++ b/src/native/external/zlib-ng/inftrees.c @@ -1,5 +1,5 @@ /* inftrees.c -- generate Huffman trees for efficient decoding - * Copyright (C) 1995-2023 Mark Adler + * Copyright (C) 1995-2024 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -7,7 +7,7 @@ #include "zutil.h" #include "inftrees.h" -const char PREFIX(inflate_copyright)[] = " inflate 1.3.0 Copyright 1995-2023 Mark Adler "; +const char PREFIX(inflate_copyright)[] = " inflate 1.3.1 Copyright 1995-2024 Mark Adler "; /* If you use the zlib library in a product, an acknowledgment is welcome in the documentation of your product. 
If for some reason you cannot @@ -54,7 +54,7 @@ int Z_INTERNAL zng_inflate_table(codetype type, uint16_t *lens, unsigned codes, 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; static const uint16_t lext[31] = { /* Length codes 257..285 extra */ 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, - 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 77, 202}; + 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 203, 77}; static const uint16_t dbase[32] = { /* Distance codes 0..29 base */ 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, diff --git a/src/native/external/zlib-ng/insert_string.c b/src/native/external/zlib-ng/insert_string.c index cfe39837f86a5..11a5b97ffe86f 100644 --- a/src/native/external/zlib-ng/insert_string.c +++ b/src/native/external/zlib-ng/insert_string.c @@ -1,6 +1,6 @@ /* insert_string.c -- insert_string integer hash variant * - * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler + * Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h * */ @@ -10,12 +10,12 @@ #define HASH_SLIDE 16 -#define HASH_CALC(s, h, val) h = ((val * 2654435761U) >> HASH_SLIDE); +#define HASH_CALC(h, val) h = ((val * 2654435761U) >> HASH_SLIDE); #define HASH_CALC_VAR h #define HASH_CALC_VAR_INIT uint32_t h = 0 -#define UPDATE_HASH update_hash_c -#define INSERT_STRING insert_string_c -#define QUICK_INSERT_STRING quick_insert_string_c +#define UPDATE_HASH update_hash +#define INSERT_STRING insert_string +#define QUICK_INSERT_STRING quick_insert_string #include "insert_string_tpl.h" diff --git a/src/native/external/zlib-ng/insert_string_roll.c b/src/native/external/zlib-ng/insert_string_roll.c index dfea347bccb75..8693f96f59021 100644 --- a/src/native/external/zlib-ng/insert_string_roll.c +++ b/src/native/external/zlib-ng/insert_string_roll.c @@ -1,6 +1,6 @@ /* insert_string_roll.c -- insert_string 
rolling hash variant * - * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler + * Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h * */ @@ -10,7 +10,7 @@ #define HASH_SLIDE 5 -#define HASH_CALC(s, h, val) h = ((h << HASH_SLIDE) ^ ((uint8_t)val)) +#define HASH_CALC(h, val) h = ((h << HASH_SLIDE) ^ ((uint8_t)val)) #define HASH_CALC_VAR s->ins_h #define HASH_CALC_VAR_INIT #define HASH_CALC_READ val = strstart[0] diff --git a/src/native/external/zlib-ng/insert_string_tpl.h b/src/native/external/zlib-ng/insert_string_tpl.h index c84617730ac37..281c013463104 100644 --- a/src/native/external/zlib-ng/insert_string_tpl.h +++ b/src/native/external/zlib-ng/insert_string_tpl.h @@ -1,10 +1,10 @@ #ifndef INSERT_STRING_H_ #define INSERT_STRING_H_ -/* insert_string.h -- Private insert_string functions shared with more than - * one insert string implementation +/* insert_string_tpl.h -- Private insert_string functions shared with more than + * one insert string implementation * - * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler + * Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler * * Copyright (C) 2013 Intel Corporation. All rights reserved. * Authors: @@ -47,9 +47,8 @@ * input characters, so that a running hash key can be computed from the * previous key instead of complete recalculation each time. 
*/ -Z_INTERNAL uint32_t UPDATE_HASH(deflate_state *const s, uint32_t h, uint32_t val) { - (void)s; - HASH_CALC(s, h, val); +Z_INTERNAL uint32_t UPDATE_HASH(uint32_t h, uint32_t val) { + HASH_CALC(h, val); return h & HASH_CALC_MASK; } @@ -65,7 +64,7 @@ Z_INTERNAL Pos QUICK_INSERT_STRING(deflate_state *const s, uint32_t str) { HASH_CALC_VAR_INIT; HASH_CALC_READ; - HASH_CALC(s, HASH_CALC_VAR, val); + HASH_CALC(HASH_CALC_VAR, val); HASH_CALC_VAR &= HASH_CALC_MASK; hm = HASH_CALC_VAR; @@ -94,7 +93,7 @@ Z_INTERNAL void INSERT_STRING(deflate_state *const s, uint32_t str, uint32_t cou HASH_CALC_VAR_INIT; HASH_CALC_READ; - HASH_CALC(s, HASH_CALC_VAR, val); + HASH_CALC(HASH_CALC_VAR, val); HASH_CALC_VAR &= HASH_CALC_MASK; hm = HASH_CALC_VAR; diff --git a/src/native/external/zlib-ng/match_tpl.h b/src/native/external/zlib-ng/match_tpl.h index d076798520eed..9c258242cd764 100644 --- a/src/native/external/zlib-ng/match_tpl.h +++ b/src/native/external/zlib-ng/match_tpl.h @@ -1,6 +1,6 @@ /* match_tpl.h -- find longest match template for compare256 variants * - * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler + * Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h * * Portions copyright (C) 2014-2021 Konstantin Nosov @@ -8,11 +8,6 @@ * https://github.com/gildor2/fast_zlib */ -#include "zbuild.h" -#include "zutil_p.h" -#include "deflate.h" -#include "functable.h" - #ifndef MATCH_TPL_H #define MATCH_TPL_H @@ -107,11 +102,11 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) { * to cur_match). We cannot use s->prev[strstart+1,...] immediately, because * these strings are not yet inserted into the hash table. 
*/ - hash = s->update_hash(s, 0, scan[1]); - hash = s->update_hash(s, hash, scan[2]); + hash = s->update_hash(0, scan[1]); + hash = s->update_hash(hash, scan[2]); for (i = 3; i <= best_len; i++) { - hash = s->update_hash(s, hash, scan[i]); + hash = s->update_hash(hash, scan[i]); /* If we're starting with best_len >= 3, we can use offset search. */ pos = s->head[hash]; @@ -241,9 +236,9 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) { */ scan_endstr = scan + len - (STD_MIN_MATCH+1); - hash = s->update_hash(s, 0, scan_endstr[0]); - hash = s->update_hash(s, hash, scan_endstr[1]); - hash = s->update_hash(s, hash, scan_endstr[2]); + hash = s->update_hash(0, scan_endstr[0]); + hash = s->update_hash(hash, scan_endstr[1]); + hash = s->update_hash(hash, scan_endstr[2]); pos = s->head[hash]; if (pos < cur_match) { diff --git a/src/native/external/zlib-ng/tools/makecrct.c b/src/native/external/zlib-ng/tools/makecrct.c index 5c3ba58a1a623..9e65d2495475f 100644 --- a/src/native/external/zlib-ng/tools/makecrct.c +++ b/src/native/external/zlib-ng/tools/makecrct.c @@ -21,9 +21,6 @@ static uint32_t crc_table[256]; static z_word_t crc_big_table[256]; - -static uint32_t crc_braid_table[W][256]; -static z_word_t crc_braid_big_table[W][256]; static uint32_t x2n_table[32]; #include "crc32_braid_comb_p.h" @@ -80,9 +77,6 @@ static void make_crc_table(void) { x2n_table[0] = p; for (n = 1; n < 32; n++) x2n_table[n] = p = multmodp(p, p); - - /* initialize the braiding tables -- needs x2n_table[] */ - braid(crc_braid_table, crc_braid_big_table, N, W); } /* diff --git a/src/native/external/zlib-ng/trees.c b/src/native/external/zlib-ng/trees.c index 5bb88389baa33..9f2f49137f08a 100644 --- a/src/native/external/zlib-ng/trees.c +++ b/src/native/external/zlib-ng/trees.c @@ -1,5 +1,5 @@ /* trees.c -- output deflated data using Huffman coding - * Copyright (C) 1995-2021 Jean-loup Gailly + * Copyright (C) 1995-2024 Jean-loup Gailly * detect_data_type() function provided 
freely by Cosmin Truta, 2006 * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -75,7 +75,6 @@ static int build_bl_tree (deflate_state *s); static void send_all_trees (deflate_state *s, int lcodes, int dcodes, int blcodes); static void compress_block (deflate_state *s, const ct_data *ltree, const ct_data *dtree); static int detect_data_type (deflate_state *s); -static void bi_flush (deflate_state *s); /* =========================================================================== * Initialize the tree data structures for a new zlib stream. @@ -609,13 +608,6 @@ void Z_INTERNAL zng_tr_stored_block(deflate_state *s, char *buf, uint32_t stored } } -/* =========================================================================== - * Flush the bits in the bit buffer to pending output (leaves at most 7 bits) - */ -void Z_INTERNAL zng_tr_flush_bits(deflate_state *s) { - bi_flush(s); -} - /* =========================================================================== * Send one empty static block to give enough lookahead for inflate. * This takes 10 bits, of which 7 may remain in the bit buffer. 
@@ -623,7 +615,7 @@ void Z_INTERNAL zng_tr_flush_bits(deflate_state *s) { void Z_INTERNAL zng_tr_align(deflate_state *s) { zng_tr_emit_tree(s, STATIC_TREES, 0); zng_tr_emit_end_block(s, static_ltree, 0); - bi_flush(s); + zng_tr_flush_bits(s); } /* =========================================================================== @@ -718,21 +710,30 @@ static void compress_block(deflate_state *s, const ct_data *ltree, const ct_data /* dtree: distance tree */ unsigned dist; /* distance of matched string */ int lc; /* match length or unmatched char (if dist == 0) */ - unsigned sx = 0; /* running index in sym_buf */ + unsigned sx = 0; /* running index in symbol buffers */ if (s->sym_next != 0) { do { +#ifdef LIT_MEM + dist = s->d_buf[sx]; + lc = s->l_buf[sx++]; +#else dist = s->sym_buf[sx++] & 0xff; dist += (unsigned)(s->sym_buf[sx++] & 0xff) << 8; lc = s->sym_buf[sx++]; +#endif if (dist == 0) { zng_emit_lit(s, ltree, lc); } else { zng_emit_dist(s, ltree, dtree, lc, dist); } /* literal or match pair ? */ - /* Check that the overlay between pending_buf and sym_buf is ok: */ + /* Check for no overlay of pending_buf on needed symbols */ +#ifdef LIT_MEM + Assert(s->pending < 2 * (s->lit_bufsize + sx), "pending_buf overflow"); +#else Assert(s->pending < s->lit_bufsize + sx, "pending_buf overflow"); +#endif } while (sx < s->sym_next); } @@ -781,27 +782,26 @@ static int detect_data_type(deflate_state *s) { /* =========================================================================== * Flush the bit buffer, keeping at most 7 bits in it. 
*/ -static void bi_flush(deflate_state *s) { - if (s->bi_valid == 64) { - put_uint64(s, s->bi_buf); - s->bi_buf = 0; - s->bi_valid = 0; - } else { - if (s->bi_valid >= 32) { - put_uint32(s, (uint32_t)s->bi_buf); - s->bi_buf >>= 32; - s->bi_valid -= 32; - } - if (s->bi_valid >= 16) { - put_short(s, (uint16_t)s->bi_buf); - s->bi_buf >>= 16; - s->bi_valid -= 16; - } - if (s->bi_valid >= 8) { - put_byte(s, s->bi_buf); - s->bi_buf >>= 8; - s->bi_valid -= 8; - } +void Z_INTERNAL zng_tr_flush_bits(deflate_state *s) { + if (s->bi_valid >= 48) { + put_uint32(s, (uint32_t)s->bi_buf); + put_short(s, (uint16_t)(s->bi_buf >> 32)); + s->bi_buf >>= 48; + s->bi_valid -= 48; + } else if (s->bi_valid >= 32) { + put_uint32(s, (uint32_t)s->bi_buf); + s->bi_buf >>= 32; + s->bi_valid -= 32; + } + if (s->bi_valid >= 16) { + put_short(s, (uint16_t)s->bi_buf); + s->bi_buf >>= 16; + s->bi_valid -= 16; + } + if (s->bi_valid >= 8) { + put_byte(s, s->bi_buf); + s->bi_buf >>= 8; + s->bi_valid -= 8; } } diff --git a/src/native/external/zlib-ng/win32/Makefile.a64 b/src/native/external/zlib-ng/win32/Makefile.a64 index 2a0f3cfe4e7b2..9f8d6fb7facca 100644 --- a/src/native/external/zlib-ng/win32/Makefile.a64 +++ b/src/native/external/zlib-ng/win32/Makefile.a64 @@ -1,5 +1,5 @@ # Makefile for zlib using Microsoft (Visual) C -# zlib is copyright (C) 1995-2006 Jean-loup Gailly and Mark Adler +# zlib is copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler # # Usage: # nmake -f win32/Makefile.a64 (standard build) @@ -23,13 +23,14 @@ LD = link AR = lib RC = rc CP = copy /y -CFLAGS = -nologo -MD -W3 -O2 -Oy- -Zi -Fd"zlib" $(LOC) +INCLUDES = -I$(TOP) -I$(TOP)/arch/arm -I$(TOP)/arch/generic +CFLAGS = -nologo -MD -W3 -O2 -Oy- -Zi -Fd"zlib" $(LOC) $(INCLUDES) WFLAGS = \ -D_ARM64_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE=1 \ -D_CRT_SECURE_NO_DEPRECATE \ -D_CRT_NONSTDC_NO_DEPRECATE \ - -DARM_NEON_HASLD4 \ -DARM_FEATURES \ + -DARM_NEON_HASLD4 \ # LDFLAGS = -nologo -debug -incremental:no -opt:ref -manifest ARFLAGS = 
-nologo @@ -43,20 +44,22 @@ SUFFIX = OBJS = \ adler32.obj \ - adler32_fold.obj \ + adler32_c.obj \ + adler32_fold_c.obj \ arm_features.obj \ - chunkset.obj \ - compare256.obj \ + chunkset_c.obj \ + compare256_c.obj \ compress.obj \ cpu_features.obj \ - crc32_braid.obj \ + crc32.obj \ + crc32_braid_c.obj \ crc32_braid_comb.obj \ - crc32_fold.obj \ + crc32_fold_c.obj \ deflate.obj \ deflate_fast.obj \ deflate_huff.obj \ - deflate_quick.obj \ deflate_medium.obj \ + deflate_quick.obj \ deflate_rle.obj \ deflate_slow.obj \ deflate_stored.obj \ @@ -66,7 +69,7 @@ OBJS = \ inftrees.obj \ insert_string.obj \ insert_string_roll.obj \ - slide_hash.obj \ + slide_hash_c.obj \ trees.obj \ uncompr.obj \ zutil.obj \ @@ -96,7 +99,7 @@ WFLAGS = $(WFLAGS) \ -DARM_NEON \ -DARM_NOCHECK_NEON \ # -OBJS = $(OBJS) crc32_acle.obj insert_string_acle.obj adler32_neon.obj chunkset_neon.obj compare256_neon.obj slide_hash_neon.obj +OBJS = $(OBJS) crc32_acle.obj adler32_neon.obj chunkset_neon.obj compare256_neon.obj slide_hash_neon.obj # targets all: $(STATICLIB) $(SHAREDLIB) $(IMPLIB) \ @@ -156,57 +159,59 @@ minigzip_d.exe: minigzip.obj gzlib2.obj gzread2.obj gzwrite2.obj $(IMPLIB) {$(TOP)}.c.obj: $(CC) -c $(WFLAGS) $(CFLAGS) $< -gzlib2.obj: gzlib.c +gzlib2.obj: gzlib.c $(TOP)/zbuild.h $(TOP)/gzguts.h $(TOP)/zutil_p.h $(CC) -c $(WFLAGS) $(CFLAGS) -DWITH_GZFILEOP -Fogzlib2.obj gzlib.c -gzread2.obj: gzread.c +gzread2.obj: gzread.c $(TOP)/zbuild.h $(TOP)/gzguts.h $(TOP)/zutil_p.h $(CC) -c $(WFLAGS) $(CFLAGS) -DWITH_GZFILEOP -Fogzread2.obj gzread.c -gzwrite2.obj: gzwrite.c +gzwrite2.obj: gzwrite.c $(TOP)/zbuild.h $(TOP)/gzguts.h $(TOP)/zutil_p.h $(CC) -c $(WFLAGS) $(CFLAGS) -DWITH_GZFILEOP -Fogzwrite2.obj gzwrite.c {$(TOP)/arch/arm}.c.obj: $(CC) -c -I$(TOP) $(WFLAGS) $(CFLAGS) $< +{$(TOP)/arch/generic}.c.obj: + $(CC) -c -I$(TOP) $(WFLAGS) $(CFLAGS) $< + {$(TOP)/test}.c.obj: $(CC) -c -I$(TOP) $(WFLAGS) $(CFLAGS) -DWITH_GZFILEOP $< $(TOP)/zconf$(SUFFIX).h: zconf -SRCDIR = $(TOP) -# Keep the 
dependences in sync with top-level Makefile.in -adler32.obj: $(SRCDIR)/adler32.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/functable.h $(SRCDIR)/adler32_p.h -adler32_fold.obj: $(SRCDIR)/adler32_fold.c $(SRCDIR)/zbuild.h $(SRCDIR)/adler32_fold.h $(SRCDIR)/functable.h -chunkset.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h -functable.obj: $(SRCDIR)/functable.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/zendian.h $(SRCDIR)/arch/x86/x86_features.h -gzlib.obj: $(SRCDIR)/gzlib.c $(SRCDIR)/zbuild.h $(SRCDIR)/gzguts.h $(SRCDIR)/zutil_p.h -gzread.obj: $(SRCDIR)/gzread.c $(SRCDIR)/zbuild.h $(SRCDIR)/gzguts.h $(SRCDIR)/zutil_p.h -gzwrite.obj: $(SRCDIR)/gzwrite.c $(SRCDIR)/zbuild.h $(SRCDIR)/gzguts.h $(SRCDIR)/zutil_p.h -compress.obj: $(SRCDIR)/compress.c $(SRCDIR)/zbuild.h $(SRCDIR)/zlib$(SUFFIX).h -uncompr.obj: $(SRCDIR)/uncompr.c $(SRCDIR)/zbuild.h $(SRCDIR)/zlib$(SUFFIX).h -cpu_features.obj: $(SRCDIR)/cpu_features.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h -crc32_braid.obj: $(SRCDIR)/crc32_braid.c $(SRCDIR)/zbuild.h $(SRCDIR)/zendian.h $(SRCDIR)/deflate.h $(SRCDIR)/functable.h $(SRCDIR)/crc32_braid_p.h $(SRCDIR)/crc32_braid_tbl.h -crc32_braid_comb.obj: $(SRCDIR)/crc32_braid_comb.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/crc32_braid_p.h $(SRCDIR)/crc32_braid_tbl.h $(SRCDIR)/crc32_braid_comb_p.h -crc32_fold.obj: $(SRCDIR)/crc32_fold.c $(SRCDIR)/zbuild.h -deflate.obj: $(SRCDIR)/deflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h -deflate_fast.obj: $(SRCDIR)/deflate_fast.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h -deflate_huff.obj: $(SRCDIR)/deflate_huff.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h -deflate_quick.obj: $(SRCDIR)/deflate_quick.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h $(SRCDIR)/trees_emit.h -deflate_medium.obj: $(SRCDIR)/deflate_medium.c 
$(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h -deflate_rle.obj: $(SRCDIR)/deflate_rle.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h -deflate_slow.obj: $(SRCDIR)/deflate_slow.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h -deflate_stored.obj: $(SRCDIR)/deflate_stored.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h -infback.obj: $(SRCDIR)/infback.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inflate_p.h $(SRCDIR)/functable.h -inflate.obj: $(SRCDIR)/inflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inflate_p.h $(SRCDIR)/functable.h $(SRCDIR)/functable.h -inftrees.obj: $(SRCDIR)/inftrees.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h -slide_hash.obj: $(SRCDIR)/slide_hash.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h -slide_hash_neon.obj: $(SRCDIR)/arch/arm/slide_hash_neon.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h -trees.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/trees_tbl.h -zutil.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/zutil_p.h - -example.obj: $(TOP)/test/example.c $(TOP)/zbuild.h $(TOP)/zlib$(SUFFIX).h - -minigzip.obj: $(TOP)/test/minigzip.c $(TOP)/zbuild.h $(TOP)/zlib$(SUFFIX).h +adler32.obj: $(TOP)/adler32.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/adler32_p.h +adler32_c.obj: $(TOP)/arch/generic/adler32_c.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/adler32_p.h +adler32_fold_c.obj: $(TOP)/arch/generic/adler32_fold_c.c $(TOP)/zbuild.h $(TOP)/functable.h +chunkset_c.obj: $(TOP)/arch/generic/chunkset_c.c $(TOP)/zbuild.h $(TOP)/chunkset_tpl.h $(TOP)/inffast_tpl.h +compare256_c.obj: $(TOP)/arch/generic/compare256_c.c $(TOP)/zbuild.h $(TOP)/zutil_p.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h +compress.obj: $(TOP)/compress.c $(TOP)/zbuild.h $(TOP)/zutil.h +cpu_features.obj: 
$(TOP)/cpu_features.c $(TOP)/cpu_features.h $(TOP)/zbuild.h +crc32.obj: $(TOP)/crc32.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/crc32_braid_tbl.h +crc32_braid_c.obj: $(TOP)/arch/generic/crc32_braid_c.c $(TOP)/zbuild.h $(TOP)/crc32_braid_p.h $(TOP)/crc32_braid_tbl.h +crc32_braid_comb.obj: $(TOP)/crc32_braid_comb.c $(TOP)/zutil.h $(TOP)/crc32_braid_p.h $(TOP)/crc32_braid_tbl.h $(TOP)/crc32_braid_comb_p.h +crc32_fold_c.obj: $(TOP)/arch/generic/crc32_fold_c.c $(TOP)/zbuild.h $(TOP)/crc32.h $(TOP)/functable.h $(TOP)/zutil.h +deflate.obj: $(TOP)/deflate.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h +deflate_fast.obj: $(TOP)/deflate_fast.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h +deflate_huff.obj: $(TOP)/deflate_huff.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h +deflate_medium.obj: $(TOP)/deflate_medium.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h +deflate_quick.obj: $(TOP)/deflate_quick.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/trees_emit.h $(TOP)/zutil_p.h +deflate_rle.obj: $(TOP)/deflate_rle.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/compare256_rle.h +deflate_slow.obj: $(TOP)/deflate_slow.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h +deflate_stored.obj: $(TOP)/deflate_stored.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h +functable.obj: $(TOP)/functable.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/cpu_features.h $(TOP)/arch/arm/arm_features.h $(TOP)/arch_functions.h +gzlib.obj: $(TOP)/gzlib.c $(TOP)/zbuild.h $(TOP)/gzguts.h $(TOP)/zutil_p.h +gzread.obj: $(TOP)/gzread.c $(TOP)/zbuild.h $(TOP)/gzguts.h $(TOP)/zutil_p.h +gzwrite.obj: $(TOP)/gzwrite.c $(TOP)/zbuild.h $(TOP)/gzguts.h $(TOP)/zutil_p.h +infback.obj: $(TOP)/infback.c $(TOP)/zbuild.h $(TOP)/zutil.h $(TOP)/inftrees.h $(TOP)/inflate.h $(TOP)/inflate_p.h $(TOP)/functable.h 
+inflate.obj: $(TOP)/inflate.c $(TOP)/zbuild.h $(TOP)/zutil.h $(TOP)/inftrees.h $(TOP)/inflate.h $(TOP)/inflate_p.h $(TOP)/functable.h $(TOP)/inffixed_tbl.h +inftrees.obj: $(TOP)/inftrees.c $(TOP)/zbuild.h $(TOP)/zutil.h $(TOP)/inftrees.h +insert_string.obj: $(TOP)/insert_string.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/insert_string_tpl.h +insert_string_roll.obj: $(TOP)/insert_string_roll.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/insert_string_tpl.h +slide_hash_c.obj: $(TOP)/arch/generic/slide_hash_c.c $(TOP)/zbuild.h $(TOP)/deflate.h +slide_hash_neon.obj: $(TOP)/arch/arm/slide_hash_neon.c $(TOP)/arch/arm/neon_intrins.h $(TOP)/zbuild.h $(TOP)/deflate.h +trees.obj: $(TOP)/trees.c $(TOP)/trees.h $(TOP)/trees_emit.h $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/trees_tbl.h +uncompr.obj: $(TOP)/uncompr.c $(TOP)/zbuild.h $(TOP)/zutil.h +zutil.obj: $(TOP)/zutil.c $(TOP)/zbuild.h $(TOP)/zutil.h $(TOP)/zutil_p.h $(RESFILE): $(TOP)/win32/$(RCFILE) $(RC) $(RCFLAGS) /fo$@ $(TOP)/win32/$(RCFILE) @@ -220,6 +225,10 @@ testdll: example_d.exe minigzip_d.exe example_d echo hello world | minigzip_d | minigzip_d -d +example.obj: $(TOP)/test/example.c $(TOP)/zbuild.h $(TOP)/zlib$(SUFFIX).h $(TOP)/deflate.h $(TOP)/test/test_shared_ng.h + +minigzip.obj: $(TOP)/test/minigzip.c $(TOP)/zbuild.h $(TOP)/zlib$(SUFFIX).h + # cleanup clean: diff --git a/src/native/external/zlib-ng/win32/Makefile.arm b/src/native/external/zlib-ng/win32/Makefile.arm index 34dfe6bba6e1a..cab999dfe036d 100644 --- a/src/native/external/zlib-ng/win32/Makefile.arm +++ b/src/native/external/zlib-ng/win32/Makefile.arm @@ -1,5 +1,5 @@ # Makefile for zlib using Microsoft (Visual) C -# zlib is copyright (C) 1995-2006 Jean-loup Gailly and Mark Adler +# zlib is copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler # # Usage: # nmake -f win32/Makefile.arm (standard build) @@ -23,7 +23,8 @@ LD = link AR = lib RC = rc CP = copy /y -CFLAGS = -nologo -MD -W3 -O2 -Oy- -Zi -Fd"zlib" $(LOC) +INCLUDES = -I$(TOP) -I$(TOP)/arch/arm 
-I$(TOP)/arch/generic +CFLAGS = -nologo -MD -W3 -O2 -Oy- -Zi -Fd"zlib" $(LOC) $(INCLUDES) WFLAGS = \ -D_ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE=1 \ -D_CRT_SECURE_NO_DEPRECATE \ @@ -48,15 +49,17 @@ SUFFIX = OBJS = \ adler32.obj \ - adler32_fold.obj \ + adler32_c.obj \ + adler32_fold_c.obj \ arm_features.obj \ - chunkset.obj \ - compare256.obj \ + chunkset_c.obj \ + compare256_c.obj \ compress.obj \ cpu_features.obj \ - crc32_braid.obj \ + crc32.obj \ + crc32_braid_c.obj \ crc32_braid_comb.obj \ - crc32_fold.obj \ + crc32_fold_c.obj \ deflate.obj \ deflate_fast.obj \ deflate_huff.obj \ @@ -71,7 +74,7 @@ OBJS = \ inftrees.obj \ insert_string.obj \ insert_string_roll.obj \ - slide_hash.obj \ + slide_hash_c.obj \ trees.obj \ uncompr.obj \ zutil.obj \ @@ -97,7 +100,7 @@ OBJS = $(OBJS) gzlib.obj gzread.obj gzwrite.obj !if "$(WITH_ACLE)" != "" WFLAGS = $(WFLAGS) -DARM_ACLE -OBJS = $(OBJS) crc32_acle.obj insert_string_acle.obj +OBJS = $(OBJS) crc32_acle.obj !endif !if "$(WITH_VFPV3)" != "" NEON_ARCH = /arch:VFPv3 @@ -177,56 +180,58 @@ minigzip_d.exe: minigzip.obj gzlib2.obj gzread2.obj gzwrite2.obj $(IMPLIB) {$(TOP)}.c.obj: $(CC) -c $(WFLAGS) $(CFLAGS) $< -gzlib2.obj: gzlib.c +gzlib2.obj: gzlib.c $(TOP)/zbuild.h $(TOP)/gzguts.h $(TOP)/zutil_p.h $(CC) -c $(WFLAGS) $(CFLAGS) -DWITH_GZFILEOP -Fogzlib2.obj gzlib.c -gzread2.obj: gzread.c +gzread2.obj: gzread.c $(TOP)/zbuild.h $(TOP)/gzguts.h $(TOP)/zutil_p.h $(CC) -c $(WFLAGS) $(CFLAGS) -DWITH_GZFILEOP -Fogzread2.obj gzread.c -gzwrite2.obj: gzwrite.c +gzwrite2.obj: gzwrite.c $(TOP)/zbuild.h $(TOP)/gzguts.h $(TOP)/zutil_p.h $(CC) -c $(WFLAGS) $(CFLAGS) -DWITH_GZFILEOP -Fogzwrite2.obj gzwrite.c {$(TOP)/arch/arm}.c.obj: $(CC) -c -I$(TOP) $(WFLAGS) $(CFLAGS) $< +{$(TOP)/arch/generic}.c.obj: + $(CC) -c -I$(TOP) $(WFLAGS) $(CFLAGS) $< + {$(TOP)/test}.c.obj: $(CC) -c -I$(TOP) $(WFLAGS) $(CFLAGS) -DWITH_GZFILEOP $< $(TOP)/zconf$(SUFFIX).h: zconf -SRCDIR = $(TOP) -# Keep the dependences in sync with top-level Makefile.in -adler32.obj: 
$(SRCDIR)/adler32.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/functable.h $(SRCDIR)/adler32_p.h -adler32_fold.obj: $(SRCDIR)/adler32_fold.c $(SRCDIR)/zbuild.h $(SRCDIR)/adler32_fold.h $(SRCDIR)/functable.h -functable.obj: $(SRCDIR)/functable.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/zendian.h $(SRCDIR)/arch/x86/x86_features.h -gzlib.obj: $(SRCDIR)/gzlib.c $(SRCDIR)/zbuild.h $(SRCDIR)/gzguts.h $(SRCDIR)/zutil_p.h -gzread.obj: $(SRCDIR)/gzread.c $(SRCDIR)/zbuild.h $(SRCDIR)/gzguts.h $(SRCDIR)/zutil_p.h -gzwrite.obj: $(SRCDIR)/gzwrite.c $(SRCDIR)/zbuild.h $(SRCDIR)/gzguts.h $(SRCDIR)/zutil_p.h -compress.obj: $(SRCDIR)/compress.c $(SRCDIR)/zbuild.h $(SRCDIR)/zlib$(SUFFIX).h -uncompr.obj: $(SRCDIR)/uncompr.c $(SRCDIR)/zbuild.h $(SRCDIR)/zlib$(SUFFIX).h -chunkset.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h -cpu_features.obj: $(SRCDIR)/cpu_features.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h -crc32_braid.obj: $(SRCDIR)/crc32_braid.c $(SRCDIR)/zbuild.h $(SRCDIR)/zendian.h $(SRCDIR)/deflate.h $(SRCDIR)/functable.h $(SRCDIR)/crc32_braid_p.h $(SRCDIR)/crc32_braid_tbl.h -crc32_braid_comb.obj: $(SRCDIR)/crc32_braid_comb.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/crc32_braid_p.h $(SRCDIR)/crc32_braid_tbl.h $(SRCDIR)/crc32_braid_comb_p.h -crc32_fold.obj: $(SRCDIR)/crc32_fold.c $(SRCDIR)/zbuild.h -deflate.obj: $(SRCDIR)/deflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h -deflate_fast.obj: $(SRCDIR)/deflate_fast.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h -deflate_huff.obj: $(SRCDIR)/deflate_huff.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h -deflate_medium.obj: $(SRCDIR)/deflate_medium.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h -deflate_quick.obj: $(SRCDIR)/deflate_quick.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h 
$(SRCDIR)/trees_emit.h -deflate_rle.obj: $(SRCDIR)/deflate_rle.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h -deflate_slow.obj: $(SRCDIR)/deflate_slow.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h -deflate_stored.obj: $(SRCDIR)/deflate_stored.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h -infback.obj: $(SRCDIR)/infback.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inflate_p.h $(SRCDIR)/functable.h -inflate.obj: $(SRCDIR)/inflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inflate_p.h $(SRCDIR)/functable.h $(SRCDIR)/functable.h -inftrees.obj: $(SRCDIR)/inftrees.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h -slide_hash.obj: $(SRCDIR)/slide_hash.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h -trees.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/trees_tbl.h -zutil.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/zutil_p.h - -example.obj: $(TOP)/test/example.c $(TOP)/zbuild.h $(TOP)/zlib$(SUFFIX).h - -minigzip.obj: $(TOP)/test/minigzip.c $(TOP)/zbuild.h $(TOP)/zlib$(SUFFIX).h +adler32.obj: $(TOP)/adler32.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/adler32_p.h +adler32_c.obj: $(TOP)/arch/generic/adler32_c.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/adler32_p.h +adler32_fold_c.obj: $(TOP)/arch/generic/adler32_fold_c.c $(TOP)/zbuild.h $(TOP)/functable.h +chunkset_c.obj: $(TOP)/arch/generic/chunkset_c.c $(TOP)/zbuild.h $(TOP)/chunkset_tpl.h $(TOP)/inffast_tpl.h +compare256_c.obj: $(TOP)/arch/generic/compare256_c.c $(TOP)/zbuild.h $(TOP)/zutil_p.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h +compress.obj: $(TOP)/compress.c $(TOP)/zbuild.h $(TOP)/zutil.h +cpu_features.obj: $(TOP)/cpu_features.c $(TOP)/cpu_features.h $(TOP)/zbuild.h +crc32.obj: $(TOP)/crc32.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/crc32_braid_tbl.h +crc32_braid_c.obj: 
$(TOP)/arch/generic/crc32_braid_c.c $(TOP)/zbuild.h $(TOP)/crc32_braid_p.h $(TOP)/crc32_braid_tbl.h +crc32_braid_comb.obj: $(TOP)/crc32_braid_comb.c $(TOP)/zutil.h $(TOP)/crc32_braid_p.h $(TOP)/crc32_braid_tbl.h $(TOP)/crc32_braid_comb_p.h +crc32_fold_c.obj: $(TOP)/arch/generic/crc32_fold_c.c $(TOP)/zbuild.h $(TOP)/crc32.h $(TOP)/functable.h $(TOP)/zutil.h +deflate.obj: $(TOP)/deflate.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h +deflate_fast.obj: $(TOP)/deflate_fast.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h +deflate_huff.obj: $(TOP)/deflate_huff.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h +deflate_medium.obj: $(TOP)/deflate_medium.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h +deflate_quick.obj: $(TOP)/deflate_quick.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/trees_emit.h $(TOP)/zutil_p.h +deflate_rle.obj: $(TOP)/deflate_rle.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/compare256_rle.h +deflate_slow.obj: $(TOP)/deflate_slow.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h +deflate_stored.obj: $(TOP)/deflate_stored.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h +functable.obj: $(TOP)/functable.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/cpu_features.h $(TOP)/arch/arm/arm_features.h $(TOP)/arch_functions.h +gzlib.obj: $(TOP)/gzlib.c $(TOP)/zbuild.h $(TOP)/gzguts.h $(TOP)/zutil_p.h +gzread.obj: $(TOP)/gzread.c $(TOP)/zbuild.h $(TOP)/gzguts.h $(TOP)/zutil_p.h +gzwrite.obj: $(TOP)/gzwrite.c $(TOP)/zbuild.h $(TOP)/gzguts.h $(TOP)/zutil_p.h +infback.obj: $(TOP)/infback.c $(TOP)/zbuild.h $(TOP)/zutil.h $(TOP)/inftrees.h $(TOP)/inflate.h $(TOP)/inflate_p.h $(TOP)/functable.h +inflate.obj: $(TOP)/inflate.c $(TOP)/zbuild.h $(TOP)/zutil.h $(TOP)/inftrees.h $(TOP)/inflate.h $(TOP)/inflate_p.h $(TOP)/functable.h $(TOP)/inffixed_tbl.h 
+inftrees.obj: $(TOP)/inftrees.c $(TOP)/zbuild.h $(TOP)/zutil.h $(TOP)/inftrees.h +insert_string.obj: $(TOP)/insert_string.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/insert_string_tpl.h +insert_string_roll.obj: $(TOP)/insert_string_roll.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/insert_string_tpl.h +slide_hash_c.obj: $(TOP)/arch/generic/slide_hash_c.c $(TOP)/zbuild.h $(TOP)/deflate.h +trees.obj: $(TOP)/trees.c $(TOP)/trees.h $(TOP)/trees_emit.h $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/trees_tbl.h +uncompr.obj: $(TOP)/uncompr.c $(TOP)/zbuild.h $(TOP)/zutil.h +zutil.obj: $(TOP)/zutil.c $(TOP)/zbuild.h $(TOP)/zutil.h $(TOP)/zutil_p.h $(RESFILE): $(TOP)/win32/$(RCFILE) $(RC) $(RCFLAGS) /fo$@ $(TOP)/win32/$(RCFILE) @@ -240,6 +245,10 @@ testdll: example_d.exe minigzip_d.exe example_d echo hello world | minigzip_d | minigzip_d -d +example.obj: $(TOP)/test/example.c $(TOP)/zbuild.h $(TOP)/zlib$(SUFFIX).h $(TOP)/deflate.h $(TOP)/test/test_shared_ng.h + +minigzip.obj: $(TOP)/test/minigzip.c $(TOP)/zbuild.h $(TOP)/zlib$(SUFFIX).h + # cleanup clean: diff --git a/src/native/external/zlib-ng/win32/Makefile.msc b/src/native/external/zlib-ng/win32/Makefile.msc index 3035072bec765..8392fe46e7e89 100644 --- a/src/native/external/zlib-ng/win32/Makefile.msc +++ b/src/native/external/zlib-ng/win32/Makefile.msc @@ -1,5 +1,5 @@ # Makefile for zlib using Microsoft (Visual) C -# zlib is copyright (C) 1995-2006 Jean-loup Gailly and Mark Adler +# zlib is copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler # # Usage: # nmake -f win32/Makefile.msc (standard build) @@ -19,11 +19,14 @@ IMPLIB = zdll.lib SYMBOL_PREFIX = CC = cl +CXX = cl LD = link AR = lib RC = rc CP = copy /y -CFLAGS = -nologo -MD -W3 -O2 -Oy- -Zi -Fd"zlib" $(LOC) +INCLUDES = -I$(TOP) -I$(TOP)/arch/x86 -I$(TOP)/arch/generic +CFLAGS = -nologo -MD -W3 -O2 -Oy- -Zi -Fd"zlib" $(LOC) $(INCLUDES) +CXXFLAGS = -nologo -EHsc -MD -W3 -O2 -Oy- -Zi -Fd"zlib" $(LOC) $(INCLUDES) WFLAGS = \ -D_CRT_SECURE_NO_DEPRECATE \ 
-D_CRT_NONSTDC_NO_DEPRECATE \ @@ -46,24 +49,26 @@ SUFFIX = OBJS = \ adler32.obj \ + adler32_c.obj \ adler32_avx2.obj \ adler32_avx512.obj \ adler32_avx512_vnni.obj \ adler32_sse42.obj \ adler32_ssse3.obj \ - adler32_fold.obj \ - chunkset.obj \ + adler32_fold_c.obj \ + chunkset_c.obj \ chunkset_avx2.obj \ chunkset_sse2.obj \ chunkset_ssse3.obj \ - compare256.obj \ + compare256_c.obj \ compare256_avx2.obj \ compare256_sse2.obj \ compress.obj \ cpu_features.obj \ - crc32_braid.obj \ + crc32.obj \ + crc32_braid_c.obj \ crc32_braid_comb.obj \ - crc32_fold.obj \ + crc32_fold_c.obj \ crc32_pclmulqdq.obj \ deflate.obj \ deflate_fast.obj \ @@ -79,8 +84,7 @@ OBJS = \ inftrees.obj \ insert_string.obj \ insert_string_roll.obj \ - insert_string_sse42.obj \ - slide_hash.obj \ + slide_hash_c.obj \ slide_hash_avx2.obj \ slide_hash_sse2.obj \ trees.obj \ @@ -142,6 +146,11 @@ $(SHAREDLIB): zconf $(TOP)/win32/$(DEFFILE) $(OBJS) $(RESFILE) if exist $@.manifest \ mt -nologo -manifest $@.manifest -outputresource:$@;2 +depcheck.exe: depcheck.obj + $(LD) $(LDFLAGS) depcheck.obj + if exist $@.manifest \ + mt -nologo -manifest $@.manifest -outputresource:$@;1 + example.exe: example.obj gzlib2.obj gzread2.obj gzwrite2.obj $(STATICLIB) $(LD) $(LDFLAGS) example.obj gzlib2.obj gzread2.obj gzwrite2.obj $(STATICLIB) if exist $@.manifest \ @@ -165,76 +174,88 @@ minigzip_d.exe: minigzip.obj gzlib2.obj gzread2.obj gzwrite2.obj $(IMPLIB) {$(TOP)}.c.obj: $(CC) -c $(WFLAGS) $(CFLAGS) $< -gzlib2.obj: gzlib.c +gzlib2.obj: gzlib.c $(TOP)/zbuild.h $(TOP)/gzguts.h $(TOP)/zutil_p.h $(CC) -c $(WFLAGS) $(CFLAGS) -DWITH_GZFILEOP -Fogzlib2.obj gzlib.c -gzread2.obj: gzread.c +gzread2.obj: gzread.c $(TOP)/zbuild.h $(TOP)/gzguts.h $(TOP)/zutil_p.h $(CC) -c $(WFLAGS) $(CFLAGS) -DWITH_GZFILEOP -Fogzread2.obj gzread.c -gzwrite2.obj: gzwrite.c +gzwrite2.obj: gzwrite.c $(TOP)/zbuild.h $(TOP)/gzguts.h $(TOP)/zutil_p.h $(CC) -c $(WFLAGS) $(CFLAGS) -DWITH_GZFILEOP -Fogzwrite2.obj gzwrite.c {$(TOP)/arch/x86}.c.obj: $(CC) -c 
-I$(TOP) $(WFLAGS) $(CFLAGS) $< +{$(TOP)/arch/generic}.c.obj: + $(CC) -c -I$(TOP) $(WFLAGS) $(CFLAGS) $< + {$(TOP)/test}.c.obj: $(CC) -c -I$(TOP) $(WFLAGS) $(CFLAGS) -DWITH_GZFILEOP $< $(TOP)/zconf$(SUFFIX).h: zconf -SRCDIR = $(TOP) -# Keep the dependences in sync with top-level Makefile.in -adler32.obj: $(SRCDIR)/adler32.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/functable.h $(SRCDIR)/adler32_p.h -adler32_avx2.obj: $(SRCDIR)/arch/x86/adler32_avx2.c $(SRCDIR)/zbuild.h $(SRCDIR)/cpu_features.h $(SRCDIR)/adler32_p.h $(SRCDIR)/fallback_builtins.h -adler32_avx512.obj: $(SRCDIR)/arch/x86/adler32_avx512.c $(SRCDIR)/zbuild.h $(SRCDIR)/cpu_features.h $(SRCDIR)/adler32_p.h $(SRCDIR)/arch/x86/adler32_avx512_p.h -adler32_avx512_vnni.obj: $(SRCDIR)/arch/x86/adler32_avx512_vnni.c $(SRCDIR)/zbuild.h $(SRCDIR)/cpu_features.h $(SRCDIR)/adler32_p.h $(SRCDIR)/arch/x86/adler32_avx512_p.h -adler32_sse42.obj: $(SRCDIR)/arch/x86/adler32_sse42.c $(SRCDIR)/zbuild.h $(SRCDIR)/cpu_features.h $(SRCDIR)/adler32_p.h $(SRCDIR)/adler32_fold.h \ - $(SRCDIR)/arch/x86/adler32_ssse3_p.h -adler32_ssse3.obj: $(SRCDIR)/arch/x86/adler32_ssse3.c $(SRCDIR)/zbuild.h $(SRCDIR)/cpu_features.h $(SRCDIR)/adler32_p.h $(SRCDIR)/adler32_fold.h \ - $(SRCDIR)/arch/x86/adler32_ssse3_p.h -adler32_fold.obj: $(SRCDIR)/adler32_fold.c $(SRCDIR)/zbuild.h $(SRCDIR)/adler32_fold.h $(SRCDIR)/functable.h -functable.obj: $(SRCDIR)/functable.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/zendian.h $(SRCDIR)/arch/x86/x86_features.h -gzlib.obj: $(SRCDIR)/gzlib.c $(SRCDIR)/zbuild.h $(SRCDIR)/gzguts.h $(SRCDIR)/zutil_p.h -gzread.obj: $(SRCDIR)/gzread.c $(SRCDIR)/zbuild.h $(SRCDIR)/gzguts.h $(SRCDIR)/zutil_p.h -gzwrite.obj: $(SRCDIR)/gzwrite.c $(SRCDIR)/zbuild.h $(SRCDIR)/gzguts.h $(SRCDIR)/zutil_p.h -compress.obj: $(SRCDIR)/compress.c $(SRCDIR)/zbuild.h $(SRCDIR)/zlib$(SUFFIX).h -uncompr.obj: $(SRCDIR)/uncompr.c $(SRCDIR)/zbuild.h $(SRCDIR)/zlib$(SUFFIX).h -chunkset.obj: 
$(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h -chunkset_avx2.obj: $(SRCDIR)/arch/x86/chunkset_avx2.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h -chunkset_sse2.obj: $(SRCDIR)/arch/x86/chunkset_sse2.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h -chunkset_ssse3.obj: $(SRCDIR)/arch/x86/chunkset_ssse3.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h -cpu_features.obj: $(SRCDIR)/cpu_features.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h -crc32_braid.obj: $(SRCDIR)/crc32_braid.c $(SRCDIR)/zbuild.h $(SRCDIR)/zendian.h $(SRCDIR)/deflate.h $(SRCDIR)/functable.h $(SRCDIR)/crc32_braid_p.h $(SRCDIR)/crc32_braid_tbl.h -crc32_braid_comb.obj: $(SRCDIR)/crc32_braid_comb.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/crc32_braid_p.h $(SRCDIR)/crc32_braid_tbl.h $(SRCDIR)/crc32_braid_comb_p.h -crc32_fold.obj: $(SRCDIR)/crc32_fold.c $(SRCDIR)/zbuild.h -crc32_pclmulqdq.obj: $(SRCDIR)/arch/x86/crc32_pclmulqdq.c $(SRCDIR)/arch/x86/crc32_pclmulqdq_tpl.h $(SRCDIR)/arch/x86/crc32_fold_pclmulqdq_tpl.h \ - $(SRCDIR)/crc32_fold.h $(SRCDIR)/zbuild.h -deflate.obj: $(SRCDIR)/deflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h -deflate_fast.obj: $(SRCDIR)/deflate_fast.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h -deflate_huff.obj: $(SRCDIR)/deflate_huff.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h -deflate_medium.obj: $(SRCDIR)/deflate_medium.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h -deflate_quick.obj: $(SRCDIR)/deflate_quick.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h $(SRCDIR)/trees_emit.h -deflate_rle.obj: $(SRCDIR)/deflate_rle.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h -deflate_slow.obj: $(SRCDIR)/deflate_slow.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h -deflate_stored.obj: $(SRCDIR)/deflate_stored.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h 
$(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h -infback.obj: $(SRCDIR)/infback.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inflate_p.h $(SRCDIR)/functable.h -inflate.obj: $(SRCDIR)/inflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inflate_p.h $(SRCDIR)/functable.h $(SRCDIR)/functable.h $(SRCDIR)/functable.h -inftrees.obj: $(SRCDIR)/inftrees.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h -slide_hash.obj: $(SRCDIR)/slide_hash.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h -slide_hash_avx2.obj: $(SRCDIR)/arch/x86/slide_hash_avx2.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h -slide_hash_sse2.obj: $(SRCDIR)/arch/x86/slide_hash_sse2.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h -trees.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/trees_tbl.h -zutil.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/zutil_p.h - -example.obj: $(TOP)/test/example.c $(TOP)/zbuild.h $(TOP)/zlib$(SUFFIX).h - -minigzip.obj: $(TOP)/test/minigzip.c $(TOP)/zbuild.h $(TOP)/zlib$(SUFFIX).h +{$(TOP)/win32}.cpp.obj: + $(CXX) -c -I$(TOP) $(WFLAGS) $(CXXFLAGS) $< + +adler32.obj: $(TOP)/adler32.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/adler32_p.h +adler32_c.obj: $(TOP)/arch/generic/adler32_c.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/adler32_p.h +adler32_avx2.obj: $(TOP)/arch/x86/adler32_avx2.c $(TOP)/zbuild.h $(TOP)/adler32_p.h $(TOP)/arch/x86/adler32_avx2_p.h $(TOP)/arch/x86/x86_intrins.h +adler32_avx512.obj: $(TOP)/arch/x86/adler32_avx512.c $(TOP)/zbuild.h $(TOP)/arch_functions.h $(TOP)/adler32_p.h $(TOP)/arch/x86/adler32_avx512_p.h $(TOP)/arch/x86/x86_intrins.h +adler32_avx512_vnni.obj: $(TOP)/arch/x86/adler32_avx512_vnni.c $(TOP)/zbuild.h $(TOP)/arch_functions.h $(TOP)/adler32_p.h $(TOP)/arch/x86/adler32_avx512_p.h \ + $(TOP)/arch/x86/adler32_avx2_p.h $(TOP)/arch/x86/x86_intrins.h +adler32_sse42.obj: $(TOP)/arch/x86/adler32_sse42.c $(TOP)/zbuild.h $(TOP)/adler32_p.h \ + 
$(TOP)/arch/x86/adler32_ssse3_p.h +adler32_ssse3.obj: $(TOP)/arch/x86/adler32_ssse3.c $(TOP)/zbuild.h $(TOP)/adler32_p.h \ + $(TOP)/arch/x86/adler32_ssse3_p.h +adler32_fold_c.obj: $(TOP)/arch/generic/adler32_fold_c.c $(TOP)/zbuild.h $(TOP)/functable.h +chunkset_c.obj: $(TOP)/arch/generic/chunkset_c.c $(TOP)/zbuild.h $(TOP)/chunkset_tpl.h $(TOP)/inffast_tpl.h +chunkset_avx2.obj: $(TOP)/arch/x86/chunkset_avx2.c $(TOP)/zbuild.h $(TOP)/chunkset_tpl.h $(TOP)/inffast_tpl.h $(TOP)/arch/generic/chunk_permute_table.h +chunkset_sse2.obj: $(TOP)/arch/x86/chunkset_sse2.c $(TOP)/zbuild.h $(TOP)/chunkset_tpl.h $(TOP)/inffast_tpl.h +chunkset_ssse3.obj: $(TOP)/arch/x86/chunkset_ssse3.c $(TOP)/zbuild.h $(TOP)/chunkset_tpl.h $(TOP)/inffast_tpl.h $(TOP)/arch/generic/chunk_permute_table.h +compare256_c.obj: $(TOP)/arch/generic/compare256_c.c $(TOP)/zbuild.h $(TOP)/zutil_p.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h +compare256_avx2.obj: $(TOP)/arch/x86/compare256_avx2.c $(TOP)/zbuild.h $(TOP)/zutil_p.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h +compare256_sse2.obj: $(TOP)/arch/x86/compare256_sse2.c $(TOP)/zbuild.h $(TOP)/zutil_p.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h +compress.obj: $(TOP)/compress.c $(TOP)/zbuild.h $(TOP)/zutil.h +cpu_features.obj: $(TOP)/cpu_features.c $(TOP)/cpu_features.h $(TOP)/zbuild.h +crc32.obj: $(TOP)/crc32.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/crc32_braid_tbl.h +crc32_braid_c.obj: $(TOP)/arch/generic/crc32_braid_c.c $(TOP)/zbuild.h $(TOP)/crc32_braid_p.h $(TOP)/crc32_braid_tbl.h +crc32_braid_comb.obj: $(TOP)/crc32_braid_comb.c $(TOP)/zutil.h $(TOP)/crc32_braid_p.h $(TOP)/crc32_braid_tbl.h $(TOP)/crc32_braid_comb_p.h +crc32_fold_c.obj: $(TOP)/arch/generic/crc32_fold_c.c $(TOP)/zbuild.h $(TOP)/crc32.h $(TOP)/functable.h $(TOP)/zutil.h +crc32_pclmulqdq.obj: $(TOP)/arch/x86/crc32_pclmulqdq.c $(TOP)/arch/x86/crc32_pclmulqdq_tpl.h +deflate.obj: $(TOP)/deflate.c $(TOP)/zbuild.h 
$(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h +deflate_fast.obj: $(TOP)/deflate_fast.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h +deflate_huff.obj: $(TOP)/deflate_huff.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h +deflate_medium.obj: $(TOP)/deflate_medium.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h +deflate_quick.obj: $(TOP)/deflate_quick.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/trees_emit.h $(TOP)/zutil_p.h +deflate_rle.obj: $(TOP)/deflate_rle.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/compare256_rle.h +deflate_slow.obj: $(TOP)/deflate_slow.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h +deflate_stored.obj: $(TOP)/deflate_stored.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h +functable.obj: $(TOP)/functable.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/cpu_features.h $(TOP)/arch/x86/x86_features.h $(TOP)/arch_functions.h +gzlib.obj: $(TOP)/gzlib.c $(TOP)/zbuild.h $(TOP)/gzguts.h $(TOP)/zutil_p.h +gzread.obj: $(TOP)/gzread.c $(TOP)/zbuild.h $(TOP)/gzguts.h $(TOP)/zutil_p.h +gzwrite.obj: $(TOP)/gzwrite.c $(TOP)/zbuild.h $(TOP)/gzguts.h $(TOP)/zutil_p.h +infback.obj: $(TOP)/infback.c $(TOP)/zbuild.h $(TOP)/zutil.h $(TOP)/inftrees.h $(TOP)/inflate.h $(TOP)/inflate_p.h $(TOP)/functable.h +inflate.obj: $(TOP)/inflate.c $(TOP)/zbuild.h $(TOP)/zutil.h $(TOP)/inftrees.h $(TOP)/inflate.h $(TOP)/inflate_p.h $(TOP)/functable.h $(TOP)/inffixed_tbl.h +inftrees.obj: $(TOP)/inftrees.c $(TOP)/zbuild.h $(TOP)/zutil.h $(TOP)/inftrees.h +insert_string.obj: $(TOP)/insert_string.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/insert_string_tpl.h +insert_string_roll.obj: $(TOP)/insert_string_roll.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/insert_string_tpl.h +slide_hash_c.obj: $(TOP)/arch/generic/slide_hash_c.c $(TOP)/zbuild.h $(TOP)/deflate.h +slide_hash_avx2.obj: 
$(TOP)/arch/x86/slide_hash_avx2.c $(TOP)/zbuild.h $(TOP)/deflate.h +slide_hash_sse2.obj: $(TOP)/arch/x86/slide_hash_sse2.c $(TOP)/zbuild.h $(TOP)/deflate.h +trees.obj: $(TOP)/trees.c $(TOP)/trees.h $(TOP)/trees_emit.h $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/trees_tbl.h +uncompr.obj: $(TOP)/uncompr.c $(TOP)/zbuild.h $(TOP)/zutil.h +zutil.obj: $(TOP)/zutil.c $(TOP)/zbuild.h $(TOP)/zutil.h $(TOP)/zutil_p.h $(RESFILE): $(TOP)/win32/$(RCFILE) $(RC) $(RCFLAGS) /fo$@ $(TOP)/win32/$(RCFILE) # testing -test: example.exe minigzip.exe +depcheck: depcheck.exe + depcheck win32\Makefile.msc . + depcheck win32\Makefile.arm . + depcheck win32\Makefile.a64 . + +test: example.exe minigzip.exe depcheck example echo hello world | minigzip | minigzip -d @@ -242,6 +263,12 @@ testdll: example_d.exe minigzip_d.exe example_d echo hello world | minigzip_d | minigzip_d -d +depcheck.obj: $(TOP)/win32/depcheck.cpp + +example.obj: $(TOP)/test/example.c $(TOP)/zbuild.h $(TOP)/zlib$(SUFFIX).h $(TOP)/deflate.h $(TOP)/test/test_shared_ng.h + +minigzip.obj: $(TOP)/test/minigzip.c $(TOP)/zbuild.h $(TOP)/zlib$(SUFFIX).h + # cleanup clean: diff --git a/src/native/external/zlib-ng/win32/depcheck.cpp b/src/native/external/zlib-ng/win32/depcheck.cpp new file mode 100644 index 0000000000000..f83bdd685291c --- /dev/null +++ b/src/native/external/zlib-ng/win32/depcheck.cpp @@ -0,0 +1,321 @@ +/* depcheck.cpp - Dependency checker for NMake Makefiles + * Copyright (c) 2024 Mika T. 
Lindqvist + */ + +#include +#include +#include +#include +#include + +int main(int argc, char* argv[]) { + if (argc != 3) { + printf("Usage: depcheck Makefile \n"); + return -1; + } + std::filebuf fb; + if (fb.open (argv[1],std::ios::in)) { + std::istream is(&fb); + std::string makefile = argv[1]; + std::string l, tmp, tmp2; + while (is) { + std::getline(is, l); + while (l.back() == '\\') { + std::getline(is, tmp); + l.replace(l.length() - 1, 1, tmp); + } + size_t pos = l.find("obj:"); + if (pos != std::string::npos) { + std::string objfile = l.substr(0, pos+3); + printf("File: %s\n", objfile.c_str()); + std::vector files; + std::stringstream ss(l.substr(pos+4)); + while(getline(ss, tmp, ' ')){ + if (tmp != "" && tmp != "/") { + files.push_back(tmp); + } + } + for (auto it = files.begin(); it != files.end(); ++it) { + printf("Dependency: %s\n", (*it).c_str()); + } + if (!files.empty()) { + std::filebuf fb2; + std::string src = files[0]; + size_t pos2 = src.find("$(TOP)"); + if (pos2 != std::string::npos) { + src.replace(pos2, 6, argv[2]); + } + printf("Source: %s\n", src.c_str()); + if (fb2.open(src.c_str(),std::ios::in)) { + std::istream is2(&fb2); + std::vector includes; + while (is2) { + std::getline(is2, l); + pos = l.find("#"); + if (pos != std::string::npos) { + pos2 = l.find("include"); + size_t pos3 = l.find("\""); + if (pos2 != std::string::npos && pos3 != std::string::npos && pos2 > pos && pos3 > pos2) { + tmp = l.substr(pos3 + 1); + pos2 = tmp.find("\""); + if (pos2 != std::string::npos) { + tmp = tmp.substr(0, pos2); + } + pos2 = tmp.find("../"); + if (pos2 != std::string::npos) { + tmp = tmp.substr(3); + } + printf("Line: %s\n", tmp.c_str()); + int found = 0; + for (size_t i = 1; i < files.size(); i++) { + pos3 = files[i].find("$(SUFFIX)"); + if (pos3 != std::string::npos) { + tmp2 = files[i].substr(0, pos3).append(files[i].substr(pos3 + 9)); + printf("Comparing dependency \"%s\" and \"%s\"\n", tmp2.c_str(), tmp.c_str()); + if (tmp2 == tmp) { + 
printf("Dependency %s OK\n", tmp.c_str()); + found = 1; + includes.push_back(tmp); + break; + } + printf("Comparing dependency \"%s\" and \"$(TOP)/%s\"\n", tmp2.c_str(), tmp.c_str()); + if (tmp2 == std::string("$(TOP)/").append(tmp)) { + printf("Dependency %s OK\n", tmp.c_str()); + found = 1; + includes.push_back(tmp); + break; + } + + tmp2 = files[i].substr(0, pos3).append("-ng").append(files[i].substr(pos3 + 9)); + printf("Comparing dependency \"%s\" and \"%s\"\n", tmp2.c_str(), tmp.c_str()); + if (tmp2 == tmp) { + printf("Dependency %s OK\n", tmp.c_str()); + found = 1; + includes.push_back(tmp); + break; + } + printf("Comparing dependency \"%s\" and \"$(TOP)/%s\"\n", tmp2.c_str(), tmp.c_str()); + if (tmp2 == std::string("$(TOP)/").append(tmp)) { + printf("Dependency %s OK\n", tmp.c_str()); + found = 1; + includes.push_back(tmp); + break; + } + } else { + printf("Comparing dependency \"%s\" and \"%s\"\n", files[i].c_str(), tmp.c_str()); + if (files[i] == tmp) { + printf("Dependency %s OK\n", tmp.c_str()); + found = 1; + includes.push_back(tmp); + break; + } + printf("Comparing dependency \"%s\" and \"$(TOP)/%s\"\n", files[i].c_str(), tmp.c_str()); + if (files[i] == std::string("$(TOP)/").append(tmp)) { + printf("Dependency %s OK\n", tmp.c_str()); + found = 1; + includes.push_back(tmp); + break; + } + printf("Comparing dependency \"%s\" and \"$(TOP)/arch/%s\"\n", files[i].c_str(), tmp.c_str()); + if (files[i] == std::string("$(TOP)/arch/").append(tmp)) { + printf("Dependency %s OK\n", tmp.c_str()); + found = 1; + includes.push_back(tmp); + break; + } + printf("Comparing dependency \"%s\" and \"$(TOP)/arch/generic/%s\"\n", files[i].c_str(), tmp.c_str()); + if (files[i] == std::string("$(TOP)/arch/generic/").append(tmp)) { + printf("Dependency %s OK\n", tmp.c_str()); + found = 1; + includes.push_back(tmp); + break; + } + printf("Comparing dependency \"%s\" and \"$(TOP)/arch/arm/%s\"\n", files[i].c_str(), tmp.c_str()); + if (files[i] == 
std::string("$(TOP)/arch/arm/").append(tmp)) { + printf("Dependency %s OK\n", tmp.c_str()); + found = 1; + includes.push_back(tmp); + break; + } + printf("Comparing dependency \"%s\" and \"$(TOP)/arch/x86/%s\"\n", files[i].c_str(), tmp.c_str()); + if (files[i] == std::string("$(TOP)/arch/x86/").append(tmp)) { + printf("Dependency %s OK\n", tmp.c_str()); + found = 1; + includes.push_back(tmp); + break; + } + printf("Comparing dependency \"%s\" and \"$(TOP)/test/%s\"\n", files[i].c_str(), tmp.c_str()); + if (files[i] == std::string("$(TOP)/test/").append(tmp)) { + printf("Dependency %s OK\n", tmp.c_str()); + found = 1; + includes.push_back(tmp); + break; + } + } + } + // Skip irrelevant dependencies + if (tmp.substr(0, 9) == "arch/s390") found = 1; + if (tmp == "zlib-ng.h" && std::find(includes.begin(), includes.end(), "zlib.h") != includes.end()) found = 1; + if (found == 0) { + printf("%s: Dependency %s missing for %s!\n", makefile.c_str(), tmp.c_str(), objfile.c_str()); + return -1; + } + } + } + } + for (size_t i = 1; i < files.size(); i++) { + int found = 0; + tmp = files[i]; + printf("Dependency: %s\n", tmp.c_str()); + pos2 = tmp.find("$(TOP)"); + if (pos2 != std::string::npos) { + tmp = tmp.substr(7); + } + for (size_t j = 0; j < includes.size(); j++) { + pos2 = tmp.find("$(SUFFIX)"); + if (pos2 != std::string::npos) { + std::string tmp1 = tmp.substr(0, pos2).append(tmp.substr(pos2 + 9)); + printf("[%zd/%zd] Comparing dependency \"%s\" and \"%s\"\n", j, includes.size(), tmp1.c_str(), includes[j].c_str()); + if (tmp1 == includes[j]) { + printf("Dependency %s OK\n", files[i].c_str()); + found = 1; + break; + } + printf("[%zd/%zd] Comparing dependency \"%s\" and \"arch/%s\"\n", j, includes.size(), tmp1.c_str(), includes[j].c_str()); + if (tmp1 == std::string("arch/").append(includes[j])) { + printf("Dependency %s OK\n", files[i].c_str()); + found = 1; + break; + } + printf("[%zd/%zd] Comparing dependency \"%s\" and \"arch/generic/%s\"\n", j, includes.size(), 
tmp1.c_str(), includes[j].c_str()); + if (tmp1 == std::string("arch/generic/").append(includes[j])) { + printf("Dependency %s OK\n", files[i].c_str()); + found = 1; + break; + } + printf("[%zd/%zd] Comparing dependency \"%s\" and \"arch/arm/%s\"\n", j, includes.size(), tmp1.c_str(), includes[j].c_str()); + if (tmp1 == std::string("arch/arm/").append(includes[j])) { + printf("Dependency %s OK\n", files[i].c_str()); + found = 1; + break; + } + printf("[%zd/%zd] Comparing dependency \"%s\" and \"arch/x86/%s\"\n", j, includes.size(), tmp1.c_str(), includes[j].c_str()); + if (tmp1 == std::string("arch/x86/").append(includes[j])) { + printf("Dependency %s OK\n", files[i].c_str()); + found = 1; + break; + } + printf("[%zd/%zd] Comparing dependency \"%s\" and \"test/%s\"\n", j, includes.size(), tmp1.c_str(), includes[j].c_str()); + if (tmp1 == std::string("test/").append(includes[j])) { + printf("Dependency %s OK\n", files[i].c_str()); + found = 1; + break; + } + tmp1 = tmp.substr(0, pos2).append("-ng").append(tmp.substr(pos2 + 9)); + printf("[%zd/%zd] Comparing dependency \"%s\" and \"%s\"\n", j, includes.size(), tmp1.c_str(), includes[j].c_str()); + if (tmp1 == includes[j]) { + printf("Dependency %s OK\n", files[i].c_str()); + found = 1; + break; + } + printf("[%zd/%zd] Comparing dependency \"%s\" and \"arch/%s\"\n", j, includes.size(), tmp1.c_str(), includes[j].c_str()); + if (tmp1 == std::string("arch/").append(includes[j])) { + printf("Dependency %s OK\n", files[i].c_str()); + found = 1; + break; + } + printf("[%zd/%zd] Comparing dependency \"%s\" and \"arch/generic/%s\"\n", j, includes.size(), tmp1.c_str(), includes[j].c_str()); + if (tmp1 == std::string("arch/generic/").append(includes[j])) { + printf("Dependency %s OK\n", files[i].c_str()); + found = 1; + break; + } + printf("[%zd/%zd] Comparing dependency \"%s\" and \"arch/arm/%s\"\n", j, includes.size(), tmp1.c_str(), includes[j].c_str()); + if (tmp1 == std::string("arch/arm/").append(includes[j])) { + 
printf("Dependency %s OK\n", files[i].c_str()); + found = 1; + break; + } + printf("[%zd/%zd] Comparing dependency \"%s\" and \"arch/x86/%s\"\n", j, includes.size(), tmp1.c_str(), includes[j].c_str()); + if (tmp1 == std::string("arch/x86/").append(includes[j])) { + printf("Dependency %s OK\n", files[i].c_str()); + found = 1; + break; + } + printf("[%zd/%zd] Comparing dependency \"%s\" and \"test/%s\"\n", j, includes.size(), tmp1.c_str(), includes[j].c_str()); + if (tmp1 == std::string("test/").append(includes[j])) { + printf("Dependency %s OK\n", files[i].c_str()); + found = 1; + break; + } + } else { + printf("[%zd/%zd] Comparing dependency \"%s\" and \"%s\"\n", j, includes.size(), tmp.c_str(), includes[j].c_str()); + if (tmp == includes[j]) { + printf("Dependency %s OK\n", files[i].c_str()); + found = 1; + break; + } + printf("[%zd/%zd] Comparing dependency \"%s\" and \"arch/%s\"\n", j, includes.size(), tmp.c_str(), includes[j].c_str()); + if (tmp == std::string("arch/").append(includes[j])) { + printf("Dependency %s OK\n", files[i].c_str()); + found = 1; + break; + } + printf("[%zd/%zd] Comparing dependency \"%s\" and \"arch/generic/%s\"\n", j, includes.size(), tmp.c_str(), includes[j].c_str()); + if (tmp == std::string("arch/generic/").append(includes[j])) { + printf("Dependency %s OK\n", files[i].c_str()); + found = 1; + break; + } + printf("[%zd/%zd] Comparing dependency \"%s\" and \"arch/arm/%s\"\n", j, includes.size(), tmp.c_str(), includes[j].c_str()); + if (tmp == std::string("arch/arm/").append(includes[j])) { + printf("Dependency %s OK\n", files[i].c_str()); + found = 1; + break; + } + printf("[%zd/%zd] Comparing dependency \"%s\" and \"arch/x86/%s\"\n", j, includes.size(), tmp.c_str(), includes[j].c_str()); + if (tmp == std::string("arch/x86/").append(includes[j])) { + printf("Dependency %s OK\n", files[i].c_str()); + found = 1; + break; + } + printf("[%zd/%zd] Comparing dependency \"%s\" and \"test/%s\"\n", j, includes.size(), tmp.c_str(), 
includes[j].c_str()); + if (tmp == std::string("test/").append(includes[j])) { + printf("Dependency %s OK\n", files[i].c_str()); + found = 1; + break; + } + } + } + // Skip indirect dependencies + if (tmp.find("arm_features.h") != std::string::npos + && std::find(includes.begin(), includes.end(), "cpu_features.h") != includes.end() + && (makefile.find(".arm") != std::string::npos + || makefile.find(".a64") != std::string::npos)) found = 1; + if (tmp.find("x86_features.h") != std::string::npos + && std::find(includes.begin(), includes.end(), "cpu_features.h") != includes.end() + && makefile.find(".msc") != std::string::npos) found = 1; + // + if (tmp.find("generic_functions.h") != std::string::npos + && std::find(includes.begin(), includes.end(), "arch_functions.h") != includes.end()) found = 1; + if (tmp.find("arm_functions.h") != std::string::npos + && std::find(includes.begin(), includes.end(), "arch_functions.h") != includes.end() + && (makefile.find(".arm") != std::string::npos + || makefile.find(".a64") != std::string::npos)) found = 1; + if (tmp.find("x86_functions.h") != std::string::npos + && std::find(includes.begin(), includes.end(), "arch_functions.h") != includes.end() + && makefile.find(".msc") != std::string::npos) found = 1; + if (found == 0) { + printf("%s: Dependency %s not needed for %s\n", makefile.c_str(), files[i].c_str(), objfile.c_str()); + return -1; + } + } + fb2.close(); + } + } + } + } + fb.close(); + } + return 0; +} diff --git a/src/native/external/zlib-ng/win32/zlib-ng1.rc b/src/native/external/zlib-ng/win32/zlib-ng1.rc index 327f17fd8b41a..f65cfa254e428 100644 --- a/src/native/external/zlib-ng/win32/zlib-ng1.rc +++ b/src/native/external/zlib-ng/win32/zlib-ng1.rc @@ -22,7 +22,7 @@ BEGIN VALUE "FileDescription", "zlib data compression library\0" VALUE "FileVersion", ZLIBNG_VERSION "\0" VALUE "InternalName", "zlib-ng1.dll\0" - VALUE "LegalCopyright", "(C) 1995-2013 Jean-loup Gailly & Mark Adler\0" + VALUE "LegalCopyright", "(C) 1995-2024 
Jean-loup Gailly & Mark Adler\0" VALUE "OriginalFilename", "zlib-ng1.dll\0" VALUE "ProductName", "zlib\0" VALUE "ProductVersion", ZLIBNG_VERSION "\0" diff --git a/src/native/external/zlib-ng/win32/zlib1.rc b/src/native/external/zlib-ng/win32/zlib1.rc index 73bc4389c0245..9bb9c18654f99 100644 --- a/src/native/external/zlib-ng/win32/zlib1.rc +++ b/src/native/external/zlib-ng/win32/zlib1.rc @@ -22,7 +22,7 @@ BEGIN VALUE "FileDescription", "zlib data compression library\0" VALUE "FileVersion", ZLIB_VERSION "\0" VALUE "InternalName", "zlib1.dll\0" - VALUE "LegalCopyright", "(C) 1995-2022 Jean-loup Gailly & Mark Adler\0" + VALUE "LegalCopyright", "(C) 1995-2024 Jean-loup Gailly & Mark Adler\0" VALUE "OriginalFilename", "zlib1.dll\0" VALUE "ProductName", "zlib\0" VALUE "ProductVersion", ZLIB_VERSION "\0" diff --git a/src/native/external/zlib-ng/zbuild.h b/src/native/external/zlib-ng/zbuild.h index d550b4c582c33..9157eef9e356b 100644 --- a/src/native/external/zlib-ng/zbuild.h +++ b/src/native/external/zlib-ng/zbuild.h @@ -202,6 +202,24 @@ # define ALIGNED_(x) __declspec(align(x)) #endif +#ifdef HAVE_BUILTIN_ASSUME_ALIGNED +# define HINT_ALIGNED(p,n) __builtin_assume_aligned((void *)(p),(n)) +#else +# define HINT_ALIGNED(p,n) (p) +#endif +#define HINT_ALIGNED_16(p) HINT_ALIGNED((p),16) +#define HINT_ALIGNED_64(p) HINT_ALIGNED((p),64) +#define HINT_ALIGNED_4096(p) HINT_ALIGNED((p),4096) + +/* PADSZ returns needed bytes to pad bpos to pad size + * PAD_NN calculates pad size and adds it to bpos, returning the result. + * All take an integer or a pointer as bpos input. 
+ */ +#define PADSZ(bpos, pad) (((pad) - ((uintptr_t)(bpos) % (pad))) % (pad)) +#define PAD_16(bpos) ((bpos) + PADSZ((bpos),16)) +#define PAD_64(bpos) ((bpos) + PADSZ((bpos),64)) +#define PAD_4096(bpos) ((bpos) + PADSZ((bpos),4096)) + /* Diagnostic functions */ #ifdef ZLIB_DEBUG # include @@ -246,6 +264,31 @@ # endif #endif +#if defined(__has_feature) +# if __has_feature(address_sanitizer) +# define Z_ADDRESS_SANITIZER 1 +# endif +#elif defined(__SANITIZE_ADDRESS__) +# define Z_ADDRESS_SANITIZER 1 +#endif + +/* + * __asan_loadN() and __asan_storeN() calls are inserted by compilers in order to check memory accesses. + * They can be called manually too, with the following caveats: + * gcc says: "warning: implicit declaration of function ‘...’" + * g++ says: "error: new declaration ‘...’ ambiguates built-in declaration ‘...’" + * Accommodate both. + */ +#ifdef Z_ADDRESS_SANITIZER +#ifndef __cplusplus +void __asan_loadN(void *, long); +void __asan_storeN(void *, long); +#endif +#else +# define __asan_loadN(a, size) do { Z_UNUSED(a); Z_UNUSED(size); } while (0) +# define __asan_storeN(a, size) do { Z_UNUSED(a); Z_UNUSED(size); } while (0) +#endif + #if defined(__has_feature) # if __has_feature(memory_sanitizer) # define Z_MEMORY_SANITIZER 1 @@ -254,7 +297,31 @@ #endif #ifndef Z_MEMORY_SANITIZER +# define __msan_check_mem_is_initialized(a, size) do { Z_UNUSED(a); Z_UNUSED(size); } while (0) # define __msan_unpoison(a, size) do { Z_UNUSED(a); Z_UNUSED(size); } while (0) #endif +/* Notify sanitizer runtime about an upcoming read access. */ +#define instrument_read(a, size) do { \ + void *__a = (void *)(a); \ + long __size = size; \ + __asan_loadN(__a, __size); \ + __msan_check_mem_is_initialized(__a, __size); \ +} while (0) + +/* Notify sanitizer runtime about an upcoming write access. 
*/ +#define instrument_write(a, size) do { \ + void *__a = (void *)(a); \ + long __size = size; \ + __asan_storeN(__a, __size); \ +} while (0) + +/* Notify sanitizer runtime about an upcoming read/write access. */ +#define instrument_read_write(a, size) do { \ + void *__a = (void *)(a); \ + long __size = size; \ + __asan_storeN(__a, __size); \ + __msan_check_mem_is_initialized(__a, __size); \ +} while (0) + #endif diff --git a/src/native/external/zlib-ng/zconf-ng.h.in b/src/native/external/zlib-ng/zconf-ng.h.in index 226f06a037792..a1b5311b85378 100644 --- a/src/native/external/zlib-ng/zconf-ng.h.in +++ b/src/native/external/zlib-ng/zconf-ng.h.in @@ -1,5 +1,5 @@ /* zconf-ng.h -- configuration of the zlib-ng compression library - * Copyright (C) 1995-2016 Jean-loup Gailly, Mark Adler + * Copyright (C) 1995-2024 Jean-loup Gailly, Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ diff --git a/src/native/external/zlib-ng/zconf.h.in b/src/native/external/zlib-ng/zconf.h.in index 7a6e281e849d6..be8221fd861e1 100644 --- a/src/native/external/zlib-ng/zconf.h.in +++ b/src/native/external/zlib-ng/zconf.h.in @@ -1,5 +1,5 @@ /* zconf.h -- configuration of the zlib compression library - * Copyright (C) 1995-2016 Jean-loup Gailly, Mark Adler + * Copyright (C) 1995-2024 Jean-loup Gailly, Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ diff --git a/src/native/external/zlib-ng/zlib-config.cmake.in b/src/native/external/zlib-ng/zlib-config.cmake.in new file mode 100644 index 0000000000000..2e4ba4f230737 --- /dev/null +++ b/src/native/external/zlib-ng/zlib-config.cmake.in @@ -0,0 +1,12 @@ +set(ZLIB_VERSION @ZLIB_HEADER_VERSION@) + +@PACKAGE_INIT@ + +set_and_check(ZLIB_INCLUDE_DIR "@PACKAGE_INCLUDE_INSTALL_DIR@") +set(ZLIB_INCLUDE_DIRS "${ZLIB_INCLUDE_DIR}") +set_and_check(ZLIB_LIB_DIR "@PACKAGE_LIB_INSTALL_DIR@") +set(ZLIB_LIBRARIES ZLIB::ZLIB) + +include("${CMAKE_CURRENT_LIST_DIR}/ZLIB.cmake") + 
+check_required_components(ZLIB) diff --git a/src/native/external/zlib-ng/zlib-ng-config.cmake.in b/src/native/external/zlib-ng/zlib-ng-config.cmake.in new file mode 100644 index 0000000000000..f7564a905f74d --- /dev/null +++ b/src/native/external/zlib-ng/zlib-ng-config.cmake.in @@ -0,0 +1,10 @@ +set(zlib-ng_VERSION @ZLIBNG_HEADER_VERSION@) + +@PACKAGE_INIT@ + +set_and_check(zlib-ng_INCLUDE_DIR "@PACKAGE_INCLUDE_INSTALL_DIR@") +set_and_check(zlib-ng_LIB_DIR "@PACKAGE_LIB_INSTALL_DIR@") + +include("${CMAKE_CURRENT_LIST_DIR}/zlib-ng.cmake") + +check_required_components(zlib-ng) diff --git a/src/native/external/zlib-ng/zlib-ng.h.in b/src/native/external/zlib-ng/zlib-ng.h.in index 342b240afb653..7f7f03ee1553b 100644 --- a/src/native/external/zlib-ng/zlib-ng.h.in +++ b/src/native/external/zlib-ng/zlib-ng.h.in @@ -2,7 +2,7 @@ #define ZNGLIB_H_ /* zlib-ng.h -- interface of the 'zlib-ng' compression library, forked from zlib. - Copyright (C) 1995-2016 Jean-loup Gailly and Mark Adler + Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler This software is provided 'as-is', without any express or implied warranty. 
In no event will the authors be held liable for any damages @@ -48,11 +48,11 @@ extern "C" { #endif -#define ZLIBNG_VERSION "2.1.6" -#define ZLIBNG_VERNUM 0x020106F0L /* MMNNRRSM: major minor revision status modified */ +#define ZLIBNG_VERSION "2.2.1" +#define ZLIBNG_VERNUM 0x020201F0L /* MMNNRRSM: major minor revision status modified */ #define ZLIBNG_VER_MAJOR 2 -#define ZLIBNG_VER_MINOR 1 -#define ZLIBNG_VER_REVISION 6 +#define ZLIBNG_VER_MINOR 2 +#define ZLIBNG_VER_REVISION 1 #define ZLIBNG_VER_STATUS F /* 0=devel, 1-E=beta, F=Release (DEPRECATED) */ #define ZLIBNG_VER_STATUSH 0xF /* Hex values: 0=devel, 1-E=beta, F=Release */ #define ZLIBNG_VER_MODIFIED 0 /* non-zero if modified externally from zlib-ng */ @@ -1759,7 +1759,7 @@ uint32_t zng_crc32_combine(uint32_t crc1, uint32_t crc2, z_off64_t len2); seq1 and seq2 with lengths len1 and len2, CRC-32 check values were calculated for each, crc1 and crc2. crc32_combine() returns the CRC-32 check value of seq1 and seq2 concatenated, requiring only crc1, crc2, and - len2. + len2. len2 must be non-negative. */ Z_EXTERN Z_EXPORT @@ -1767,7 +1767,7 @@ uint32_t zng_crc32_combine_gen(z_off64_t len2); /* Return the operator corresponding to length len2, to be used with - crc32_combine_op(). + crc32_combine_op(). len2 must be non-negative. */ Z_EXTERN Z_EXPORT diff --git a/src/native/external/zlib-ng/zlib.h.in b/src/native/external/zlib-ng/zlib.h.in index eabb94afe09cc..3dceaa3344ba8 100644 --- a/src/native/external/zlib-ng/zlib.h.in +++ b/src/native/external/zlib-ng/zlib.h.in @@ -1,9 +1,9 @@ #ifndef ZLIB_H_ #define ZLIB_H_ /* zlib.h -- interface of the 'zlib-ng' compression library - Forked from and compatible with zlib 1.2.13 + Forked from and compatible with zlib 1.3.1 - Copyright (C) 1995-2022 Jean-loup Gailly and Mark Adler + Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler This software is provided 'as-is', without any express or implied warranty. 
In no event will the authors be held liable for any damages @@ -49,20 +49,20 @@ extern "C" { #endif -#define ZLIBNG_VERSION "2.1.6" -#define ZLIBNG_VERNUM 0x020106F0L /* MMNNRRSM: major minor revision status modified */ +#define ZLIBNG_VERSION "2.2.1" +#define ZLIBNG_VERNUM 0x020201F0L /* MMNNRRSM: major minor revision status modified */ #define ZLIBNG_VER_MAJOR 2 -#define ZLIBNG_VER_MINOR 1 -#define ZLIBNG_VER_REVISION 6 +#define ZLIBNG_VER_MINOR 2 +#define ZLIBNG_VER_REVISION 1 #define ZLIBNG_VER_STATUS F /* 0=devel, 1-E=beta, F=Release (DEPRECATED) */ #define ZLIBNG_VER_STATUSH 0xF /* Hex values: 0=devel, 1-E=beta, F=Release */ #define ZLIBNG_VER_MODIFIED 0 /* non-zero if modified externally from zlib-ng */ -#define ZLIB_VERSION "1.3.0.zlib-ng" -#define ZLIB_VERNUM 0x130f +#define ZLIB_VERSION "1.3.1.zlib-ng" +#define ZLIB_VERNUM 0x131f #define ZLIB_VER_MAJOR 1 #define ZLIB_VER_MINOR 3 -#define ZLIB_VER_REVISION 0 +#define ZLIB_VER_REVISION 1 #define ZLIB_VER_SUBREVISION 15 /* 15=fork (0xf) */ /* @@ -220,7 +220,7 @@ typedef gz_header *gz_headerp; #define Z_DEFLATED 8 /* The deflate compression method (the only one supported in this version) */ -#define Z_NULL NULL /* for compatibility with zlib, was for initializing zalloc, zfree, opaque */ +#define Z_NULL 0 /* for compatibility with zlib, was for initializing zalloc, zfree, opaque */ #define zlib_version zlibVersion() /* for compatibility with versions < 1.0.2 */ @@ -1732,14 +1732,14 @@ Z_EXTERN unsigned long Z_EXPORT crc32_combine(unsigned long crc1, unsigned long seq1 and seq2 with lengths len1 and len2, CRC-32 check values were calculated for each, crc1 and crc2. crc32_combine() returns the CRC-32 check value of seq1 and seq2 concatenated, requiring only crc1, crc2, and - len2. + len2. len2 must be non-negative. */ /* Z_EXTERN unsigned long Z_EXPORT crc32_combine_gen(z_off_t len2); Return the operator corresponding to length len2, to be used with - crc32_combine_op(). + crc32_combine_op(). 
len2 must be non-negative. */ Z_EXTERN unsigned long Z_EXPORT crc32_combine_op(unsigned long crc1, unsigned long crc2, diff --git a/src/native/external/zlib-ng/zlib.map b/src/native/external/zlib-ng/zlib.map index ebca10d351ed0..293e803729d81 100644 --- a/src/native/external/zlib-ng/zlib.map +++ b/src/native/external/zlib-ng/zlib.map @@ -13,6 +13,7 @@ ZLIB_1.2.0 { zcfree; z_errmsg; gz_error; + gz_intmax; _*; }; diff --git a/src/native/external/zlib-ng/zutil.c b/src/native/external/zlib-ng/zutil.c index 270a28c742014..39fbceb4a0159 100644 --- a/src/native/external/zlib-ng/zutil.c +++ b/src/native/external/zlib-ng/zutil.c @@ -21,7 +21,7 @@ z_const char * const PREFIX(z_errmsg)[10] = { }; const char PREFIX3(vstring)[] = - " zlib-ng 2.1.6"; + " zlib-ng 2.2.1"; #ifdef ZLIB_COMPAT const char * Z_EXPORT zlibVersion(void) { @@ -109,51 +109,3 @@ void Z_INTERNAL PREFIX(zcfree)(void *opaque, void *ptr) { Z_UNUSED(opaque); zng_free(ptr); } - -/* Since we support custom memory allocators, some which might not align memory as we expect, - * we have to ask for extra memory and return an aligned pointer. 
*/ -void Z_INTERNAL *PREFIX3(alloc_aligned)(zng_calloc_func zalloc, void *opaque, unsigned items, unsigned size, unsigned align) { - uintptr_t return_ptr, original_ptr; - uint32_t alloc_size, align_diff; - void *ptr; - - /* If no custom calloc function used then call zlib-ng's aligned calloc */ - if (zalloc == PREFIX(zcalloc)) - return PREFIX(zcalloc)(opaque, items, size); - - /* Allocate enough memory for proper alignment and to store the original memory pointer */ - alloc_size = sizeof(void *) + (items * size) + align; - ptr = zalloc(opaque, 1, alloc_size); - if (!ptr) - return NULL; - - /* Calculate return pointer address with space enough to store original pointer */ - align_diff = align - ((uintptr_t)ptr % align); - return_ptr = (uintptr_t)ptr + align_diff; - if (align_diff < sizeof(void *)) - return_ptr += align; - - /* Store the original pointer for free() */ - original_ptr = return_ptr - sizeof(void *); - memcpy((void *)original_ptr, &ptr, sizeof(void *)); - - /* Return properly aligned pointer in allocation */ - return (void *)return_ptr; -} - -void Z_INTERNAL PREFIX3(free_aligned)(zng_cfree_func zfree, void *opaque, void *ptr) { - /* If no custom cfree function used then call zlib-ng's aligned cfree */ - if (zfree == PREFIX(zcfree)) { - PREFIX(zcfree)(opaque, ptr); - return; - } - if (!ptr) - return; - - /* Calculate offset to original memory allocation pointer */ - void *original_ptr = (void *)((uintptr_t)ptr - sizeof(void *)); - void *free_ptr = *(void **)original_ptr; - - /* Free original memory allocation */ - zfree(opaque, free_ptr); -} diff --git a/src/native/external/zlib-ng/zutil.h b/src/native/external/zlib-ng/zutil.h index 663616b44d89e..a6284502d047e 100644 --- a/src/native/external/zlib-ng/zutil.h +++ b/src/native/external/zlib-ng/zutil.h @@ -1,7 +1,7 @@ #ifndef ZUTIL_H_ #define ZUTIL_H_ /* zutil.h -- internal interface and configuration of the compression library - * Copyright (C) 1995-2022 Jean-loup Gailly, Mark Adler + * Copyright (C) 
1995-2024 Jean-loup Gailly, Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -24,7 +24,7 @@ typedef unsigned long ulg; extern z_const char * const PREFIX(z_errmsg)[10]; /* indexed by 2-zlib_error */ /* (size given to avoid silly warnings with Visual C++) */ -#define ERR_MSG(err) PREFIX(z_errmsg)[Z_NEED_DICT-(err)] +#define ERR_MSG(err) PREFIX(z_errmsg)[(err) < -6 || (err) > 2 ? 9 : 2 - (err)] #define ERR_RETURN(strm, err) return (strm->msg = ERR_MSG(err), (err)) /* To be used only when the state is known to be valid */ @@ -103,7 +103,7 @@ extern z_const char * const PREFIX(z_errmsg)[10]; /* indexed by 2-zlib_error */ # define OS_CODE 6 #endif -#if defined(MACOS) || defined(TARGET_OS_MAC) +#if defined(MACOS) # define OS_CODE 7 #endif @@ -137,12 +137,4 @@ void Z_INTERNAL PREFIX(zcfree)(void *opaque, void *ptr); typedef void *zng_calloc_func(void *opaque, unsigned items, unsigned size); typedef void zng_cfree_func(void *opaque, void *ptr); -void Z_INTERNAL *PREFIX3(alloc_aligned)(zng_calloc_func zalloc, void *opaque, unsigned items, unsigned size, unsigned align); -void Z_INTERNAL PREFIX3(free_aligned)(zng_cfree_func zfree, void *opaque, void *ptr); - -#define ZALLOC(strm, items, size) PREFIX3(alloc_aligned)((strm)->zalloc, (strm)->opaque, (items), (size), 64) -#define ZFREE(strm, addr) PREFIX3(free_aligned)((strm)->zfree, (strm)->opaque, (void *)(addr)) - -#define TRY_FREE(s, p) {if (p) ZFREE(s, p);} - #endif /* ZUTIL_H_ */ diff --git a/src/native/external/zlib-ng/zutil_p.h b/src/native/external/zlib-ng/zutil_p.h index caec91d50d364..97799f0ce31ef 100644 --- a/src/native/external/zlib-ng/zutil_p.h +++ b/src/native/external/zlib-ng/zutil_p.h @@ -16,15 +16,19 @@ /* Function to allocate 16 or 64-byte aligned memory */ static inline void *zng_alloc(size_t size) { -#ifdef HAVE_POSIX_MEMALIGN +#ifdef HAVE_ALIGNED_ALLOC + /* Size must be a multiple of alignment */ + size = (size + (64 - 1)) & ~(64 - 1); + return (void 
*)aligned_alloc(64, size); /* Defined in C11 */ +#elif defined(HAVE_POSIX_MEMALIGN) void *ptr; return posix_memalign(&ptr, 64, size) ? NULL : ptr; #elif defined(_WIN32) return (void *)_aligned_malloc(size, 64); #elif defined(__APPLE__) - return (void *)malloc(size); /* MacOS always aligns to 16 bytes */ -#elif defined(HAVE_ALIGNED_ALLOC) - return (void *)aligned_alloc(64, size); + /* Fallback for when posix_memalign and aligned_alloc are not available. + * On macOS, it always aligns to 16 bytes. */ + return (void *)malloc(size); #else return (void *)memalign(64, size); #endif diff --git a/src/native/libs/System.IO.Compression.Native/CMakeLists.txt b/src/native/libs/System.IO.Compression.Native/CMakeLists.txt index 3ad593efe472c..e13e1271809b1 100644 --- a/src/native/libs/System.IO.Compression.Native/CMakeLists.txt +++ b/src/native/libs/System.IO.Compression.Native/CMakeLists.txt @@ -14,12 +14,6 @@ set(NATIVECOMPRESSION_SOURCES pal_zlib.c ) -if (HOST_WIN32 OR CLR_CMAKE_TARGET_WIN32) - list(APPEND NATIVECOMPRESSION_SOURCES "zlib_allocator_win.c") -else() - list(APPEND NATIVECOMPRESSION_SOURCES "zlib_allocator_unix.c") -endif() - if (NOT CLR_CMAKE_TARGET_BROWSER AND NOT CLR_CMAKE_TARGET_WASI) if (CLR_CMAKE_USE_SYSTEM_BROTLI) diff --git a/src/native/libs/System.IO.Compression.Native/pal_zlib.c b/src/native/libs/System.IO.Compression.Native/pal_zlib.c index 87b7043ea20ff..a04f60aa876a8 100644 --- a/src/native/libs/System.IO.Compression.Native/pal_zlib.c +++ b/src/native/libs/System.IO.Compression.Native/pal_zlib.c @@ -11,7 +11,6 @@ #else #include "pal_utilities.h" #endif -#include #include c_static_assert(PAL_Z_NOFLUSH == Z_NO_FLUSH); @@ -40,9 +39,6 @@ static int32_t Init(PAL_ZStream* stream) { z_stream* zStream = (z_stream*)calloc(1, sizeof(z_stream)); - zStream->zalloc = z_custom_calloc; - zStream->zfree = z_custom_cfree; - stream->internalState = zStream; if (zStream != NULL) diff --git a/src/native/libs/System.IO.Compression.Native/zlib_allocator.h 
b/src/native/libs/System.IO.Compression.Native/zlib_allocator.h deleted file mode 100644 index cadd00bb5879c..0000000000000 --- a/src/native/libs/System.IO.Compression.Native/zlib_allocator.h +++ /dev/null @@ -1,8 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -#include // voidpf - -voidpf z_custom_calloc(voidpf opaque, unsigned items, unsigned size); - -void z_custom_cfree(voidpf opaque, voidpf ptr); diff --git a/src/native/libs/System.IO.Compression.Native/zlib_allocator_unix.c b/src/native/libs/System.IO.Compression.Native/zlib_allocator_unix.c deleted file mode 100644 index 38638d6158526..0000000000000 --- a/src/native/libs/System.IO.Compression.Native/zlib_allocator_unix.c +++ /dev/null @@ -1,151 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -#include -#include -#include -#include -#include -#include - -/* A custom allocator for zlib that provides some defense-in-depth over standard malloc / free. - * (non-Windows version) - * - * 1. When zlib allocates fixed-length data structures for containing stream metadata, we zero - * the memory before using it, preventing use of uninitialized memory within these structures. - * Ideally we would do this for dynamically-sized buffers as well, but there is a measurable - * perf impact to doing this. Zeroing fixed structures seems like a good trade-off here, since - * these data structures contain most of the metadata used for managing the variable-length - * dynamically allocated buffers. - * - * 2. We put a cookie both before and after any allocated memory, which allows us to detect local - * buffer overruns on the call to free(). The cookie values are tied to the addresses where - * the data is located in memory. - * - * 3. We trash the aforementioned cookie on free(), which allows us to detect double-free. 
- * - * If any of these checks fails, the application raises SIGABRT. - */ - -#ifndef MEMORY_ALLOCATION_ALIGNMENT -// malloc() returns an address suitably aligned for any built-in data type. -// Historically, this has been twice the arch's natural word size. -#ifdef HOST_64BIT -#define MEMORY_ALLOCATION_ALIGNMENT 16 -#else -#define MEMORY_ALLOCATION_ALIGNMENT 8 -#endif -#endif - -typedef struct _DOTNET_ALLOC_COOKIE -{ - void* Address; - size_t Size; -} DOTNET_ALLOC_COOKIE; - -static bool SafeAdd(size_t a, size_t b, size_t* sum) -{ - if (SIZE_MAX - a >= b) { *sum = a + b; return true; } - else { *sum = 0; return false; } -} - -static bool SafeMult(size_t a, size_t b, size_t* product) -{ - if (SIZE_MAX / a >= b) { *product = a * b; return true; } - else { *product = 0; return false; } -} - -static DOTNET_ALLOC_COOKIE ReadAllocCookieUnaligned(const void* pSrc) -{ - DOTNET_ALLOC_COOKIE vCookie; - memcpy(&vCookie, pSrc, sizeof(DOTNET_ALLOC_COOKIE)); - return vCookie; -} - -static void WriteAllocCookieUnaligned(void* pDest, DOTNET_ALLOC_COOKIE vCookie) -{ - memcpy(pDest, &vCookie, sizeof(DOTNET_ALLOC_COOKIE)); -} - -// Historically, the memory allocator always returns addresses aligned to some -// particular boundary. We'll make that same guarantee here just in case somebody -// depends on it. -const size_t DOTNET_ALLOC_HEADER_COOKIE_SIZE_WITH_PADDING = (sizeof(DOTNET_ALLOC_COOKIE) + MEMORY_ALLOCATION_ALIGNMENT - 1) & ~((size_t)MEMORY_ALLOCATION_ALIGNMENT - 1); -const size_t DOTNET_ALLOC_TRAILER_COOKIE_SIZE = sizeof(DOTNET_ALLOC_COOKIE); - -voidpf z_custom_calloc(voidpf opaque, unsigned items, unsigned size) -{ - (void)opaque; // unreferenced formal parameter - - // If initializing a fixed-size structure, zero the memory. 
- bool fZeroMemory = (items == 1); - - size_t cbRequested; - if (sizeof(items) + sizeof(size) <= sizeof(cbRequested)) - { - // multiplication can't overflow; no need for safeint - cbRequested = (size_t)items * (size_t)size; - } - else - { - // multiplication can overflow; go through safeint - if (!SafeMult((size_t)items, (size_t)size, &cbRequested)) { return NULL; } - } - - // Make sure the actual allocation has enough room for our frontside & backside cookies. - size_t cbActualAllocationSize; - if (!SafeAdd(cbRequested, DOTNET_ALLOC_HEADER_COOKIE_SIZE_WITH_PADDING + DOTNET_ALLOC_TRAILER_COOKIE_SIZE, &cbActualAllocationSize)) { return NULL; } - - void* pAlloced = (fZeroMemory) ? calloc(1, cbActualAllocationSize) : malloc(cbActualAllocationSize); - if (pAlloced == NULL) { return NULL; } // OOM - - DOTNET_ALLOC_COOKIE* pHeaderCookie = (DOTNET_ALLOC_COOKIE*)pAlloced; - uint8_t* pReturnToCaller = (uint8_t*)pAlloced + DOTNET_ALLOC_HEADER_COOKIE_SIZE_WITH_PADDING; - uint8_t* pTrailerCookie = pReturnToCaller + cbRequested; - - // Write out the same cookie for the header & the trailer, then we're done. 
- - DOTNET_ALLOC_COOKIE vCookie = { 0 }; - vCookie.Address = pReturnToCaller; - vCookie.Size = cbRequested; - *pHeaderCookie = vCookie; // aligned - WriteAllocCookieUnaligned(pTrailerCookie, vCookie); - - return pReturnToCaller; -} - -static void zcfree_trash_cookie(void* pCookie) -{ - memset(pCookie, 0, sizeof(DOTNET_ALLOC_COOKIE)); -} - -void z_custom_cfree(voidpf opaque, voidpf ptr) -{ - (void)opaque; // unreferenced formal parameter - - if (ptr == NULL) { return; } // ok to free nullptr - - // Check cookie at beginning - - DOTNET_ALLOC_COOKIE* pHeaderCookie = (DOTNET_ALLOC_COOKIE*)((uint8_t*)ptr - DOTNET_ALLOC_HEADER_COOKIE_SIZE_WITH_PADDING); - if (pHeaderCookie->Address != ptr) { goto Fail; } - size_t cbRequested = pHeaderCookie->Size; - - // Check cookie at end - - uint8_t* pTrailerCookie = (uint8_t*)ptr + cbRequested; - DOTNET_ALLOC_COOKIE vTrailerCookie = ReadAllocCookieUnaligned(pTrailerCookie); - if (vTrailerCookie.Address != ptr) { goto Fail; } - if (vTrailerCookie.Size != cbRequested) { goto Fail; } - - // Checks passed - now trash the cookies and free memory - - zcfree_trash_cookie(pHeaderCookie); - zcfree_trash_cookie(pTrailerCookie); - - free(pHeaderCookie); - return; - -Fail: - abort(); // cookie check failed -} diff --git a/src/native/libs/System.IO.Compression.Native/zlib_allocator_win.c b/src/native/libs/System.IO.Compression.Native/zlib_allocator_win.c deleted file mode 100644 index fefba550c16ed..0000000000000 --- a/src/native/libs/System.IO.Compression.Native/zlib_allocator_win.c +++ /dev/null @@ -1,180 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -#include -#include -#include -#include -#include /* _ASSERTE */ - -#include -#include -#include -#include - -/* A custom allocator for zlib that provides some defense-in-depth over standard malloc / free. - * (Windows-specific version) - * - * 1. 
In 64-bit processes, we use a custom heap rather than relying on the standard process heap. - * This should cause zlib's buffers to go into a separate address range from the rest of app - * data, making it more difficult for buffer overruns to affect non-zlib-related data structures. - * - * 2. When zlib allocates fixed-length data structures for containing stream metadata, we zero - * the memory before using it, preventing use of uninitialized memory within these structures. - * Ideally we would do this for dynamically-sized buffers as well, but there is a measurable - * perf impact to doing this. Zeroing fixed structures seems like a good trade-off here, since - * these data structures contain most of the metadata used for managing the variable-length - * dynamically allocated buffers. - * - * 3. We put a cookie both before and after any allocated memory, which allows us to detect local - * buffer overruns on the call to free(). The cookie values are enciphered to make it more - * difficult for somebody to guess a correct value. - * - * 4. We trash the aforementioned cookie on free(), which allows us to detect double-free. - * - * If any of these checks fails, the application terminates immediately, optionally triggering a - * crash dump. We use a special code that's easy to search for in Watson. - */ - -// Gets the special heap we'll allocate from. -HANDLE GetZlibHeap() -{ -#ifdef _WIN64 - static HANDLE s_hPublishedHeap = NULL; - - // If already initialized, return immediately. - // We don't need a volatile read here since the publish is performed with release semantics. - if (s_hPublishedHeap != NULL) { return s_hPublishedHeap; } - - // Attempt to create a new heap. The heap will be dynamically sized. - HANDLE hNewHeap = HeapCreate(0, 0, 0); - - if (hNewHeap != NULL) - { - // We created a new heap. Attempt to publish it. 
- if (InterlockedCompareExchangePointer(&s_hPublishedHeap, hNewHeap, NULL) != NULL) - { - HeapDestroy(hNewHeap); // Somebody published before us. Destroy our heap. - hNewHeap = NULL; // Guard against accidental use later in the method. - } - } - else - { - // If we can't create a new heap, fall back to the process default heap. - InterlockedCompareExchangePointer(&s_hPublishedHeap, GetProcessHeap(), NULL); - } - - // Some thread - perhaps us, perhaps somebody else - published the heap. Return it. - // We don't need a volatile read here since the publish is performed with release semantics. - _ASSERTE(s_hPublishedHeap != NULL); - return s_hPublishedHeap; -#else - // We don't want to create a new heap in a 32-bit process because it could end up - // reserving too much of the address space. Instead, fall back to the normal process heap. - return GetProcessHeap(); -#endif -} - -typedef struct _DOTNET_ALLOC_COOKIE -{ - PVOID CookieValue; - union _Size - { - SIZE_T RawValue; - LPVOID EncodedValue; - } Size; -} DOTNET_ALLOC_COOKIE; - -// Historically, the Windows memory allocator always returns addresses aligned to some -// particular boundary. We'll make that same guarantee here just in case somebody -// depends on it. -const SIZE_T DOTNET_ALLOC_HEADER_COOKIE_SIZE_WITH_PADDING = (sizeof(DOTNET_ALLOC_COOKIE) + MEMORY_ALLOCATION_ALIGNMENT - 1) & ~((SIZE_T)MEMORY_ALLOCATION_ALIGNMENT - 1); -const SIZE_T DOTNET_ALLOC_TRAILER_COOKIE_SIZE = sizeof(DOTNET_ALLOC_COOKIE); - -voidpf z_custom_calloc(opaque, items, size) - voidpf opaque; - unsigned items; - unsigned size; -{ - (void)opaque; // suppress C4100 - unreferenced formal parameter - - // If initializing a fixed-size structure, zero the memory. - DWORD dwFlags = (items == 1) ? 
HEAP_ZERO_MEMORY : 0; - - SIZE_T cbRequested; - if (sizeof(items) + sizeof(size) <= sizeof(cbRequested)) - { - // multiplication can't overflow; no need for safeint - cbRequested = (SIZE_T)items * (SIZE_T)size; - } - else - { - // multiplication can overflow; go through safeint - if (FAILED(SIZETMult(items, size, &cbRequested))) { return NULL; } - } - - // Make sure the actual allocation has enough room for our frontside & backside cookies. - SIZE_T cbActualAllocationSize; - if (FAILED(SIZETAdd(cbRequested, DOTNET_ALLOC_HEADER_COOKIE_SIZE_WITH_PADDING + DOTNET_ALLOC_TRAILER_COOKIE_SIZE, &cbActualAllocationSize))) { return NULL; } - - LPVOID pAlloced = HeapAlloc(GetZlibHeap(), dwFlags, cbActualAllocationSize); - if (pAlloced == NULL) { return NULL; } // OOM - - // Now set the header & trailer cookies - DOTNET_ALLOC_COOKIE* pHeaderCookie = (DOTNET_ALLOC_COOKIE*)pAlloced; - pHeaderCookie->CookieValue = EncodePointer(&pHeaderCookie->CookieValue); - pHeaderCookie->Size.RawValue = cbRequested; - - LPBYTE pReturnToCaller = (LPBYTE)pHeaderCookie + DOTNET_ALLOC_HEADER_COOKIE_SIZE_WITH_PADDING; - - UNALIGNED DOTNET_ALLOC_COOKIE* pTrailerCookie = (UNALIGNED DOTNET_ALLOC_COOKIE*)(pReturnToCaller + cbRequested); - pTrailerCookie->CookieValue = EncodePointer(&pTrailerCookie->CookieValue); - pTrailerCookie->Size.EncodedValue = EncodePointer((PVOID)cbRequested); - - return pReturnToCaller; -} - -FORCEINLINE -void zcfree_trash_cookie(UNALIGNED DOTNET_ALLOC_COOKIE* pCookie) -{ - memset(pCookie, 0, sizeof(*pCookie)); - pCookie->CookieValue = (PVOID)(SIZE_T)0xDEADBEEF; -} - -// Marked noinline to keep it on the call stack during crash reports. 
-DECLSPEC_NOINLINE -DECLSPEC_NORETURN -void zcfree_cookie_check_failed() -{ - __fastfail(FAST_FAIL_HEAP_METADATA_CORRUPTION); -} - -void z_custom_cfree(opaque, ptr) - voidpf opaque; - voidpf ptr; -{ - (void)opaque; // suppress C4100 - unreferenced formal parameter - - if (ptr == NULL) { return; } // ok to free nullptr - - // Check cookie at beginning and end - - DOTNET_ALLOC_COOKIE* pHeaderCookie = (DOTNET_ALLOC_COOKIE*)((LPBYTE)ptr - DOTNET_ALLOC_HEADER_COOKIE_SIZE_WITH_PADDING); - if (DecodePointer(pHeaderCookie->CookieValue) != &pHeaderCookie->CookieValue) { goto Fail; } - SIZE_T cbRequested = pHeaderCookie->Size.RawValue; - - UNALIGNED DOTNET_ALLOC_COOKIE* pTrailerCookie = (UNALIGNED DOTNET_ALLOC_COOKIE*)((LPBYTE)ptr + cbRequested); - if (DecodePointer(pTrailerCookie->CookieValue) != &pTrailerCookie->CookieValue) { goto Fail; } - if (DecodePointer(pTrailerCookie->Size.EncodedValue) != (LPVOID)cbRequested) { goto Fail; } - - // Checks passed - now trash the cookies and free memory - - zcfree_trash_cookie(pHeaderCookie); - zcfree_trash_cookie(pTrailerCookie); - - if (!HeapFree(GetZlibHeap(), 0, pHeaderCookie)) { goto Fail; } - return; - -Fail: - zcfree_cookie_check_failed(); -}