Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

deps: update Base64 SIMD library #45091

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions deps/base64/base64.gyp
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,15 @@
'HAVE_SSE42=1',
'HAVE_AVX=1',
'HAVE_AVX2=1',
'HAVE_AVX512=1',
],
'dependencies': [
'base64_ssse3',
'base64_sse41',
'base64_sse42',
'base64_avx',
'base64_avx2',
'base64_avx512',
],
}, {
'sources': [
Expand All @@ -61,6 +63,7 @@
'base64/lib/arch/sse42/codec.c',
'base64/lib/arch/avx/codec.c',
'base64/lib/arch/avx2/codec.c',
'base64/lib/arch/avx512/codec.c',
],
}],
],
Expand Down Expand Up @@ -162,6 +165,30 @@
],
},

# Static library target that builds only the AVX512 codec source, with the
# AVX512 compiler flags enabled and HAVE_AVX512 defined for that source.
{
'target_name': 'base64_avx512',
'type': 'static_library',
'include_dirs': [ 'base64/include', 'base64/lib' ],
'sources': [ 'base64/lib/arch/avx512/codec.c' ],
'defines': [ 'BASE64_STATIC_DEFINE', 'HAVE_AVX512=1' ],
'conditions': [
# Non-Windows builds: pass the AVX512 VL and VBMI flags, both through the
# generic cflags and through Xcode's OTHER_CFLAGS.
[ 'OS!="win"', {
'cflags': [ '-mavx512vl', '-mavx512vbmi' ],
'xcode_settings': {
'OTHER_CFLAGS': [ '-mavx512vl', '-mavx512vbmi' ]
},
}, {
# Windows builds: pass /arch:AVX512 to the Visual Studio C compiler tool.
'msvs_settings': {
'VCCLCompilerTool': {
'AdditionalOptions': [
'/arch:AVX512'
],
},
},
}],
],
},

{
'target_name': 'base64_neon32',
'type': 'static_library',
Expand Down
6 changes: 5 additions & 1 deletion deps/base64/base64/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ if (POLICY CMP0127)
cmake_policy(SET CMP0127 NEW)
endif()

project(base64 LANGUAGES C VERSION 0.4.0)
project(base64 LANGUAGES C VERSION 0.5.0)

include(GNUInstallDirs)
include(CMakeDependentOption)
Expand Down Expand Up @@ -62,6 +62,8 @@ cmake_dependent_option(BASE64_WITH_AVX "add AVX codepath" ON ${_IS_X86} OFF)
add_feature_info(AVX BASE64_WITH_AVX "add AVX codepath")
cmake_dependent_option(BASE64_WITH_AVX2 "add AVX 2 codepath" ON ${_IS_X86} OFF)
add_feature_info(AVX2 BASE64_WITH_AVX2 "add AVX2 codepath")
# AVX512 codepath toggle: user-visible and ON by default when the ${_IS_X86}
# condition holds, otherwise forced to OFF.
cmake_dependent_option(BASE64_WITH_AVX512 "add AVX 512 codepath" ON ${_IS_X86} OFF)
# Register the option under its own feature name so the feature summary lists
# AVX512 separately instead of as a second "AVX2" entry.
add_feature_info(AVX512 BASE64_WITH_AVX512 "add AVX512 codepath")

cmake_dependent_option(BASE64_WITH_NEON32 "add NEON32 codepath" OFF _TARGET_ARCH_arm OFF)
add_feature_info(NEON32 BASE64_WITH_NEON32 "add NEON32 codepath")
Expand Down Expand Up @@ -118,6 +120,7 @@ add_library(base64
lib/arch/sse42/codec.c
lib/arch/avx/codec.c
lib/arch/avx2/codec.c
lib/arch/avx512/codec.c

lib/arch/neon32/codec.c
lib/arch/neon64/codec.c
Expand Down Expand Up @@ -206,6 +209,7 @@ if (_TARGET_ARCH STREQUAL "x86" OR _TARGET_ARCH STREQUAL "x64")
configure_codec(SSE42 __SSSE4_2__)
configure_codec(AVX)
configure_codec(AVX2)
configure_codec(AVX512)

elseif (_TARGET_ARCH STREQUAL "arm")
set(BASE64_NEON32_CFLAGS "${COMPILE_FLAGS_NEON32}" CACHE STRING "the NEON32 compile flags (for 'lib/arch/neon32/codec.c')")
Expand Down
4 changes: 2 additions & 2 deletions deps/base64/base64/LICENSE
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Copyright (c) 2005-2007, Nick Galbreath
Copyright (c) 2013-2019, Alfred Klomp
Copyright (c) 2015-2017, Wojciech Mula
Copyright (c) 2015-2018, Wojciech Muła
Copyright (c) 2016-2017, Matthieu Darbois
Copyright (c) 2013-2022, Alfred Klomp
All rights reserved.

Redistribution and use in source and binary forms, with or without
Expand Down
9 changes: 8 additions & 1 deletion deps/base64/base64/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ CFLAGS += -std=c99 -O3 -Wall -Wextra -pedantic
OBJCOPY ?= objcopy

OBJS = \
lib/arch/avx512/codec.o \
lib/arch/avx2/codec.o \
lib/arch/generic/codec.o \
lib/arch/neon32/codec.o \
Expand All @@ -16,6 +17,7 @@ OBJS = \
lib/codec_choose.o \
lib/tables/tables.o

HAVE_AVX512 = 0
HAVE_AVX2 = 0
HAVE_NEON32 = 0
HAVE_NEON64 = 0
Expand All @@ -26,6 +28,9 @@ HAVE_AVX = 0

# The user should supply compiler flags for the codecs they want to build.
# Check which codecs we're going to include:
ifdef AVX512_CFLAGS
HAVE_AVX512 = 1
endif
ifdef AVX2_CFLAGS
HAVE_AVX2 = 1
endif
Expand Down Expand Up @@ -64,7 +69,8 @@ lib/libbase64.o: $(OBJS)
$(OBJCOPY) --keep-global-symbols=lib/exports.txt $@

lib/config.h:
@echo "#define HAVE_AVX2 $(HAVE_AVX2)" > $@
@echo "#define HAVE_AVX512 $(HAVE_AVX512)" > $@
@echo "#define HAVE_AVX2 $(HAVE_AVX2)" >> $@
@echo "#define HAVE_NEON32 $(HAVE_NEON32)" >> $@
@echo "#define HAVE_NEON64 $(HAVE_NEON64)" >> $@
@echo "#define HAVE_SSSE3 $(HAVE_SSSE3)" >> $@
Expand All @@ -75,6 +81,7 @@ lib/config.h:
$(OBJS): lib/config.h
$(OBJS): CFLAGS += -Ilib

lib/arch/avx512/codec.o: CFLAGS += $(AVX512_CFLAGS)
lib/arch/avx2/codec.o: CFLAGS += $(AVX2_CFLAGS)
lib/arch/neon32/codec.o: CFLAGS += $(NEON32_CFLAGS)
lib/arch/neon64/codec.o: CFLAGS += $(NEON64_CFLAGS)
Expand Down
25 changes: 21 additions & 4 deletions deps/base64/base64/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
[![Build Status](https://github.com/aklomp/base64/actions/workflows/test.yml/badge.svg)](https://github.com/aklomp/base64/actions/workflows/test.yml)

This is an implementation of a base64 stream encoding/decoding library in C99
with SIMD (AVX2, NEON, AArch64/NEON, SSSE3, SSE4.1, SSE4.2, AVX) and
with SIMD (AVX2, AVX512, NEON, AArch64/NEON, SSSE3, SSE4.1, SSE4.2, AVX) and
[OpenMP](http://www.openmp.org) acceleration. It also contains wrapper functions
to encode/decode simple length-delimited strings. This library aims to be:

Expand All @@ -19,6 +19,10 @@ will pick an optimized codec that lets it encode/decode 12 or 24 bytes at a
time, which gives a speedup of four or more times compared to the "plain"
bytewise codec.

AVX512 support is only for encoding at present, utilizing the AVX512 VL and VBMI
instructions. Decoding reuses the AVX2 implementation. CPUs from Cannonlake
(manufactured in 2018) onward support these instructions.

NEON support is hardcoded to on or off at compile time, because portable
runtime feature detection is unavailable on ARM.

Expand Down Expand Up @@ -59,6 +63,9 @@ optimizations described by Wojciech Muła in a
[articles](http://0x80.pl/notesen/2016-01-17-sse-base64-decoding.html).
His own code is [here](https://github.com/WojciechMula/toys/tree/master/base64).

The AVX512 encoder is based on code from Wojciech Muła's
[base64simd](https://github.com/WojciechMula/base64simd) library.

The OpenMP implementation was added by Ferry Toth (@htot) from [Exalon Delft](http://www.exalondelft.nl).

## Building
Expand All @@ -76,8 +83,8 @@ To compile just the "plain" library without SIMD codecs, type:
make lib/libbase64.o
```

Optional SIMD codecs can be included by specifying the `AVX2_CFLAGS`, `NEON32_CFLAGS`, `NEON64_CFLAGS`,
`SSSE3_CFLAGS`, `SSE41_CFLAGS`, `SSE42_CFLAGS` and/or `AVX_CFLAGS` environment variables.
Optional SIMD codecs can be included by specifying the `AVX2_CFLAGS`, `AVX512_CFLAGS`,
`NEON32_CFLAGS`, `NEON64_CFLAGS`, `SSSE3_CFLAGS`, `SSE41_CFLAGS`, `SSE42_CFLAGS` and/or `AVX_CFLAGS` environment variables.
A typical build invocation on x86 looks like this:

```sh
Expand All @@ -93,6 +100,15 @@ Example:
AVX2_CFLAGS=-mavx2 make
```

### AVX512

To build and include the AVX512 codec, set the `AVX512_CFLAGS` environment variable to a value that will turn on AVX512 support in your compiler, typically `-mavx512vl -mavx512vbmi`.
Example:

```sh
AVX512_CFLAGS="-mavx512vl -mavx512vbmi" make
```

The codec will only be used if runtime feature detection shows that the target machine supports AVX2.

### SSSE3
Expand Down Expand Up @@ -208,6 +224,7 @@ Mainly there for testing purposes, this is also useful on ARM where the only way
The following constants can be used:

- `BASE64_FORCE_AVX2`
- `BASE64_FORCE_AVX512`
- `BASE64_FORCE_NEON32`
- `BASE64_FORCE_NEON64`
- `BASE64_FORCE_PLAIN`
Expand Down Expand Up @@ -434,7 +451,7 @@ x86 processors
| i7-4770 @ 3.4 GHz DDR1600 OPENMP 4 thread | 4884\* | 7099\* | 4917\* | 7057\* | 4799\* | 7143\* | 4902\* | 7219\* |
| i7-4770 @ 3.4 GHz DDR1600 OPENMP 8 thread | 5212\* | 8849\* | 5284\* | 9099\* | 5289\* | 9220\* | 4849\* | 9200\* |
| i7-4870HQ @ 2.5 GHz | 1471\* | 3066\* | 6721\* | 6962\* | 7015\* | 8267\* | 8328\* | 11576\* |
| i5-4590S @ 3.0 GHz | 3356 | 3197 | 4363 | 6104 | 4243 | 6233 | 4160 | 6344 |
| i5-4590S @ 3.0 GHz | 3356 | 3197 | 4363 | 6104 | 4243\* | 6233 | 4160\* | 6344 |
| Xeon X5570 @ 2.93 GHz | 2161 | 1508 | 3160 | 3915 | - | - | - | - |
| Pentium4 @ 3.4 GHz | 896 | 740 | - | - | - | - | - | - |
| Atom N270 | 243 | 266 | 508 | 387 | - | - | - | - |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ macro(define_SIMD_compile_flags)
set(COMPILE_FLAGS_SSE42 "-msse4.2")
set(COMPILE_FLAGS_AVX "-mavx")
set(COMPILE_FLAGS_AVX2 "-mavx2")
set(COMPILE_FLAGS_AVX512 "-mavx512vl -mavx512vbmi")

#arm
set(COMPILE_FLAGS_NEON32 "-mfpu=neon")
Expand All @@ -30,5 +31,6 @@ macro(define_SIMD_compile_flags)
set(COMPILE_FLAGS_SSE42 " ")
set(COMPILE_FLAGS_AVX "/arch:AVX")
set(COMPILE_FLAGS_AVX2 "/arch:AVX2")
set(COMPILE_FLAGS_AVX512 "/arch:AVX512")
endif()
endmacro(define_SIMD_compile_flags)
3 changes: 3 additions & 0 deletions deps/base64/base64/cmake/config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
#cmakedefine01 BASE64_WITH_AVX2
#define HAVE_AVX2 BASE64_WITH_AVX2

#cmakedefine01 BASE64_WITH_AVX512
#define HAVE_AVX512 BASE64_WITH_AVX512

#cmakedefine01 BASE64_WITH_NEON32
#define HAVE_NEON32 BASE64_WITH_NEON32

Expand Down
1 change: 1 addition & 0 deletions deps/base64/base64/include/libbase64.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ extern "C" {
#define BASE64_FORCE_SSE41 (1 << 5)
#define BASE64_FORCE_SSE42 (1 << 6)
#define BASE64_FORCE_AVX (1 << 7)
#define BASE64_FORCE_AVX512 (1 << 8)

struct base64_state {
int eof;
Expand Down
30 changes: 27 additions & 3 deletions deps/base64/base64/lib/arch/avx/codec.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,19 +11,43 @@
#if HAVE_AVX
#include <immintrin.h>

// Only enable inline assembly on supported compilers.
#ifndef BASE64_AVX_USE_ASM
# if defined(__GNUC__) || defined(__clang__)
# define BASE64_AVX_USE_ASM 1
# else
# define BASE64_AVX_USE_ASM 0
# endif
#endif

#include "../ssse3/dec_reshuffle.c"
#include "../ssse3/dec_loop.c"
#include "../ssse3/enc_translate.c"
#include "../ssse3/enc_reshuffle.c"
#include "../ssse3/enc_loop.c"

#if BASE64_AVX_USE_ASM
# include "enc_loop_asm.c"
#else
# include "../ssse3/enc_translate.c"
# include "../ssse3/enc_reshuffle.c"
# include "../ssse3/enc_loop.c"
#endif

#endif // HAVE_AVX

BASE64_ENC_FUNCTION(avx)
{
#if HAVE_AVX
#include "../generic/enc_head.c"

// For supported compilers, use a hand-optimized inline assembly
// encoder. Otherwise fall back on the SSSE3 encoder, but compiled with
// AVX flags to generate better optimized AVX code.

#if BASE64_AVX_USE_ASM
enc_loop_avx(&s, &slen, &o, &olen);
#else
enc_loop_ssse3(&s, &slen, &o, &olen);
#endif

#include "../generic/enc_tail.c"
#else
BASE64_ENC_STUB
Expand Down
Loading