Skip to content

Commit

Permalink
[bm] improve charconv benchmarks
Browse files Browse the repository at this point in the history
  • Loading branch information
biojppm committed Mar 16, 2022
1 parent f31fc2d commit 2fd5583
Show file tree
Hide file tree
Showing 6 changed files with 580 additions and 239 deletions.
103 changes: 92 additions & 11 deletions .github/workflows/benchmarks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ env:


jobs:
benchmarks:
name: bm/c++${{matrix.std}}/${{matrix.cxx}}/${{matrix.bt}}
bm_x86_64:
name: bm/x86_64/c++${{matrix.std}}/${{matrix.cxx}}/${{matrix.bt}}
if: |
(!contains(github.event.head_commit.message, 'skip all')) ||
(!contains(github.event.head_commit.message, 'skip benchmarks')) ||
Expand All @@ -36,23 +36,15 @@ jobs:
fail-fast: false
matrix:
include:
- {std: 11, cxx: g++-10, bt: Debug , os: ubuntu-18.04, bitlinks: static64 static32}
- {std: 11, cxx: g++-10, bt: Release, os: ubuntu-18.04, bitlinks: static64 static32}
- {std: 17, cxx: g++-10, bt: Debug , os: ubuntu-18.04, bitlinks: static64 static32}
- {std: 17, cxx: g++-10, bt: Release, os: ubuntu-18.04, bitlinks: static64 static32}
- {std: 20, cxx: g++-10, bt: Debug , os: ubuntu-18.04, bitlinks: static64 static32}
- {std: 20, cxx: g++-10, bt: Release, os: ubuntu-18.04, bitlinks: static64 static32}
#
- {std: 11, cxx: vs2019, bt: Debug , os: windows-2019, bitlinks: static64 static32}
- {std: 11, cxx: vs2019, bt: Release, os: windows-2019, bitlinks: static64 static32}
- {std: 17, cxx: vs2019, bt: Debug , os: windows-2019, bitlinks: static64 static32}
- {std: 17, cxx: vs2019, bt: Release, os: windows-2019, bitlinks: static64 static32}
- {std: 20, cxx: vs2019, bt: Debug , os: windows-2019, bitlinks: static64 static32}
- {std: 20, cxx: vs2019, bt: Release, os: windows-2019, bitlinks: static64 static32}
#
- {std: 11, cxx: xcode, xcver: 13, bt: Debug , os: macos-11, bitlinks: static64}
- {std: 11, cxx: xcode, xcver: 13, bt: Release, os: macos-11, bitlinks: static64}
- {std: 17, cxx: xcode, xcver: 13, bt: Debug , os: macos-11, bitlinks: static64}
- {std: 17, cxx: xcode, xcver: 13, bt: Release, os: macos-11, bitlinks: static64}
env: {BM: ON, STD: "${{matrix.std}}", CXX_: "${{matrix.cxx}}", BT: "${{matrix.bt}}", BITLINKS: "${{matrix.bitlinks}}", VG: "${{matrix.vg}}", SAN: "${{matrix.san}}", LINT: "${{matrix.lint}}", OS: "${{matrix.os}}"}
steps:
Expand Down Expand Up @@ -81,7 +73,7 @@ jobs:
set -x
desc=$(git describe || git rev-parse --short HEAD)
for bl in ${{matrix.bitlinks}} ; do
dst=$(echo benchmark_results/$desc/${{matrix.cxx}}-${{matrix.bt}}-c++${{matrix.std}}-$bl | sed 's:++-:xx:g' | sed 's:+:x:g')
dst=$(echo benchmark_results/$desc/x86_64/${{matrix.cxx}}-${{matrix.bt}}-c++${{matrix.std}}-$bl | sed 's:++-:xx:g' | sed 's:+:x:g')
mkdir -p $dst
find build -name bm-results
mv -vf build/$bl/bm/bm-results/* $dst/.
Expand All @@ -91,3 +83,92 @@ jobs:
with:
name: benchmark_results
path: benchmark_results/

#--------------------------------------------------------------------------------------------------
bm_rarearch:
  # Builds and runs the benchmarks for the non-x86_64 architectures listed in
  # the matrix, using uraimo/run-on-arch-action to provide the target
  # architecture/distro environment.
  name: bm/${{matrix.arch}}/c++${{matrix.std}}/${{matrix.bt}}
  # Run unless the commit message asks to skip everything or to skip the
  # benchmarks; a message containing 'only benchmarks' always runs the job.
  # The two skip checks must be AND-ed: OR-ing them makes the condition true
  # whenever at least one of the two phrases is absent, i.e. almost always.
  if: |
    (
      !contains(github.event.head_commit.message, 'skip all') &&
      !contains(github.event.head_commit.message, 'skip benchmarks')
    ) ||
    contains(github.event.head_commit.message, 'only benchmarks')
  # A failure in one of these jobs does not fail the workflow run.
  continue-on-error: true
  runs-on: ubuntu-20.04
  strategy:
    fail-fast: false
    matrix:
      include:
        - {std: 11, bt: Release, arch: aarch64, distro: ubuntu20.04}
        - {std: 17, bt: Release, arch: aarch64, distro: ubuntu20.04}
        #
        - {std: 11, bt: Release, arch: ppc64le, distro: ubuntu20.04}
        - {std: 17, bt: Release, arch: ppc64le, distro: ubuntu20.04}
        #
        # the github runners are failing for the following:
        #- {std: 11, bt: Release, arch: s390x  , distro: ubuntu20.04}
        #- {std: 17, bt: Release, arch: s390x  , distro: ubuntu20.04}
        ##
        #- {std: 11, bt: Release, arch: armv6  , distro: ubuntu18.04}
        #- {std: 17, bt: Release, arch: armv6  , distro: ubuntu18.04}
        ##
        #- {std: 11, bt: Release, arch: armv7  , distro: ubuntu18.04}
        #- {std: 17, bt: Release, arch: armv7  , distro: ubuntu18.04}
  steps:
    - {name: checkout, uses: actions/checkout@v2, with: {submodules: recursive}}
    - name: test
      uses: uraimo/run-on-arch-action@v2.0.5
      with:
        arch: ${{matrix.arch}}
        distro: ${{matrix.distro}}
        # Packages needed to configure and build: git, a compiler toolchain
        # and cmake.
        install: |
          set -x
          apt-get update -y
          apt-get install -y \
            git \
            build-essential
          # arm platforms need an up-to-date cmake:
          # https://gitlab.kitware.com/cmake/cmake/-/issues/20568
          if [ "${{matrix.arch}}" == "armv6" ] || [ "${{matrix.arch}}" == "armv7" ] ; then
            apt-get install -y \
              gpg \
              wget \
              apt-transport-https
            # NOTE(review): --no-check-certificate disables TLS verification
            # while downloading an apt signing key; consider installing
            # ca-certificates and dropping the flag.
            wget --no-check-certificate -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null
            echo 'deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ focal main' | tee /etc/apt/sources.list.d/kitware.list >/dev/null
            apt-get update -y
            # replace the manually downloaded key with the packaged keyring
            rm /usr/share/keyrings/kitware-archive-keyring.gpg
            # -y: the install runs unattended, so never wait on a prompt
            apt-get install -y kitware-archive-keyring
            apt-get update -y
          fi
          apt-get install -y cmake cmake-data
          cmake --version
        # Configure, build and run the benchmarks, then collect the results.
        run: |
          set -x
          uname -a
          pwd
          ls -lFhp .
          #
          bdir=build_${{matrix.arch}}_${{matrix.bt}}_${{matrix.std}}
          idir=install_${{matrix.arch}}_${{matrix.bt}}_${{matrix.std}}
          mkdir -p $bdir
          #
          cmake -S . -B $bdir \
            -DCMAKE_INSTALL_PREFIX=$idir \
            -DCMAKE_BUILD_TYPE=${{matrix.bt}} \
            -DC4_CXX_STANDARD=${{matrix.std}} \
            -DCXX_STANDARD=${{matrix.std}} \
            -DC4CORE_DEV=ON \
            -DC4CORE_BUILD_TESTS=OFF \
            -DC4CORE_BUILD_BENCHMARKS=ON \
            -DC4CORE_SANITIZE=OFF \
            -DC4CORE_LINT=OFF \
            -DC4CORE_VALGRIND=OFF
          #
          # build in parallel...
          cmake --build $bdir -j --target c4core-bm-build
          #
          # ...but run with a single job
          cmake --build $bdir -j 1 --target c4core-bm-run
          #
          # gather the results under benchmark_results/<desc>/<arch>/...,
          # replacing '+' characters in the directory name
          desc=$(git describe || git rev-parse --short HEAD)
          dst=$(echo benchmark_results/$desc/${{matrix.arch}}/${{matrix.bt}}-c++${{matrix.std}} | sed 's:++-:xx:g' | sed 's:+:x:g')
          mkdir -p $dst
          find $bdir -name bm-results
          mv -vf $bdir/bm/bm-results/* $dst/.
    # NOTE(review): no step after this one is visible that uploads
    # benchmark_results/ as an artifact - confirm whether one is needed here.
216 changes: 146 additions & 70 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,73 +92,6 @@ are transitively used.
All of the utilities in this library are under the namespace `c4`; any
exposed macros use the prefix `C4_`: eg `C4_ASSERT()`.

### Multi-platform / multi-compiler utilities

```c++
// TODO: elaborate on the topics:
#include <c4/error.hpp>

C4_LIKELY()/C4_UNLIKELY()

C4_RESTRICT, $, c$, $$, c$$
#include <c4/restrict.hpp>
#include <c4/unrestrict.hpp>

#include <c4/windows_push.hpp>
#include <c4/windows_pop.hpp>

C4_UNREACHABLE()

c4::type_name()
```

### Runtime assertions and error handling

```c++
// TODO: elaborate on the topics:

error callback

C4_ASSERT()
C4_XASSERT()
C4_CHECK()

C4_ERROR()
C4_NOT_IMPLEMENTED()
```

### Memory allocation

```c++
// TODO: elaborate on the topics:

c4::aalloc(), c4::afree() // aligned allocation

c4::MemoryResource // global and scope

c4::Allocator
```

### Mass initialization/construction/destruction

```c++
// TODO: elaborate on the topics:

c4::construct()/c4::construct_n()

c4::destroy()/c4::destroy_n()

c4::copy_construct()/c4::copy_construct_n()

c4::copy_assign()/c4::copy_assign_n()

c4::move_construct()/c4::move_construct_n()

c4::move_assign()/c4::move_assign_n()

c4::make_room()/c4::destroy_room()
```


### Writeable string views: c4::substr and c4::csubstr

Expand All @@ -172,16 +105,78 @@ Here: [`#include <c4/charconv.hpp>`](src/c4/charconv.hpp)
```c++
// TODO: elaborate on the topics:

c4::read_dec(), c4::write_dec()
c4::read_hex(), c4::write_hex()
c4::read_oct(), c4::write_oct()
c4::read_bin(), c4::write_bin()

c4::utoa(), c4::atou()
c4::itoa(), c4::atoi()
c4::ftoa(), c4::atof()
c4::dtoa(), c4::atod()
c4::xtoa(), c4::atox()

c4::to_chars(), c4::from_chars()
c4::to_chars_sub()
c4::to_chars_first()
```

The charconv functions above are very fast; even faster than C++'s fastest facilities, `std::from_chars()` and `std::to_chars()`. For continuous benchmark results, browse through c4core's [github CI benchmark runs](https://github.com/biojppm/c4core/actions/workflows/benchmarks.yml). For example, a benchmark run on Linux/g++11.2 shows that:
- `c4::to_chars()` can be expected to be roughly...
- ~40% to 2x faster than `std::to_chars()`
- ~10x-30x faster than `sprintf()`
- ~50x-100x faster than a naive `stringstream::operator<<()` followed by `stringstream::str()`
- `c4::from_chars()` can be expected to be roughly...
- ~10%-30% faster than `std::from_chars()`
- ~10x faster than `scanf()`
- ~30x-50x faster than a naive `stringstream::str()` followed by `stringstream::operator>>()`

Here are the results:

| Write throughput | | Read throughput | |
|:-------------------------|--------:|:-------------------------|---------:|
| **write `uint8_t`** | **MB/s**| **read `uint8_t`** | **MB/s**|
| `c4::to_chars<u8>` | 526.86 | `c4::from_chars<u8>` | 163.06 |
| `std::to_chars<u8>` | 379.03 | `std::from_chars<u8>` | 154.85 |
| `std::sprintf<u8>` | 20.49 | `std::scanf<u8>` | 15.75 |
| `std::stringstream<u8>` | 3.82 | `std::stringstream<u8>` | 3.83 |
| **write `int8_t`** | **MB/s**| **read `int8_t`** | **MB/s**|
| `c4::to_chars<i8>` | 599.98 | `c4::from_chars<i8>` | 184.20 |
| `std::to_chars<i8>` | 246.32 | `std::from_chars<i8>` | 156.40 |
| `std::sprintf<i8>` | 19.15 | `std::scanf<i8>` | 16.44 |
| `std::stringstream<i8>` | 3.83 | `std::stringstream<i8>` | 3.89 |
| **write `uint16_t`** | **MB/s**| **read `uint16_t`** | **MB/s**|
| `c4::to_chars<u16>` | 486.40 | `c4::from_chars<u16>` | 349.48 |
| `std::to_chars<u16>` | 454.24 | `std::from_chars<u16>` | 319.13 |
| `std::sprintf<u16>` | 38.74 | `std::scanf<u16>` | 28.12 |
| `std::stringstream<u16>` | 7.08 | `std::stringstream<u16>`| 6.73 |
| **write `int16_t`** | **MB/s**| **read `int16_t`** | **MB/s**|
| `c4::to_chars<i16>` | 507.44 | `c4::from_chars<i16>` | 282.95 |
| `std::to_chars<i16>` | 297.49 | `std::from_chars<i16>` | 186.18 |
| `std::sprintf<i16>` | 39.03 | `std::scanf<i16>` | 28.45 |
| `std::stringstream<i16>` | 6.98 | `std::stringstream<i16>`| 6.49 |
| **write `uint32_t`** | **MB/s**| **read `uint32_t`** | **MB/s**|
| `c4::to_chars<u32>` | 730.12 | `c4::from_chars<u32>` | 463.95 |
| `std::to_chars<u32>` | 514.76 | `std::from_chars<u32>` | 329.42 |
| `std::sprintf<u32>` | 71.19 | `std::scanf<u32>` | 44.97 |
| `std::stringstream<u32>` | 14.05 | `std::stringstream<u32>`| 12.57 |
| **write `int32_t`** | **MB/s**| **read `int32_t`** | **MB/s**|
| `c4::to_chars<i32>` | 618.76 | `c4::from_chars<i32>` | 345.53 |
| `std::to_chars<i32>` | 394.72 | `std::from_chars<i32>` | 224.46 |
| `std::sprintf<i32>` | 71.14 | `std::scanf<i32>` | 43.49 |
| `std::stringstream<i32>` | 13.91 | `std::stringstream<i32>`| 12.03 |
| **write `uint64_t`** | **MB/s**| **read `uint64_t`** | **MB/s**|
| `c4::to_chars<u64>` | 1118.87 | `c4::from_chars<u64>` | 928.49 |
| `std::to_chars<u64>` | 886.58 | `std::from_chars<u64>` | 759.03 |
| `std::sprintf<u64>` | 140.96 | `std::scanf<u64>` | 91.60 |
| `std::stringstream<u64>` | 28.01 | `std::stringstream<u64>`| 25.00 |
| **write `int64_t`** | **MB/s**| **read `int64_t`** | **MB/s**|
| `c4::to_chars<i64>` | 1198.78 | `c4::from_chars<i64>` | 713.76 |
| `std::to_chars<i64>` | 882.17 | `std::from_chars<i64>` | 646.18 |
| `std::sprintf<i64>` | 138.79 | `std::scanf<i64>` | 90.07 |
| `std::stringstream<i64>` | 27.62 | `std::stringstream<i64>`| 25.12 |


### String formatting and parsing

* [`#include <c4/format.hpp>`](src/c4/format.hpp)
Expand All @@ -190,13 +185,26 @@ c4::to_chars_first()
// TODO: elaborate on the topics:

c4::cat(), c4::uncat()

c4::catsep(), c4::uncatsep()

c4::format(), c4::unformat()

c4::catrs()
c4::catseprs()
c4::formatrs()

// formatting:
c4::raw, c4::craw
c4::fmt::overflow_checked
c4::fmt::real
c4::fmt::boolalpha
c4::fmt::dec
c4::fmt::hex
c4::fmt::oct
c4::fmt::bin
c4::fmt::zpad
c4::fmt::right
c4::fmt::left
c4::fmt::raw, c4::fmt::craw
c4::fmt::base64, c4::fmt::cbase64
```

### `c4::span` and `c4::blob`
Expand Down Expand Up @@ -230,3 +238,71 @@ c4::bm2str(), c4::str2bm()
[`#include <c4/base64.hpp>`](src/c4/base64.hpp)

### Fuzzy float comparison


### Multi-platform / multi-compiler utilities

```c++
// TODO: elaborate on the topics:
#include <c4/error.hpp>

C4_LIKELY()/C4_UNLIKELY()

C4_RESTRICT, $, c$, $$, c$$
#include <c4/restrict.hpp>
#include <c4/unrestrict.hpp>

#include <c4/windows_push.hpp>
#include <c4/windows_pop.hpp>

C4_UNREACHABLE()

c4::type_name()
```

### Runtime assertions and error handling

```c++
// TODO: elaborate on the topics:

error callback

C4_ASSERT()
C4_XASSERT()
C4_CHECK()

C4_ERROR()
C4_NOT_IMPLEMENTED()
```

### Memory allocation

```c++
// TODO: elaborate on the topics:

c4::aalloc(), c4::afree() // aligned allocation

c4::MemoryResource // global and scope

c4::Allocator
```

### Mass initialization/construction/destruction

```c++
// TODO: elaborate on the topics:

c4::construct()/c4::construct_n()

c4::destroy()/c4::destroy_n()

c4::copy_construct()/c4::copy_construct_n()

c4::copy_assign()/c4::copy_assign_n()

c4::move_construct()/c4::move_construct_n()

c4::move_assign()/c4::move_assign_n()

c4::make_room()/c4::destroy_room()
```
Loading

0 comments on commit 2fd5583

Please sign in to comment.